# Retrieval System for Music Data

### Importing necessary libraries

In [1]:
#Importing necessary libraries
from ast import literal_eval
from tqdm import tqdm
import pandas as pd
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.metrics import ndcg_score
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import ipywidgets as widgets
from IPython.display import display as ipy_display
from IPython.display import display, clear_output
from ipywidgets import Output
from ipywidgets import HBox

### Loading Datasets

In [2]:
# Load the lyrics-based feature tables with the 'id' column as the index
tfidf_data = pd.read_csv('id_lyrics_tf-idf_mmsr.tsv', sep='\t', index_col='id')
bert_data = pd.read_csv('id_lyrics_bert_mmsr.tsv', sep='\t', index_col='id')
word2vec_data = pd.read_csv('id_lyrics_word2vec_mmsr.tsv', sep='\t', index_col='id')

# Track metadata: 'id' stays a regular column because the retrieval code
# filters and joins on info_data['id'].
info_data = pd.read_csv('id_information_mmsr.tsv', sep='\t')
info_data.head(3)

# Audio features, indexed by track id
data_mfcc_bow = pd.read_csv('id_mfcc_bow_mmsr.tsv', sep='\t')
data_mfcc_bow = data_mfcc_bow.set_index('id')

id_ivec256_mmsr = pd.read_csv('id_ivec256_mmsr.tsv', sep='\t')
id_ivec256_mmsr.set_index(id_ivec256_mmsr.columns[0], inplace=True)

# NOTE: id_ivec512_mmsr and id_blf_correlation_mmsr are loaded without an id
# index; they are not referenced by the retrieval runs in the visible cells.
id_ivec512_mmsr = pd.read_csv('id_ivec512_mmsr.tsv', sep='\t')

id_blf_correlation_mmsr = pd.read_csv('id_blf_correlation_mmsr.tsv', sep='\t')
id_blf_spectral_mmsr = pd.read_csv('id_blf_spectral_mmsr.tsv', sep='\t')

id_blf_spectral_mmsr.set_index(id_blf_spectral_mmsr.columns[0], inplace=True)

id_musicnn_mmsr = pd.read_csv('id_musicnn_mmsr.tsv', sep='\t')
id_musicnn_mmsr.set_index(id_musicnn_mmsr.columns[0], inplace=True)

# Video features, indexed by track id
id_incp_mmsr = pd.read_csv('id_incp_mmsr.tsv', sep='\t')
id_incp_mmsr = id_incp_mmsr.set_index('id')

id_resnet_mmsr = pd.read_csv('id_resnet_mmsr.tsv', sep='\t')
id_resnet_mmsr = id_resnet_mmsr.set_index('id')

id_url_mmsr = pd.read_csv('id_url_mmsr.tsv', sep='\t')

id_vgg19_mmsr = pd.read_csv('id_vgg19_mmsr.tsv', sep='\t')
id_vgg19_mmsr = id_vgg19_mmsr.set_index('id')

# Genre annotations, indexed by track id (used as ground truth for evaluation).
# The original line opened the string with a typographic quote, which is a
# SyntaxError; plain ASCII quotes are used here.
data_id_genres = pd.read_csv('id_genres_mmsr.tsv', sep='\t')
data_id_genres = data_id_genres.set_index('id')

### Defining the Four Text-Based Similarity Retrieval Systems

In [5]:
# Random baseline similarity
def random_song(song_id, other_song_id, data):
    """Return a random similarity score in [0, 1) for any pair of songs.

    All three arguments are ignored; they exist so this baseline has the same
    call signature as the other similarity functions.
    """
    score = random.random()
    return score

# Jaccard similarity on term presence
def jaccard_similarity(id1, id2, tfidf_data):
    """Return the Jaccard similarity of two rows of ``tfidf_data``.

    Each row is reduced to a boolean presence vector (non-zero -> True). The
    score is |A and B| / |A or B|, or 0 when neither row has any non-zero
    entry.
    """
    present_a = tfidf_data.loc[id1].astype(bool).values
    present_b = tfidf_data.loc[id2].astype(bool).values

    union_size = np.sum(np.logical_or(present_a, present_b))
    if union_size == 0:
        # Both vectors are empty: define the similarity as 0 instead of dividing by zero.
        return 0

    shared_size = np.sum(np.logical_and(present_a, present_b))
    return shared_size / union_size

# Cosine similarity for dense or sparse embedding rows (Word2Vec, TF-IDF, ...)
def cosine_similarity_between_songs(id1, id2, embedding_data):
    """Return the cosine similarity between the rows ``id1`` and ``id2``.

    Both rows are looked up by index label in ``embedding_data`` and reshaped
    to (1, n_features), the 2-D shape passed to ``cosine_similarity``.
    """
    row_a, row_b = (
        embedding_data.loc[label].values.reshape(1, -1) for label in (id1, id2)
    )
    score_matrix = cosine_similarity(row_a, row_b)
    # The result is a 1x1 matrix; unwrap the single score.
    return score_matrix[0][0]



def retrieve_similar_songs(song_id, embedding_data, similarity_function, info_data, top_n=10095):
    """Rank all other songs by similarity to ``song_id``.

    Parameters:
        song_id: index label of the query song in ``embedding_data``.
        embedding_data: DataFrame of features, indexed by song id.
        similarity_function: callable ``(id1, id2, embedding_data) -> score``.
        info_data: DataFrame with an 'id' column holding the song metadata.
        top_n: maximum number of results to keep.

    Returns:
        The rows of ``info_data`` for the ``top_n`` most similar songs, with an
        added 'similarity_score' column, sorted by descending score.
    """
    candidates = tqdm(embedding_data.index, desc="Calculating similarities")
    # Score every song except the query itself.
    scores = {
        candidate_id: similarity_function(song_id, candidate_id, embedding_data)
        for candidate_id in candidates
        if candidate_id != song_id
    }

    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)[:top_n]
    ranked_ids = [candidate_id for candidate_id, _ in ranked]
    score_table = pd.DataFrame(ranked, columns=['id', 'similarity_score']).set_index('id')

    # Attach the scores to the metadata rows and restore the ranking order,
    # since the boolean filter returns rows in info_data order.
    matched_info = info_data[info_data['id'].isin(ranked_ids)]
    matched_info = matched_info.join(score_table, on='id')
    return matched_info.sort_values(by='similarity_score', ascending=False)

### Checking whether a track shares a genre with the query

In [None]:
def has_matching_genre(track_genres, query_genre):
    """Return True when the two genre collections share at least one element."""
    shared = set(track_genres).intersection(query_genre)
    return len(shared) > 0

## Precision - Recall Calculation

In [None]:
def calculate_precision_recall(retrieved_tracks, query_track_genres):
    """Compute precision@k and recall@k for k = 1 .. len(retrieved_tracks).

    A track counts as relevant when it shares at least one genre with the query.

    Parameters:
        retrieved_tracks: DataFrame of retrieved tracks in rank order, with a
            'genre' column. An empty frame produces empty result lists.
        query_track_genres: object with ``.items()`` yielding
            ``(key, query_genres)`` pairs. The caller in this file passes one
            row of ``data_id_genres``, so ``.items()`` yields the row's columns.

    Returns:
        ``(precision_at_k, recall_at_k)``: two lists with one value per rank
        for each item of ``query_track_genres``, concatenated in order.

    Note:
        Reads the module-level ``data_id_genres`` to count all relevant tracks
        in the collection (the recall denominator).
    """
    precision_at_k = []
    recall_at_k = []
    num_retrieved = len(retrieved_tracks)

    for query_id, query_genres in query_track_genres.items():
        # Number of relevant tracks in the whole collection.
        matches = data_id_genres['genre'].apply(lambda x: has_matching_genre(x, query_genres))
        all_relevant = int(matches.sum())

        if num_retrieved == 0:
            continue

        # Judge each retrieved track once, then take a running total. This
        # replaces re-slicing and re-scanning the top-k rows for every k.
        is_relevant = retrieved_tracks['genre'].apply(
            lambda genres: has_matching_genre(genres, query_genres)
        ).to_numpy(dtype=bool)
        relevant_so_far = np.cumsum(is_relevant)
        ranks = np.arange(1, num_retrieved + 1)

        precision_at_k.extend((relevant_so_far / ranks).tolist())
        if all_relevant:
            recall_at_k.extend((relevant_so_far / all_relevant).tolist())
        else:
            # No relevant track exists at all: recall is defined as 0.
            recall_at_k.extend([0] * num_retrieved)

    return precision_at_k, recall_at_k

## Random retrieval for a list of queries

In [None]:
def random_top(ids, info_data, top_n = 100):
    """Build a random baseline: ``top_n`` randomly sampled rows per query id.

    Returns a dict mapping each id in ``ids`` to its own random sample of
    ``info_data`` rows.
    """
    return {
        query_song_id: info_data.sample(top_n)
        for query_song_id in tqdm(ids, desc="Finding similar songs")
    }

In [None]:
import random
random.seed(123)

# Sample 400 query songs from info_data. Even with this sample, preparing the
# result sets below took about 9 hours.
#
# pandas' .sample() does not use the stdlib `random` generator seeded above,
# so the seed is also passed explicitly to make the selection reproducible.
select_ids = info_data['id'].sample(400, random_state=123)

In [None]:
# Random baseline: for every sampled query id, draw 100 random rows of info_data.
top_100_similar_random = random_top(select_ids, info_data, top_n = 100)

## Retrieval for a list of queries using various methods

In [None]:
def similar_songs_top(ids, embedding_data, similarity, info_data, top_n=100):
    """Retrieve the ``top_n`` most similar songs for every query id.

    Parameters:
        ids: iterable of query song ids.
        embedding_data: DataFrame of features, indexed by song id.
        similarity: callable ``(id1, id2, embedding_data) -> score``.
        info_data: DataFrame with an 'id' column holding the song metadata.
        top_n: number of results to keep per query.

    Returns:
        dict mapping each query id to the DataFrame returned by
        ``retrieve_similar_songs`` for that query.
    """
    results_by_id = {}

    for song_id in tqdm(ids, desc="Finding similar songs"):
        # Pass the caller's top_n through; it was previously hard-coded to 100,
        # which silently ignored the parameter.
        results_by_id[song_id] = retrieve_similar_songs(
            song_id, embedding_data, similarity, info_data, top_n=top_n
        )

    return results_by_id

In [None]:
# Run every text-, audio- and video-based retrieval system on the sample of
# 400 query songs to get an overview. Each result is a dict that maps a query
# id to its top-100 result DataFrame.

# Text-based: TF-IDF and Word2Vec with cosine similarity, TF-IDF with Jaccard
top_100_similar_songs_tfidf = similar_songs_top(select_ids,tfidf_data, cosine_similarity_between_songs, info_data)

top_100_similar_songs_word2vec = similar_songs_top(select_ids,word2vec_data, cosine_similarity_between_songs, info_data)

top_100_similar_songs_jaccard_tfidf = similar_songs_top(select_ids,tfidf_data, jaccard_similarity, info_data)

# Audio-based: MFCC bag-of-words, i-vectors (256), BLF spectral, musicnn
top_100_similar_songs_mfcc_bow = similar_songs_top(select_ids,data_mfcc_bow, cosine_similarity_between_songs, info_data)

top_100_similar_songs_id_ivec256 = similar_songs_top(select_ids,id_ivec256_mmsr, cosine_similarity_between_songs, info_data)

top_100_similar_songs_id_blf_spectral = similar_songs_top(select_ids,id_blf_spectral_mmsr, cosine_similarity_between_songs, info_data)

top_100_similar_songs_id_musicnn = similar_songs_top(select_ids,id_musicnn_mmsr, cosine_similarity_between_songs, info_data)

# Video-based: Inception, ResNet, VGG19 features
top_100_similar_songs_id_incp = similar_songs_top(select_ids,id_incp_mmsr, cosine_similarity_between_songs, info_data)

top_100_similar_songs_id_resnet = similar_songs_top(select_ids,id_resnet_mmsr, cosine_similarity_between_songs, info_data)

top_100_similar_songs_id_vgg19 = similar_songs_top(select_ids,id_vgg19_mmsr, cosine_similarity_between_songs, info_data)

### Saving the 100 songs retrieved by the distinct methods for various queries 

In [None]:
import pickle

# Directory that receives one pickle file per retrieval method.
saving_path = '/Users/user/Desktop/Multimedia/'

# Maps the output file stem to the per-query result dict of each method.
# The vgg19 key previously ended with a stray space, which produced a file
# named 'top_100_similar_songs_id_vgg19 .pkl'.
data_to_save = {
    'top_100_similar_songs_tfidf': top_100_similar_songs_tfidf,
    'top_100_similar_songs_word2vec': top_100_similar_songs_word2vec,
    'top_100_similar_songs_jaccard_tfidf': top_100_similar_songs_jaccard_tfidf,
    'top_100_similar_songs_mfcc_bow': top_100_similar_songs_mfcc_bow,
    'top_100_similar_songs_id_ivec256': top_100_similar_songs_id_ivec256,
    'top_100_similar_songs_id_blf_spectral': top_100_similar_songs_id_blf_spectral,
    'top_100_similar_songs_id_musicnn': top_100_similar_songs_id_musicnn,
    'top_100_similar_songs_id_incp': top_100_similar_songs_id_incp,
    'top_100_similar_songs_id_resnet': top_100_similar_songs_id_resnet,
    'top_100_similar_songs_id_vgg19': top_100_similar_songs_id_vgg19,
}

for dataset_name, dataset in data_to_save.items():
    paths_to_dataset = f'{saving_path}{dataset_name}.pkl'
    with open(paths_to_dataset, 'wb') as file:
        pickle.dump(dataset, file)

## Calculating precision and recall for all methods, averaging the results over the various queries, k = 1, ..., 100

In [None]:
def calculate_recall_precision_new_way(similar_songs_dataset, data_id_genres, query_ids,top_n=100):
    """Compute per-query and averaged precision@k / recall@k for one method.

    Parameters:
        similar_songs_dataset: mapping from query id to a DataFrame of
            retrieved songs in rank order, with an 'id' column.
        data_id_genres: genre table indexed by song id.
        query_ids: iterable of query ids to evaluate.
        top_n: number of ranks to evaluate (k = 1 .. top_n).

    Returns:
        ``(precision_df, recall_df, average_precision_df, average_recall_df)``.
        The first two have one row per query and columns 0 .. top_n-1, where
        column j holds the value at k = j + 1. Ranks with no value are NaN.
        The last two are one-row frames with the column-wise mean over queries.
    """
    precision_rows = {}
    recall_rows = {}
    known_ids = data_id_genres.index

    for query_id in tqdm(query_ids):
        query_genres = data_id_genres.loc[query_id]
        method_songs = similar_songs_dataset[query_id]

        # Keep only retrieved ids that have genre annotations and fetch them
        # in one lookup, instead of concatenating one row at a time.
        retrieved_ids = [
            song_id for song_id in method_songs['id'].head(top_n) if song_id in known_ids
        ]
        retrieved = data_id_genres.loc[retrieved_ids]

        precision, recall = calculate_precision_recall(retrieved, query_genres)

        # Pad short result lists so that every row has exactly top_n entries.
        precision_rows[query_id] = list(precision) + [np.nan] * (top_n - len(precision))
        recall_rows[query_id] = list(recall) + [np.nan] * (top_n - len(recall))

    # Size the frames by top_n (was hard-coded to 100) and store floats so the
    # column means are well defined.
    rank_columns = range(top_n)
    precision_df = pd.DataFrame(
        list(precision_rows.values()),
        index=list(precision_rows.keys()),
        columns=rank_columns,
        dtype=float,
    )
    recall_df = pd.DataFrame(
        list(recall_rows.values()),
        index=list(recall_rows.keys()),
        columns=rank_columns,
        dtype=float,
    )

    # Average once after the loop; this also keeps the return values defined
    # when query_ids is empty.
    average_precision_df = pd.DataFrame(precision_df.mean()).transpose()
    average_recall_df = pd.DataFrame(recall_df.mean()).transpose()

    return precision_df, recall_df, average_precision_df, average_recall_df

In [None]:
# Evaluate every retrieval system on the same 400 queries. Each call returns
# the per-query precision/recall frames plus their averages over all queries.

## Random

precision_df_random, recall_df_random, average_precision_df_random, average_recall_df_random = calculate_recall_precision_new_way(top_100_similar_random, data_id_genres, select_ids,top_n=100)


## Text - based


precision_df_tfidf, recall_df_tfidf, average_precision_df_tfidf, average_recall_df_tfidf = calculate_recall_precision_new_way(top_100_similar_songs_tfidf, data_id_genres, select_ids,top_n=100)

precision_df_jaccard_tfidf, recall_df_jaccard_tfidf, average_precision_df_jaccard_tfidf, average_recall_df_jaccard_tfidf = calculate_recall_precision_new_way(top_100_similar_songs_jaccard_tfidf, data_id_genres, select_ids,top_n=100)

# The last target was misspelled 'average_recall_df_tword2vec', which left
# average_recall_df_word2vec undefined for the cells below.
precision_df_word2vec, recall_df_word2vec, average_precision_df_word2vec, average_recall_df_word2vec = calculate_recall_precision_new_way(top_100_similar_songs_word2vec, data_id_genres, select_ids,top_n=100)

## Audio - based

precision_df_mfcc_bow, recall_df_mfcc_bow, average_precision_df_mfcc_bow, average_recall_df_mfcc_bow = calculate_recall_precision_new_way(top_100_similar_songs_mfcc_bow, data_id_genres, select_ids,top_n=100)

precision_df_id_ivec256, recall_df_id_ivec256, average_precision_df_id_ivec256, average_recall_df_id_ivec256 = calculate_recall_precision_new_way(top_100_similar_songs_id_ivec256, data_id_genres, select_ids,top_n=100)

precision_df_blf_spectral, recall_df_blf_spectral, average_precision_df_blf_spectral, average_recall_df_blf_spectral = calculate_recall_precision_new_way(top_100_similar_songs_id_blf_spectral, data_id_genres, select_ids,top_n=100)

precision_df_id_musicnn, recall_df_id_musicnn, average_precision_df_id_musicnn, average_recall_df_id_musicnn = calculate_recall_precision_new_way(top_100_similar_songs_id_musicnn, data_id_genres, select_ids,top_n=100)

## Video - based

precision_df_id_incp, recall_df_id_incp, average_precision_df_id_incp, average_recall_df_id_incp = calculate_recall_precision_new_way(top_100_similar_songs_id_incp, data_id_genres, select_ids,top_n=100)

precision_df_id_resnet, recall_df_id_resnet, average_precision_df_id_resnet, average_recall_df_id_resnet = calculate_recall_precision_new_way(top_100_similar_songs_id_resnet, data_id_genres, select_ids,top_n=100)

precision_df_id_vgg19, recall_df_id_vgg19, average_precision_df_id_vgg19, average_recall_df_id_vgg19 = calculate_recall_precision_new_way(top_100_similar_songs_id_vgg19, data_id_genres, select_ids,top_n=100)

In [None]:
# Convert each one-row average DataFrame into a plain list (one value per
# rank k). The names are rebound in place, so from here on the
# `average_*_df_*` variables hold lists, not DataFrames.

def _first_row_as_list(frame):
    """Return the first row of ``frame`` as a Python list."""
    return frame.iloc[0].tolist()


# Random baseline
average_precision_df_random = _first_row_as_list(average_precision_df_random)
average_recall_df_random = _first_row_as_list(average_recall_df_random)

# Text-based systems
average_precision_df_tfidf = _first_row_as_list(average_precision_df_tfidf)
average_recall_df_tfidf = _first_row_as_list(average_recall_df_tfidf)
average_precision_df_word2vec = _first_row_as_list(average_precision_df_word2vec)
average_recall_df_word2vec = _first_row_as_list(average_recall_df_word2vec)
average_precision_df_jaccard_tfidf = _first_row_as_list(average_precision_df_jaccard_tfidf)
average_recall_df_jaccard_tfidf = _first_row_as_list(average_recall_df_jaccard_tfidf)

# Audio-based systems
average_precision_df_mfcc_bow = _first_row_as_list(average_precision_df_mfcc_bow)
average_recall_df_mfcc_bow = _first_row_as_list(average_recall_df_mfcc_bow)
average_precision_df_id_ivec256 = _first_row_as_list(average_precision_df_id_ivec256)
average_recall_df_id_ivec256 = _first_row_as_list(average_recall_df_id_ivec256)
average_precision_df_blf_spectral = _first_row_as_list(average_precision_df_blf_spectral)
average_recall_df_blf_spectral = _first_row_as_list(average_recall_df_blf_spectral)
average_precision_df_id_musicnn = _first_row_as_list(average_precision_df_id_musicnn)
average_recall_df_id_musicnn = _first_row_as_list(average_recall_df_id_musicnn)

# Video-based systems
average_precision_df_id_incp = _first_row_as_list(average_precision_df_id_incp)
average_recall_df_id_incp = _first_row_as_list(average_recall_df_id_incp)
average_precision_df_id_resnet = _first_row_as_list(average_precision_df_id_resnet)
average_recall_df_id_resnet = _first_row_as_list(average_recall_df_id_resnet)
average_precision_df_id_vgg19 = _first_row_as_list(average_precision_df_id_vgg19)
average_recall_df_id_vgg19 = _first_row_as_list(average_recall_df_id_vgg19)

### Saving all average precision datasets computed till now

In [None]:
# Average precision values per retrieval system, keyed by the name used in
# the output file. The random baseline is not part of this mapping.
retrieval_systems_precision = {
    'word2vec': average_precision_df_word2vec,
    'cos_tfidf': average_precision_df_tfidf,
    'jacc_tf_idf': average_precision_df_jaccard_tfidf,
    'mfcc_bow': average_precision_df_mfcc_bow,
    'id_ivec256':average_precision_df_id_ivec256,
    'blf_spectral': average_precision_df_blf_spectral,
    'id_musicNN':average_precision_df_id_musicnn, 
    'id_incp': average_precision_df_id_incp,
    'id_resnet': average_precision_df_id_resnet,
    'id_vgg19': average_precision_df_id_vgg19
}

# Output directory for the CSV files.
saving_path = '/Users/user/Desktop/Multimedia/'

# Write one single-column CSV per system. The file name is
# '<system>_average_precision' and carries no file extension.
for retrieval_system, precision_averages in retrieval_systems_precision.items():
    df = pd.DataFrame(precision_averages)
    file_path = f'{saving_path}{retrieval_system}_average_precision'
    df.to_csv(file_path, index = False)

### Saving all average recall datasets computed till now

In [None]:
# Average recall values per retrieval system, keyed by the name used in the
# output file. The random baseline is not part of this mapping.
retrieval_systems_recall = {
    'word2vec': average_recall_df_word2vec,
    'cos_tfidf': average_recall_df_tfidf,
    'jacc_tf_idf': average_recall_df_jaccard_tfidf,
    'mfcc_bow': average_recall_df_mfcc_bow,
    'id_ivec256':average_recall_df_id_ivec256,
    'blf_spectral': average_recall_df_blf_spectral,
    'id_musicNN':average_recall_df_id_musicnn, 
    'id_incp': average_recall_df_id_incp,
    'id_resnet': average_recall_df_id_resnet,
    'id_vgg19': average_recall_df_id_vgg19
}

# Output directory for the CSV files.
saving_path = '/Users/user/Desktop/Multimedia/'

# Write one single-column CSV per system. The file name is
# '<system>_average_recall' and carries no file extension.
for retrieval_system, recall_averages in retrieval_systems_recall.items():
    df = pd.DataFrame(recall_averages)
    file_path = f'{saving_path}{retrieval_system}_average_recall'
    df.to_csv(file_path, index = False)

## Plotting average precision against average recall in one plot for all retrieval systems


In [None]:
# One precision-recall curve per retrieval system, drawn in this order.
# Each entry is (recall values, precision values, legend label, annotation).
# An annotation is (list index, text, extra plt.text keyword arguments) and
# places a text label directly on the curve; None means no label.
all_system_curves = [
    (average_recall_df_random, average_precision_df_random, 'Random',
     (-5, 'random', {'va': 'baseline'})),
    (average_recall_df_word2vec, average_precision_df_word2vec, 'Word2vec', None),
    (average_recall_df_tfidf, average_precision_df_tfidf, 'Cosine Similarity: Tf-Idf', None),
    (average_recall_df_jaccard_tfidf, average_precision_df_jaccard_tfidf, 'Jaccard Similarity: Tf-Idf', None),
    (average_recall_df_mfcc_bow, average_precision_df_mfcc_bow, 'MFCC-BoW', None),
    (average_recall_df_id_ivec256, average_precision_df_id_ivec256, 'id_ivec256', None),
    (average_recall_df_blf_spectral, average_precision_df_blf_spectral, 'Blf-spectral', None),
    (average_recall_df_id_musicnn, average_precision_df_id_musicnn, 'id_musicNN',
     (-15, 'id_musicnn', {})),
    (average_recall_df_id_incp, average_precision_df_id_incp, 'id_incp', None),
    (average_recall_df_id_resnet, average_precision_df_id_resnet, 'id_resnet', None),
    (average_recall_df_id_vgg19, average_precision_df_id_vgg19, 'id_vgg19', None),
]

for recall_values, precision_values, legend_label, annotation in all_system_curves:
    plt.plot(recall_values, precision_values, label=legend_label)
    if annotation is not None:
        position, caption, text_kwargs = annotation
        plt.text(recall_values[position], precision_values[position], caption, **text_kwargs)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for All Retrieval Systems')
plt.legend(fontsize='small')
plt.grid(True)
plt.show()

## Precision - Recall plot for text-based retrieval systems

In [None]:
# Text-based systems: (legend label, recall values, precision values)
text_system_curves = (
    ('Word2vec', average_recall_df_word2vec, average_precision_df_word2vec),
    ('Cosine Similarity: Tf-Idf', average_recall_df_tfidf, average_precision_df_tfidf),
    ('Jaccard Similarity: Tf-Idf', average_recall_df_jaccard_tfidf, average_precision_df_jaccard_tfidf),
)

for curve_label, recall_values, precision_values in text_system_curves:
    plt.plot(recall_values, precision_values, label=curve_label)

plt.title('Precision-Recall Curve for Text-based Retrieval Systems')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.grid(True)
plt.legend()
plt.show()

## Precision - Recall plot for audio-based retrieval systems

In [None]:
# Audio-based systems: (legend label, recall values, precision values)
audio_system_curves = (
    ('MFCC-BoW', average_recall_df_mfcc_bow, average_precision_df_mfcc_bow),
    ('id_ivec256', average_recall_df_id_ivec256, average_precision_df_id_ivec256),
    ('Blf-spectral', average_recall_df_blf_spectral, average_precision_df_blf_spectral),
    ('id_musicNN', average_recall_df_id_musicnn, average_precision_df_id_musicnn),
)

for curve_label, recall_values, precision_values in audio_system_curves:
    plt.plot(recall_values, precision_values, label=curve_label)

plt.title('Precision-Recall Curve for Audio-based Retrieval Systems')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.grid(True)
plt.legend()
plt.show()

## Precision - Recall plot for video-based retrieval systems

In [None]:
# Video-based systems: (legend label, recall values, precision values)
video_system_curves = (
    ('id_incp', average_recall_df_id_incp, average_precision_df_id_incp),
    ('id_resnet', average_recall_df_id_resnet, average_precision_df_id_resnet),
    ('id_vgg19', average_recall_df_id_vgg19, average_precision_df_id_vgg19),
)

for curve_label, recall_values, precision_values in video_system_curves:
    plt.plot(recall_values, precision_values, label=curve_label)

plt.title('Precision-Recall Curve for Video-based Retrieval Systems')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.grid(True)
plt.legend()
plt.show()

## Precision @10 and Recall @10

In [None]:
# Row labels of the summary table, in the same order as the value lists below.
models = ['word2vec', 'cosine - tfidf', 'jaccard - tfidf', 'mfcc_bow', 'id_ivec256',
        'blf_spectral', 'id_musicnn', 'id_incp', 'id_resnet', 'id_vgg19']

# The averaged lists are 0-based over k = 1 .. 100, so index 9 is the value
# at cut-off k = 10.
precision_results = [
    average_precision_df_word2vec[9],
    average_precision_df_tfidf[9],
    average_precision_df_jaccard_tfidf[9],
    average_precision_df_mfcc_bow[9],
    average_precision_df_id_ivec256[9],
    average_precision_df_blf_spectral[9],
    average_precision_df_id_musicnn[9],
    average_precision_df_id_incp[9],
    average_precision_df_id_resnet[9],
    average_precision_df_id_vgg19[9],
]
recall_results = [
    average_recall_df_word2vec[9],
    average_recall_df_tfidf[9],
    average_recall_df_jaccard_tfidf[9],
    average_recall_df_mfcc_bow[9],
    average_recall_df_id_ivec256[9],
    average_recall_df_blf_spectral[9],
    average_recall_df_id_musicnn[9],
    average_recall_df_id_incp[9],
    average_recall_df_id_resnet[9],
    average_recall_df_id_vgg19[9],
]

# Named `metrics_at_10` instead of `input`, so the built-in input() function
# is no longer shadowed for the rest of the session.
metrics_at_10 = {'RS - Model': models, 'Precision@10': precision_results, 'Recall@10': recall_results}
recall_precison_df_at_10 = pd.DataFrame(metrics_at_10)

recall_precison_df_at_10.set_index('RS - Model', inplace=True)

print(recall_precison_df_at_10)

### Experimenting with coverage. In the end, coverage for the list of queries was not computed with this code but with the final Codebook+Interactive_Display code

### Coverage

In [None]:
def calculate_genre_coverage(retrieved_tracks, data_id_genres, top_n=10):
    """Return the share of all genres that occur in the first ``top_n`` tracks.

    Both frames need a 'genre' column. List-valued cells are flattened before
    the distinct genres are counted. The result is
    ``len(genres in top tracks) / len(genres in data_id_genres)``.
    """
    def _distinct_genres(frame):
        # Flatten the per-track genre collections and de-duplicate them.
        return set(frame['genre'].explode().unique())

    covered = _distinct_genres(retrieved_tracks.head(top_n))
    catalogue = _distinct_genres(data_id_genres)
    return len(covered) / len(catalogue)


from tqdm import tqdm

def calculate_genre_coverage_for_all_methods(song_id, info_data, retrieval_methods, top_n=10095):
    """Compute the genre coverage of every retrieval method for one query song.

    Parameters:
        song_id: id of the query song.
        info_data: DataFrame with an 'id' column holding the song metadata.
        retrieval_methods: mapping from method name to a dict with the keys
            'data' (feature table) and 'function' (similarity function).
        top_n: number of results to retrieve per method.

    Returns:
        dict mapping each method name to its genre coverage: the number of
        distinct genres among the retrieved songs divided by the number of
        distinct genres in the module-level ``data_id_genres``. The coverage
        is 0.0 when ``data_id_genres`` contains no genres.
    """
    metrics = {}

    # Distinct genres in the whole dataset (the denominator).
    all_unique_genres = set(data_id_genres['genre'].explode().unique())
    known_ids = data_id_genres.index

    for method_name, method_data in retrieval_methods.items():
        retrieved_songs = retrieve_similar_songs(song_id, method_data['data'], method_data['function'], info_data, top_n)

        # Collect the retrieved ids that have genre annotations. The loop
        # variable must not be called `song_id`: that overwrote the query id,
        # so every method after the first one queried the wrong song.
        found_ids = []
        for retrieved_id in tqdm(retrieved_songs['id'], total=len(retrieved_songs)):
            if retrieved_id in known_ids:
                found_ids.append(retrieved_id)
            else:
                print(f"ID {retrieved_id} not found in data_id_genres. Skipping.")

        # A single lookup replaces the row-by-row pd.concat and also works
        # when nothing was found (empty frame that still has a 'genre' column).
        retrieved = data_id_genres.loc[found_ids]
        unique_genres_in_retrieved = set(retrieved['genre'].explode().unique())

        if all_unique_genres:
            genre_coverage = len(unique_genres_in_retrieved) / len(all_unique_genres)
        else:
            genre_coverage = 0.0

        metrics[method_name] = genre_coverage

    return metrics


# Example run for a single query song with the top 10 results per method.
# NOTE(review): `retrieval_methods` is not defined anywhere in the visible part
# of this file. The function expects a dict of the form
# {method_name: {'data': feature_table, 'function': similarity_function}}.
calculate_genre_coverage_for_all_methods('01gyRHLquwXDlhkO', info_data, retrieval_methods, top_n=10)