# Evaluation (Precision)

In [1]:
import os, sys
import pandas as pd
from tqdm import tqdm
from typing import Dict

# Name of the repository root directory; all relative paths used by this
# notebook are resolved against it.
target_folder = 'MMSR25-26-Group-E'
current_path = os.getcwd()

# Walk up from the current working directory until the repository root is found.
while os.path.basename(current_path) != target_folder:
    # When passing through the RetrievalAlgorithm directory, make it importable.
    if os.path.basename(current_path) == 'RetrievalAlgorithm' and current_path not in sys.path:
        sys.path.append(current_path)
    parent = os.path.dirname(current_path)
    # os.path.dirname() of the filesystem root returns the root itself; without
    # this guard the loop would spin forever when the notebook is started
    # outside of the repository.
    if parent == current_path:
        raise FileNotFoundError(
            f"Could not locate '{target_folder}' in any parent of {os.getcwd()!r}; "
            "start the notebook from inside the repository."
        )
    current_path = parent

# Change directory once, only after the repository root has been located.
os.chdir(current_path)
print(current_path)


from RetrievalAlgorithm.src.utils.per_query_metrics_evaluation import get_eval_metrics_for_each_query
from EvaluationMetrics.src.data_loader import load_songs_data, get_genre_columns
from EvaluationMetrics.src.metrics import precision_at_k

D:\University\7th_Semester\MMSR25-26-Group-E


In [2]:
# Normalization variants; each name selects the matching precalculated
# similarity-score parquet files read by the helper functions below.
norm_names = ['max_abs', 'min_max', 'raw', 'robust', 'standard']
# Song table handed to the per-query evaluation as `eval_songs_df`.
eval_songs_df = load_songs_data()
# Genre column names handed to the per-query evaluation as `genres_columns`.
genres_columns = get_genre_columns()

## Calculate Precision for each Track

### Unimodal

In [3]:
def calculate_unimodal_precision_dfs(feature_name: str = 'lyrics', k_range=(5, 10, 20, 50, 100, 200)) -> Dict[str, pd.DataFrame]:
    """Compute per-query precision@k for one unimodal feature, per normalization.

    For every entry of the module-level ``norm_names`` the precalculated
    similarity scores are read from
    ``RetrievalAlgorithm/results/unimodal/<norm>/unimodal_<norm>_<feature>_similarity_scores.parquet``
    and passed to ``get_eval_metrics_for_each_query`` with ``precision_at_k``.

    Args:
        feature_name: Feature identifier used in the parquet file name
            (the notebook uses ``'lyrics'``, ``'audio'`` and ``'video'``).
        k_range: Cut-off values ``k`` forwarded to the evaluation helper.
            Defaults to the cut-offs that were previously hard-coded.

    Returns:
        Dict mapping each normalization name to the result of
        ``get_eval_metrics_for_each_query`` for that normalization.
    """
    unimodal_precalculated_scores = dict()
    precision_dfs = dict()

    for norm_name in tqdm(norm_names, desc=f'Loading {feature_name} Precalculated scores'):
        unimodal_precalculated_scores[norm_name] = pd.read_parquet(
            f'RetrievalAlgorithm/results/unimodal/{norm_name}/unimodal_{norm_name}_{feature_name}_similarity_scores.parquet'
        )

    print(f'Calculating {feature_name} Precision for each query ...')

    for norm_name, precalculated_scores_df in unimodal_precalculated_scores.items():
        print('='*100)
        print('Normalization Name:', norm_name, '\n')

        precision_dfs[norm_name] = get_eval_metrics_for_each_query(
            scores_df=precalculated_scores_df,
            # Hand over a fresh list so the helper receives the same type as before.
            k_range=list(k_range),
            genres_columns=genres_columns,
            eval_songs_df=eval_songs_df,
            metric_at_k=precision_at_k
        )
    return precision_dfs

#### Lyrics (BERT embeddings)

In [4]:
# Feature identifier used both for loading the scores and for naming the output files.
feature_name = 'lyrics'
lyrics_precision_dfs = calculate_unimodal_precision_dfs(feature_name=feature_name)

target_dir = 'RetrievalAlgorithm/results/unimodal'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables.
for norm_name, precision_df in tqdm(lyrics_precision_dfs.items(), desc='Saving Lyrics Precision scores'):
    output_path = os.path.join(target_dir, norm_name)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'unimodal_{norm_name}_{feature_name}_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames before the next cell runs.
del lyrics_precision_dfs

Loading lyrics Precalculated scores: 100%|██████████| 5/5 [00:08<00:00,  1.75s/it]


Calculating lyrics Precision for each query ...
Normalization Name: max_abs 



[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    6.0s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    9.0s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:   13.4s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   16.8s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   23.6s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   28.0s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:   33.7s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   40.5s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:   48.9s
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:   56.0s
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:  1.6min
[Parallel(

KeyboardInterrupt: 

#### Audio (MFCC)

In [None]:
# Feature identifier used both for loading the scores and for naming the output files.
feature_name = 'audio'
audio_precision_dfs = calculate_unimodal_precision_dfs(feature_name=feature_name)

target_dir = 'RetrievalAlgorithm/results/unimodal'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables.
for norm_name, precision_df in tqdm(audio_precision_dfs.items(), desc='Saving Audio Precision scores'):
    output_path = os.path.join(target_dir, norm_name)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'unimodal_{norm_name}_{feature_name}_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames before the next cell runs.
del audio_precision_dfs

#### Videoclip (VGG19)

In [None]:
# Feature identifier used both for loading the scores and for naming the output files.
feature_name = 'video'
video_precision_dfs = calculate_unimodal_precision_dfs(feature_name=feature_name)

target_dir = 'RetrievalAlgorithm/results/unimodal'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables.
for norm_name, precision_df in tqdm(video_precision_dfs.items(), desc='Saving Video Precision scores'):
    output_path = os.path.join(target_dir, norm_name)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'unimodal_{norm_name}_{feature_name}_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames before the next cell runs.
del video_precision_dfs

### Multimodal (Early Fusion)

In [None]:
def calculate_multimodal_precision_dfs(feature_name: str = 'audio_videoclips', k_range=(5, 10, 20, 50, 100, 200)) -> Dict[str, pd.DataFrame]:
    """Compute per-query precision@k for one early-fusion feature combination.

    For every entry of the module-level ``norm_names`` the precalculated
    similarity scores are read from
    ``RetrievalAlgorithm/results/multimodal/early_fusion/<norm>/multimodal_<norm>_<feature>_similarity_scores.parquet``
    and passed to ``get_eval_metrics_for_each_query`` with ``precision_at_k``.

    Args:
        feature_name: Feature-combination identifier used in the parquet file
            name (e.g. ``'audio_videoclips'``, ``'lyrics_audio'``).
        k_range: Cut-off values ``k`` forwarded to the evaluation helper.
            Defaults to the cut-offs that were previously hard-coded.

    Returns:
        Dict mapping each normalization name to the result of
        ``get_eval_metrics_for_each_query`` for that normalization.
    """
    multimodal_precalculated_scores = dict()
    precision_dfs = dict()

    for norm_name in tqdm(norm_names, desc=f'Loading {feature_name} Precalculated scores'):
        multimodal_precalculated_scores[norm_name] = pd.read_parquet(
            f'RetrievalAlgorithm/results/multimodal/early_fusion/{norm_name}/multimodal_{norm_name}_{feature_name}_similarity_scores.parquet'
        )

    print(f'Calculating {feature_name} Precision for each query ...')

    for norm_name, precalculated_scores_df in multimodal_precalculated_scores.items():
        print('='*100)
        print('Normalization Name:', norm_name, '\n')

        precision_dfs[norm_name] = get_eval_metrics_for_each_query(
            scores_df=precalculated_scores_df,
            # Hand over a fresh list so the helper receives the same type as before.
            k_range=list(k_range),
            genres_columns=genres_columns,
            eval_songs_df=eval_songs_df,
            metric_at_k=precision_at_k
        )
    return precision_dfs

#### Audio + Videoclips

In [None]:
# Feature identifier used both for loading the scores and for naming the output files.
feature_name = 'audio_videoclips'
audio_video_precision_dfs = calculate_multimodal_precision_dfs(feature_name=feature_name)

target_dir = 'RetrievalAlgorithm/results/multimodal/early_fusion'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables.
for norm_name, precision_df in tqdm(audio_video_precision_dfs.items(), desc='Saving Audio+Video Precision scores'):
    output_path = os.path.join(target_dir, norm_name)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'multimodal_{norm_name}_{feature_name}_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames before the next cell runs.
del audio_video_precision_dfs

#### Lyrics + Audio

In [None]:
# Feature identifier used both for loading the scores and for naming the output files.
feature_name = 'lyrics_audio'
lyrics_audio_precision_dfs = calculate_multimodal_precision_dfs(feature_name=feature_name)

target_dir = 'RetrievalAlgorithm/results/multimodal/early_fusion'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables.
for norm_name, precision_df in tqdm(lyrics_audio_precision_dfs.items(), desc='Saving Lyrics+Audio Precision scores'):
    output_path = os.path.join(target_dir, norm_name)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'multimodal_{norm_name}_{feature_name}_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames before the next cell runs.
del lyrics_audio_precision_dfs

#### Lyrics + Videoclips

In [None]:
# Feature identifier used both for loading the scores and for naming the output files.
feature_name = 'lyrics_videoclips'
lyrics_videoclips_precision_dfs = calculate_multimodal_precision_dfs(feature_name=feature_name)

target_dir = 'RetrievalAlgorithm/results/multimodal/early_fusion'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables. The progress label now names the modality pair that
# this cell actually saves.
for norm_name, precision_df in tqdm(lyrics_videoclips_precision_dfs.items(), desc='Saving Lyrics+Video Precision scores'):
    output_path = os.path.join(target_dir, norm_name)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'multimodal_{norm_name}_{feature_name}_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames before the next cell runs.
del lyrics_videoclips_precision_dfs

#### Lyrics + Audio + Videoclips

In [None]:
# Feature identifier used both for loading the scores and for naming the output files.
feature_name = 'lyrics_audio_videoclips'
lyrics_audio_videoclips_precision_dfs = calculate_multimodal_precision_dfs(feature_name=feature_name)

target_dir = 'RetrievalAlgorithm/results/multimodal/early_fusion'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables.
for norm_name, precision_df in tqdm(lyrics_audio_videoclips_precision_dfs.items(), desc='Saving Lyrics+Audio+Video Precision scores'):
    output_path = os.path.join(target_dir, norm_name)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'multimodal_{norm_name}_{feature_name}_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames before the next cell runs.
del lyrics_audio_videoclips_precision_dfs

### Multimodal (Late Fusion)

In [None]:
def calculate_multimodal_max_score_precision_dfs(k_range=(5, 10, 20, 50, 100, 200)) -> Dict[str, pd.DataFrame]:
    """Compute per-query precision@k for the late-fusion max-score results.

    For every entry of the module-level ``norm_names`` the precalculated
    similarity scores are read from
    ``RetrievalAlgorithm/results/multimodal/late_fusion/max_score/multimodal_<norm>_max_similarity_scores.parquet``
    and passed to ``get_eval_metrics_for_each_query`` with ``precision_at_k``.

    Args:
        k_range: Cut-off values ``k`` forwarded to the evaluation helper.
            Defaults to the cut-offs that were previously hard-coded.

    Returns:
        Dict mapping each normalization name to the result of
        ``get_eval_metrics_for_each_query`` for that normalization.
    """
    multimodal_precalculated_scores = dict()
    precision_dfs = dict()

    for norm_name in tqdm(norm_names, desc='Loading Max Score Precalculated scores'):
        multimodal_precalculated_scores[norm_name] = pd.read_parquet(
            f'RetrievalAlgorithm/results/multimodal/late_fusion/max_score/multimodal_{norm_name}_max_similarity_scores.parquet'
        )

    print('Calculating Max Score Precision for each query ...')

    for norm_name, precalculated_scores_df in multimodal_precalculated_scores.items():
        print('='*100)
        print('Normalization Name:', norm_name, '\n')

        precision_dfs[norm_name] = get_eval_metrics_for_each_query(
            scores_df=precalculated_scores_df,
            # Hand over a fresh list so the helper receives the same type as before.
            k_range=list(k_range),
            genres_columns=genres_columns,
            eval_songs_df=eval_songs_df,
            metric_at_k=precision_at_k
        )
    return precision_dfs

#### Max Score

In [None]:
max_scores_precision_dfs = calculate_multimodal_max_score_precision_dfs()

# All max-score results share one directory, so it is created once up front.
target_dir = 'RetrievalAlgorithm/results/multimodal/late_fusion/max_score'
os.makedirs(target_dir, exist_ok=True)

# Iterate over (normalization name, precision DataFrame) pairs. Iterating the
# dict directly yields only its string keys, which cannot be unpacked into
# the loop variables.
for norm_name, precision_df in tqdm(max_scores_precision_dfs.items(), desc='Saving precision scores'):
    output_path = os.path.join(target_dir, f'multimodal_{norm_name}_max_precision.parquet')
    precision_df.to_parquet(output_path, index=False)
# Release the precision frames once they are persisted.
del max_scores_precision_dfs