# Evaluation (Precision)

In [1]:
import os, sys
import pandas as pd
from tqdm import tqdm
from typing import Dict

target_folder = 'MMSR25-26-Group-E'
current_path = os.getcwd()

while os.path.basename(current_path) != target_folder:
    if os.path.basename(current_path) == 'RetrievalAlgorithm':
        if os.path.join(current_path) not in sys.path:
            sys.path.append(os.path.join(current_path))
    parent = os.path.dirname(current_path)
    os.chdir(parent)
    current_path = parent
print(current_path)


from RetrievalAlgorithm.src.utils.per_query_metrics_evaluation import get_eval_metrics_for_each_query
from EvaluationMetrics.src.data_loader import load_songs_data, get_genre_columns
from EvaluationMetrics.src.metrics import precision_at_k

D:\University\7th_Semester\multimedia_search_and_retrieval\MMSR25-26-Group-E


In [2]:
norm_names = ['max_abs', 'min_max', 'raw', 'robust', 'standard']
eval_songs_df = load_songs_data()
genres_columns = get_genre_columns()

## Calculate Precision for each Track

### Unimodal

In [3]:
def calculate_and_save_unimodal_precision(feature_name: str = 'lyrics',
                                          target_dir: str = 'RetrievalAlgorithm/results/unimodal') -> None:
    os.makedirs(target_dir, exist_ok=True)
    for norm_name in norm_names:
        output_norm_dir = os.path.join(target_dir, norm_name)
        output_path = os.path.join(output_norm_dir, f'unimodal_{norm_name}_{feature_name}_precision.parquet')
        if os.path.exists(output_path):
            print(f'Skipping {norm_name}')
            continue

        scores_path = f'{target_dir}/{norm_name}/unimodal_{norm_name}_{feature_name}_similarity_scores.parquet'
        precalculated_scores_df = pd.read_parquet(scores_path)

        print('='*100)
        print('Normalization Name:', norm_name, '\n')

        precision_df = get_eval_metrics_for_each_query(
            scores_df=precalculated_scores_df,
            k_range=[5, 10, 20, 50, 100, 200],
            genres_columns=genres_columns,
            eval_songs_df=eval_songs_df,
            metric_at_k=precision_at_k
        )
        os.makedirs(output_norm_dir, exist_ok=True)

        precision_df.to_parquet(output_path, index=False)
        del precision_df


#### Lyrics (BERT embeddings)

In [4]:
calculate_and_save_unimodal_precision(
    feature_name='lyrics',
    target_dir='RetrievalAlgorithm/results/unimodal'
)

Skipping max_abs
Skipping min_max
Skipping raw
Skipping robust
Skipping standard


#### Audio (MFCC)

In [5]:
calculate_and_save_unimodal_precision(
    feature_name='audio',
    target_dir='RetrievalAlgorithm/results/unimodal'
)

Skipping max_abs
Skipping min_max
Skipping raw
Skipping robust
Skipping standard


#### Videoclip (VGG19)

In [6]:
calculate_and_save_unimodal_precision(
    feature_name='video',
    target_dir='RetrievalAlgorithm/results/unimodal'
)

Skipping max_abs
Skipping min_max
Skipping raw
Skipping robust
Skipping standard


### Multimodal (Early Fusion)

In [7]:
import os
from tqdm import tqdm

def calculate_and_save_multimodal_precision(
        feature_name: str = 'audio_videoclips',
        target_dir: str = 'RetrievalAlgorithm/results/multimodal/early_fusion'
) -> None:
    os.makedirs(target_dir, exist_ok=True)

    for norm_name in norm_names:
        output_norm_dir = os.path.join(target_dir, norm_name)
        output_path = os.path.join(output_norm_dir, f'multimodal_{norm_name}_{feature_name}_precision.parquet')
        if os.path.exists(output_path):
            print(f'Skipping {norm_name}')
            continue

        scores_path = os.path.join(target_dir, norm_name, f'multimodal_{norm_name}_{feature_name}_similarity_scores.parquet')
        precalculated_scores_df = pd.read_parquet(scores_path)

        print('='*100)
        print('Normalization Name:', norm_name, '\n')

        precision_df = get_eval_metrics_for_each_query(
            scores_df=precalculated_scores_df,
            k_range=[5, 10, 20, 50, 100, 200],
            genres_columns=genres_columns,
            eval_songs_df=eval_songs_df,
            metric_at_k=precision_at_k
        )
        os.makedirs(output_norm_dir, exist_ok=True)

        precision_df.to_parquet(output_path, index=False)
        del precision_df, precalculated_scores_df

#### Audio + Videoclips

In [8]:
calculate_and_save_multimodal_precision(
    feature_name='audio_videoclips',
    target_dir='RetrievalAlgorithm/results/multimodal/early_fusion'
)

Normalization Name: max_abs 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:41<00:00, 25.71it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:37<00:00, 26.26it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:51<00:00, 24.17it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:16<00:00, 21.12it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:15<00:00, 16.24it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:51<00:00, 10.08it/s]


Normalization Name: min_max 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:41<00:00, 25.75it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:37<00:00, 26.31it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:50<00:00, 24.34it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:15<00:00, 21.21it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:14<00:00, 16.32it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:53<00:00, 10.04it/s]


Normalization Name: raw 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:41<00:00, 25.72it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:40<00:00, 25.85it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:51<00:00, 24.22it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:16<00:00, 21.07it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:14<00:00, 16.31it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:52<00:00, 10.06it/s]


Normalization Name: robust 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.79it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:36<00:00, 26.43it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:52<00:00, 24.07it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:15<00:00, 21.27it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:12<00:00, 16.40it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:51<00:00, 10.09it/s]


Normalization Name: standard 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:39<00:00, 25.93it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:39<00:00, 26.01it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:54<00:00, 23.76it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:16<00:00, 21.08it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:17<00:00, 16.11it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:48<00:00, 10.16it/s]


#### Lyrics + Audio

In [9]:
calculate_and_save_multimodal_precision(
    feature_name='lyrics_audio',
    target_dir='RetrievalAlgorithm/results/multimodal/early_fusion'
)

Normalization Name: max_abs 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:39<00:00, 25.99it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:39<00:00, 26.00it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:54<00:00, 23.71it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:20<00:00, 20.69it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:14<00:00, 16.29it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:52<00:00, 10.06it/s]


Normalization Name: min_max 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:41<00:00, 25.76it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:43<00:00, 25.37it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:54<00:00, 23.76it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:19<00:00, 20.80it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:18<00:00, 16.07it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:53<00:00, 10.02it/s]


Normalization Name: raw 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:39<00:00, 25.99it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:38<00:00, 26.22it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:51<00:00, 24.22it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:15<00:00, 21.18it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:15<00:00, 16.25it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:48<00:00, 10.16it/s]


Normalization Name: robust 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:39<00:00, 25.96it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:37<00:00, 26.28it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:49<00:00, 24.43it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:14<00:00, 21.33it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:12<00:00, 16.40it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:51<00:00, 10.08it/s]


Normalization Name: standard 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.91it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:38<00:00, 26.19it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:49<00:00, 24.51it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:21<00:00, 20.58it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:08<00:00, 16.69it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:59<00:00,  9.88it/s]


#### Lyrics + Videoclips

In [10]:
calculate_and_save_multimodal_precision(
    feature_name='lyrics_videoclips',
    target_dir='RetrievalAlgorithm/results/multimodal/early_fusion'
)

Normalization Name: max_abs 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:23<00:00, 28.94it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:26<00:00, 28.27it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:56<00:00, 23.54it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:15<00:00, 21.17it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:15<00:00, 16.26it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:52<00:00, 10.05it/s]


Normalization Name: min_max 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.86it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:39<00:00, 26.07it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:51<00:00, 24.22it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:18<00:00, 20.85it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:16<00:00, 16.17it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:53<00:00, 10.04it/s]


Normalization Name: raw 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:39<00:00, 25.93it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:39<00:00, 26.01it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:50<00:00, 24.33it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:15<00:00, 21.26it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:15<00:00, 16.26it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:50<00:00, 10.12it/s]


Normalization Name: robust 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.91it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:39<00:00, 26.06it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:51<00:00, 24.24it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:15<00:00, 21.26it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:17<00:00, 16.09it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [07:03<00:00,  9.79it/s]


Normalization Name: standard 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.87it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:40<00:00, 25.79it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:50<00:00, 24.35it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:15<00:00, 21.20it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:18<00:00, 16.04it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:53<00:00, 10.03it/s]


#### Lyrics + Audio + Videoclips

In [11]:
calculate_and_save_multimodal_precision(
    feature_name='lyrics_audio_videoclips',
    target_dir='RetrievalAlgorithm/results/multimodal/early_fusion'
)

Normalization Name: max_abs 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.80it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:39<00:00, 25.98it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:47<00:00, 24.71it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [02:58<00:00, 23.19it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:16<00:00, 16.17it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:53<00:00, 10.02it/s]


Normalization Name: min_max 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [03:30<00:00, 19.73it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [03:22<00:00, 20.50it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [03:09<00:00, 21.85it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:17<00:00, 20.99it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:22<00:00, 15.80it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [07:14<00:00,  9.54it/s]


Normalization Name: raw 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.83it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:40<00:00, 25.84it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:49<00:00, 24.44it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:18<00:00, 20.95it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:17<00:00, 16.11it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:55<00:00,  9.98it/s]


Normalization Name: robust 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.84it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:40<00:00, 25.92it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:52<00:00, 24.00it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:16<00:00, 21.06it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:18<00:00, 16.06it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:56<00:00,  9.96it/s]


Normalization Name: standard 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:40<00:00, 25.83it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:38<00:00, 26.10it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:50<00:00, 24.35it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [02:58<00:00, 23.30it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:16<00:00, 16.16it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:57<00:00,  9.93it/s]


### Multimodal (Late Fusion)

In [12]:
def calculate_multimodal_max_score_precision_dfs() -> Dict[str, pd.DataFrame]:
    multimodal_precalculated_scores = dict()
    precision_dfs = dict()

    for norm_name in tqdm(norm_names, desc=f'Loading Max Score Precalculated scores'):
        multimodal_precalculated_scores[norm_name]= pd.read_parquet(f'RetrievalAlgorithm/results/multimodal/late_fusion/max_score/multimodal_{norm_name}_max_similarity_scores.parquet')

    print(f'Calculating Max Score Precision for each query ...')

    for norm_name, precalculated_scores_df in multimodal_precalculated_scores.items():
        if os.path.exists(os.path.join('RetrievalAlgorithm/results/multimodal/late_fusion/max_score/',f'multimodal_{norm_name}_max_precision.parquet')):
            print(f'Skipping {norm_name}')
            continue

        print('='*100)
        print('Normalization Name:', norm_name, '\n')

        precision_dfs[norm_name] = get_eval_metrics_for_each_query(
            scores_df=precalculated_scores_df,
            k_range=[5, 10, 20, 50, 100, 200],
            genres_columns=genres_columns,
            eval_songs_df=eval_songs_df,
            metric_at_k=precision_at_k
        )
    return precision_dfs

#### Max Score

In [14]:
max_scores_precision_dfs = calculate_multimodal_max_score_precision_dfs()

Loading Max Score Precalculated scores: 100%|██████████| 5/5 [00:09<00:00,  1.80s/it]


Calculating Max Score Precision for each query ...
Normalization Name: max_abs 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:38<00:00, 26.13it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:34<00:00, 26.86it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:50<00:00, 24.32it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:19<00:00, 20.79it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:19<00:00, 15.99it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [07:11<00:00,  9.61it/s]


Normalization Name: min_max 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:22<00:00, 29.10it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:45<00:00, 25.06it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:56<00:00, 23.56it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:19<00:00, 20.82it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:15<00:00, 16.22it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:44<00:00, 10.26it/s]


Normalization Name: raw 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:43<00:00, 25.35it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:43<00:00, 25.33it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:55<00:00, 23.69it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:20<00:00, 20.68it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:14<00:00, 16.32it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:46<00:00, 10.19it/s]


Normalization Name: robust 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:44<00:00, 25.22it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:43<00:00, 25.32it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:54<00:00, 23.71it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:19<00:00, 20.75it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:16<00:00, 16.17it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:52<00:00, 10.06it/s]


Normalization Name: standard 

----------------------------------------------------------------------------------------------------
k = 5


Evaluated Tracks at k=5: 100%|██████████| 4148/4148 [02:44<00:00, 25.15it/s]


----------------------------------------------------------------------------------------------------
k = 10


Evaluated Tracks at k=10: 100%|██████████| 4148/4148 [02:44<00:00, 25.28it/s]


----------------------------------------------------------------------------------------------------
k = 20


Evaluated Tracks at k=20: 100%|██████████| 4148/4148 [02:55<00:00, 23.64it/s]


----------------------------------------------------------------------------------------------------
k = 50


Evaluated Tracks at k=50: 100%|██████████| 4148/4148 [03:21<00:00, 20.64it/s]


----------------------------------------------------------------------------------------------------
k = 100


Evaluated Tracks at k=100: 100%|██████████| 4148/4148 [04:16<00:00, 16.15it/s]


----------------------------------------------------------------------------------------------------
k = 200


Evaluated Tracks at k=200: 100%|██████████| 4148/4148 [06:46<00:00, 10.20it/s]


In [None]:
target_dir = 'RetrievalAlgorithm/results/multimodal/late_fusion/max_score'
os.makedirs(target_dir, exist_ok=True)

for norm_name, sim_scores_df in tqdm(max_scores_precision_dfs.items(), desc='Saving precision scores'):
    os.makedirs(target_dir, exist_ok=True)
    output_path = os.path.join(target_dir, f'multimodal_{norm_name}_max_precision.parquet')
    sim_scores_df.to_parquet(output_path, index=False)