## Timbre checks
Sanity-check the CLMR predictions by listening to the corresponding mp3 previews.

In [None]:
import librosa
import pandas as pd
import os
from IPython.display import display, Audio

# Most of the helper functions used in this notebook are defined in the local
# `preprocess` module.
import preprocess
import importlib
# Re-import `preprocess` so the names used below come from a fresh load of the
# module (presumably to pick up edits without restarting the kernel).
importlib.reload(preprocess)

# Render every float that pandas displays with exactly three decimals.
pd.set_option('display.float_format', '{:.3f}'.format)

In [None]:
# Define filepaths
# Track metadata CSV (read with pandas further down).
metadata_filepath = '../dataset/SpotifyAudioFeaturesApril2019_scraped_selection.csv'
# Folder containing the mp3 files; display_audio expects them as '<track_id>.mp3'.
mp3_folder = '../user_evaluation_app/static/mp3_previews'
# os.listdir returns entries in arbitrary order, so sort the paths to keep the
# file order (and the optional test subset below) reproducible between runs.
mp3_files = sorted(
    os.path.join(mp3_folder, filename)
    for filename in os.listdir(mp3_folder)
    if filename.endswith('.mp3')
)
# Passed to preprocess.load_dataframe_if_exists_else_process_and_save;
# presumably the location of the saved predictions — confirm in preprocess.
data_folder = 'clmr_predictions'

# For testing purposes, optionally limit the number of files.
# mp3_files = mp3_files[:200]

In [None]:
# Obtain the CLMR predictions for all mp3 files. Going by the helper's name, it
# reuses a previously saved dataframe when one exists and otherwise processes
# the audio and saves the result — confirm in preprocess.
clmr_predictions = preprocess.load_dataframe_if_exists_else_process_and_save(
    data_folder,
    mp3_files,
    preprocess.process_audio_files_CLMR,
    preprocess.save_dataframe_to_csv,
)
# Show the resulting table as the cell output.
clmr_predictions

In [None]:
# Load the track metadata table from CSV. Later cells select rows by the
# 'track_id' column and read the 'artist_name' and 'track_name' columns.
metadata = pd.read_csv(metadata_filepath)

In [None]:
def display_audio(id, plot=True, metadata=metadata, mp3_folder=mp3_folder, clmr_predictions=clmr_predictions):
    """Print a track's artist and title, show an audio player, and optionally plot its predictions.

    Args:
        id: Track identifier. It is matched against the 'track_id' column of
            ``metadata`` and used as the mp3 file name (``<id>.mp3``). The
            parameter name shadows the builtin ``id`` inside this function; it
            is kept so existing callers keep working.
        plot: When true, the row of ``clmr_predictions`` labelled ``id`` is
            passed to ``preprocess.plot_predictions``.
        metadata: Table with 'track_id', 'artist_name' and 'track_name'
            columns. Defaults to the notebook-level ``metadata`` as it was
            bound when this function was defined.
        mp3_folder: Folder that contains ``<id>.mp3``.
        clmr_predictions: Table of predictions that can be indexed by track id
            through ``.loc``.

    Raises:
        IndexError: If no row of ``metadata`` has ``track_id == id``. The same
            exception type was raised implicitly before; it now carries an
            explicit message.
    """
    matching_rows = metadata[metadata['track_id'] == id]
    if matching_rows.empty:
        # Without this guard, .iloc[0] fails with an opaque
        # "single positional indexer is out-of-bounds" message.
        raise IndexError(f"No metadata row found for track_id {id!r}")

    # If several rows share the id, the first one is used.
    first_row = matching_rows.iloc[0]
    artist_name = first_row['artist_name']
    track_name = first_row['track_name']
    print(f"Artist: {artist_name}, Track: {track_name} \n ID: {id}")

    # Decode the mp3 and embed a player in the notebook output.
    y, sr = librosa.load(f'{mp3_folder}/{id}.mp3')
    display(Audio(data=y, rate=sr))

    if plot:
        preprocess.plot_predictions(clmr_predictions.loc[id])

In [None]:
# Sanity check. Listen to the music and see if the predictions make sense.
# NOTE(review): the module-level name `id` shadows the builtin `id()` for the
# rest of the notebook session; consider renaming it (e.g. `sanity_check_id`)
# once it is confirmed that no other cell relies on it.
id = '4WxkqmbuOZD9xruYGp85rp'
# Called with the defaults: plots the predictions and uses the notebook-level
# metadata, mp3 folder and predictions.
display_audio(id)

In [None]:
# Build the track similarity matrix from the CLMR predictions. The helper gets
# the full table together with its row labels and its column labels; how these
# are used is defined in preprocess.compute_similarity_matrix.
similarity_df = preprocess.compute_similarity_matrix(
    clmr_predictions,
    clmr_predictions.index,
    clmr_predictions.columns,
)

In [None]:
def print_most_similar(target_id, nearest_neighbors, plot=False, metadata=metadata, mp3_folder=mp3_folder):
    """Show the target track followed by each of its nearest neighbours.

    Args:
        target_id: Track id of the reference track; shown first.
        nearest_neighbors: Iterable of ``(neighbor_id, score)`` pairs. They are
            shown in the order given and numbered from 1; ``score`` is printed
            with two decimals.
        plot: Forwarded to ``display_audio`` for the target and every neighbour.
        metadata: Metadata table forwarded to ``display_audio``.
        mp3_folder: mp3 folder forwarded to ``display_audio``.
    """
    display_audio(target_id, plot, metadata, mp3_folder)
    for rank, (neighbor_id, score) in enumerate(nearest_neighbors, start=1):
        print(f"{rank}. Similarity: {score:.2f}")
        # Forward metadata and mp3_folder here as well; previously a caller's
        # custom values were applied to the target track only.
        display_audio(neighbor_id, plot, metadata, mp3_folder)

In [None]:
# Pick a reference track and the number of neighbours to retrieve.
target_id = '0dCXn1KGyWYe1d94pNb4Cd'
n_neighbors = 3
# Look up the neighbours in the similarity matrix, then listen to the target
# followed by each neighbour.
nearest_neighbors = preprocess.find_most_similar(similarity_df, target_id, n_neighbors)
print_most_similar(target_id, nearest_neighbors)