In [1]:
import os

import librosa
import librosa.display
import librosa.feature
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
from IPython.display import HTML, Video, clear_output, Audio
from matplotlib.animation import FuncAnimation
from scipy import signal
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.mixture import GaussianMixture

# Optional built-in styles that can be switched on instead of the defaults:
#   plt.style.use("classic")
#   plt.style.use("bmh")
# Notebook-wide figure defaults: render at 100 dpi and draw a grid on every axes.
plt.rcParams.update({
    'figure.dpi': 100,
    'axes.grid': True,
})



In [2]:
# Root folder of the local GTZAN copy; the other cells build their paths from it.
# The location can be overridden with the GTZAN_DATA_DIR environment variable so
# the notebook also runs on machines other than the original author's; when the
# variable is unset the original path is used unchanged.
general_path = os.environ.get('GTZAN_DATA_DIR', '/home/mike/Docs/Datasets/GTZAN/Data')

# Show the entries of the genres folder (os.listdir already returns a list).
print(os.listdir(f'{general_path}/genres_original/'))

['country', 'reggae', 'rock', 'hiphop', 'pop', 'metal', 'blues', 'classical', 'jazz', 'disco']


In [3]:
# Load the pre-computed 30-second feature table, keyed by track filename.
# The raw frame keeps its own name so it is not lost when columns are dropped.
features_raw = pd.read_csv(f'{general_path}/features_30_sec.csv', index_col='filename')

# Genre label kept aside as a one-column frame; its filename index is what the
# similarity cell uses to name rows and columns.
labels = features_raw[['label']]

# Remove the columns that are not fed to the scaler
# (presumably every remaining column is numeric - verify against the CSV).
df = features_raw.drop(columns=['length', 'label'])

# Standardise the feature columns; scale() returns a plain NumPy array, so the
# filename index is no longer attached to the scaled values.
scaled = sklearn.preprocessing.scale(df)
print('Scaled data type:', type(scaled))

Scaled data type: <class 'numpy.ndarray'>


In [4]:
# Pairwise cosine similarity between every pair of rows of the scaled features.
similarity = cosine_similarity(scaled)
print("Similarity shape:", similarity.shape)

# First a positionally indexed frame, then a relabelled copy whose rows and
# columns are both named by the filename index taken from `labels`.
sim_df_labels = pd.DataFrame(similarity)
sim_df_names = (
    sim_df_labels
    .set_index(labels.index)
    .set_axis(labels.index, axis='columns')
)

sim_df_names

Similarity shape: (1000, 1000)


filename,blues.00000.wav,blues.00001.wav,blues.00002.wav,blues.00003.wav,blues.00004.wav,blues.00005.wav,blues.00006.wav,blues.00007.wav,blues.00008.wav,blues.00009.wav,...,rock.00090.wav,rock.00091.wav,rock.00092.wav,rock.00093.wav,rock.00094.wav,rock.00095.wav,rock.00096.wav,rock.00097.wav,rock.00098.wav,rock.00099.wav
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
blues.00000.wav,1.000000,0.049231,0.589618,0.284862,0.025561,-0.346688,-0.219483,-0.167626,0.641877,-0.097889,...,-0.082829,0.546169,0.578558,0.662590,0.571629,0.610942,0.640835,0.496294,0.284958,0.304098
blues.00001.wav,0.049231,1.000000,-0.096834,0.520903,0.080749,0.307856,0.318286,0.415258,0.120649,0.404168,...,-0.098111,-0.325126,-0.370792,-0.191698,-0.330834,-0.077301,-0.222119,-0.302573,0.499562,0.311723
blues.00002.wav,0.589618,-0.096834,1.000000,0.210411,0.400266,-0.082019,-0.028061,0.104446,0.468113,-0.132532,...,-0.032408,0.561074,0.590779,0.583293,0.514537,0.495707,0.566837,0.589983,0.216378,0.321069
blues.00003.wav,0.284862,0.520903,0.210411,1.000000,0.126437,0.134796,0.300746,0.324566,0.352758,0.295184,...,-0.320107,-0.206516,-0.151132,0.041986,-0.172515,-0.000287,0.020515,-0.107821,0.502279,0.183210
blues.00004.wav,0.025561,0.080749,0.400266,0.126437,1.000000,0.556066,0.482195,0.623455,0.029703,0.471657,...,0.087605,0.017366,0.138035,0.104684,-0.034594,0.063454,0.063546,0.172944,0.153192,0.061785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
rock.00095.wav,0.610942,-0.077301,0.495707,-0.000287,0.063454,-0.231012,-0.251086,-0.180628,0.619450,-0.258421,...,0.023926,0.843727,0.851726,0.860594,0.851456,1.000000,0.902064,0.865241,0.349870,0.522483
rock.00096.wav,0.640835,-0.222119,0.566837,0.020515,0.063546,-0.272209,-0.291506,-0.256171,0.612170,-0.304636,...,-0.033826,0.871226,0.882825,0.880362,0.863062,0.902064,1.000000,0.902071,0.337834,0.472331
rock.00097.wav,0.496294,-0.302573,0.589983,-0.107821,0.172944,-0.175960,-0.203014,-0.155732,0.461320,-0.330066,...,-0.008336,0.914170,0.926558,0.897654,0.878851,0.865241,0.902071,1.000000,0.287157,0.415173
rock.00098.wav,0.284958,0.499562,0.216378,0.502279,0.153192,0.176351,0.252338,0.239441,0.454673,0.229392,...,-0.233309,0.117855,0.161883,0.327933,0.157977,0.349870,0.337834,0.287157,1.000000,0.589041


In [5]:
def find_similar_songs(name):
    """Print the five tracks with the highest similarity to ``name``.

    Reads the ``name`` column of the module-level ``sim_df_names`` frame,
    orders it from highest to lowest value, removes the entry for ``name``
    itself and prints the first five remaining rows.

    Args:
        name: Row/column label in ``sim_df_names`` identifying the query track.

    Returns:
        None. The result is written to standard output.

    Raises:
        KeyError: If ``name`` is not a label of ``sim_df_names``.
    """
    ranked = (
        sim_df_names[name]
        .sort_values(ascending=False)
        .drop(name)  # the query track would otherwise be listed against itself
    )
    print("\n*******\nSimilar songs to ", name)
    print(ranked.head(5))

In [6]:
def find_opposite_songs(name):
    """Print the five tracks with the lowest similarity to ``name``.

    Reads the ``name`` column of the module-level ``sim_df_names`` frame,
    orders it from lowest to highest value, removes the entry for ``name``
    itself and prints the first five remaining rows.

    Args:
        name: Row/column label in ``sim_df_names`` identifying the query track.

    Returns:
        None. The result is written to standard output.

    Raises:
        KeyError: If ``name`` is not a label of ``sim_df_names``.
    """
    series = sim_df_names[name].sort_values(ascending=True)
    # Remove the query track so it can never appear in its own ranking.
    series = series.drop(name)
    # The header names what is actually listed: the least similar tracks.
    print("\n*******\nOpposite songs to ", name)
    print(series.head(5))

In [7]:
# Query: print the five highest-similarity tracks for rock.00011.wav.
find_similar_songs('rock.00011.wav')


*******
Similar songs to  rock.00011.wav
filename
country.00054.wav    0.600560
jazz.00095.wav       0.593636
jazz.00027.wav       0.554560
jazz.00038.wav       0.544000
jazz.00015.wav       0.530824
Name: rock.00011.wav, dtype: float64


In [8]:
# Audio player for the query track itself.
Audio(f'{general_path}/genres_original/rock/rock.00011.wav')

In [9]:
# Audio player for country.00054.wav, the top entry in the similar-songs listing.
Audio(f'{general_path}/genres_original/country/country.00054.wav')

In [10]:
# Audio player for jazz.00095.wav, the second entry in the similar-songs listing.
Audio(f'{general_path}/genres_original/jazz/jazz.00095.wav')

In [11]:
# Query: print the five lowest-similarity tracks for rock.00011.wav.
find_opposite_songs('rock.00011.wav')


*******
Similar songs to  rock.00011.wav
filename
reggae.00075.wav   -0.608671
pop.00031.wav      -0.596916
pop.00020.wav      -0.576376
pop.00030.wav      -0.559930
pop.00021.wav      -0.558826
Name: rock.00011.wav, dtype: float64


In [12]:
# Audio player for pop.00031.wav, the second entry in the lowest-similarity listing.
Audio(f'{general_path}/genres_original/pop/pop.00031.wav')