In [165]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from main.choices import genre_weightage, tags_weightage, instrument_weightage, feature_weights

In [175]:
def get_similar_songs_id(id, top_n=15):
    """Return the IDs of the songs most similar to the song with the given ID.

    Reads ``data/songs_db.csv`` and ``data/song_dataset_final.csv``, joins
    them on ``'song name'``, builds a weighted feature matrix and ranks every
    other row by cosine similarity to the query row.

    Parameters
    ----------
    id
        Value to look up in the ``'ID'`` column of the merged table. (The
        name shadows the builtin ``id``; it is kept because it is part of the
        function's public signature.)
    top_n : int, default 15
        Maximum number of similar songs to return.

    Returns
    -------
    list
        ``'ID'`` values of the most similar rows, best match first. Rows
        carrying the query ID itself are never part of the result.

    Raises
    ------
    KeyError
        If ``id`` does not occur in the ``'ID'`` column of the merged table.
    """
    df = pd.read_csv('data/songs_db.csv')
    df_features = pd.read_csv('data/song_dataset_final.csv')
    merged_df = pd.merge(df, df_features, on='song name', how='inner')

    # Locate the query row up front so an unknown ID fails before any numeric
    # work. A boolean mask (instead of a Series lookup) also copes with an ID
    # that occurs on several rows after the merge.
    is_query = (merged_df['ID'] == id).to_numpy()
    if not is_query.any():
        raise KeyError(id)
    index = int(is_query.argmax())  # position of the first matching row

    genre_columns = [col for col in merged_df.columns if col.startswith("Genre")]
    tag_columns = [col for col in merged_df.columns if col.startswith("Tag")]
    instrument_columns = [col for col in merged_df.columns if col.startswith("Instrument")]

    # NOTE(review): the 3 / -2 / 13 offsets depend on the column order of the
    # two CSV files -- confirm them whenever the data files change.
    X = merged_df.iloc[:, 3:-2].copy()
    minmax = MinMaxScaler(feature_range=(0, 1))
    # Replace the scaled columns by label rather than writing through .iloc,
    # so the float result is stored regardless of the columns' original dtype.
    scaled_columns = list(X.columns[:13])
    X[scaled_columns] = minmax.fit_transform(X[scaled_columns])

    # voice_male must be derived before voice_female is overwritten below.
    X['voice_male'] = (1 - X['voice_female']) * X['overall_voice']
    X['voice_female'] = X['voice_female'] * X['overall_voice']

    for key in feature_weights:
        X[key] = X[key] * feature_weights[key]
    for col in genre_columns:
        X[col] = X[col] * genre_weightage
    for col in instrument_columns:
        X[col] = X[col] * instrument_weightage
    for col in tag_columns:
        X[col] = X[col] * tags_weightage

    # Only the query's similarities are needed, so compare that single row
    # against all rows instead of building the full n x n matrix.
    scores = pd.Series(cosine_similarity(X.iloc[[index]], X)[0])

    # Drop the query explicitly. Skipping "the first sorted entry" is only
    # correct when the query happens to sort first, which ties can break.
    scores = scores[~is_query]

    # mergesort is stable, so tied scores keep their original row order.
    best = scores.sort_values(ascending=False, kind='mergesort').head(top_n)
    return merged_df['ID'].iloc[best.index.to_list()].to_list()

# Breakdown: the same pipeline, one step per cell

In [167]:
# Read both CSV files and keep only the rows whose 'song name' appears in
# each of them (inner join).
df = pd.read_csv('data/songs_db.csv')
df_features = pd.read_csv('data/song_dataset_final.csv')
merged_df = df.merge(df_features, how='inner', on='song name')

In [168]:
# Collect column names by prefix; each group is multiplied by its own weight
# in a later cell. Iterating a DataFrame yields its column labels.
genre_columns = [name for name in merged_df if name.startswith("Genre")]
tag_columns = [name for name in merged_df if name.startswith("Tag")]
instrument_columns = [name for name in merged_df if name.startswith("Instrument")]

In [169]:
# Feature matrix: every column from position 3 up to (not including) the last two.
# NOTE(review): the 3 / -2 / 13 offsets depend on the CSV column order --
# confirm them whenever the data files change.
X = merged_df.iloc[:, 3:-2].copy()
minmax = MinMaxScaler(feature_range=(0,1))
# Rescale the first 13 feature columns to [0, 1]. The columns are replaced by
# label rather than written through .iloc, so the float result is stored
# regardless of the dtype the columns were read with.
scaled_columns = list(X.columns[:13])
X[scaled_columns] = minmax.fit_transform(X[scaled_columns])

In [170]:
# voice_male   = (1 - voice_female) * overall_voice
# voice_female =      voice_female  * overall_voice
# Order matters: voice_male has to be computed before voice_female is overwritten.
# NOTE: not idempotent -- re-running this cell on its own multiplies
# voice_female by overall_voice a second time; rebuild X first.
X['voice_male'] = X['overall_voice'] * (1 - X['voice_female'])
X['voice_female'] = X['overall_voice'] * X['voice_female']

In [171]:
# Per-feature weights first, then one shared weight per column group
# (genres, instruments, tags -- in that order).
# NOTE: X is rescaled in place, so re-running this cell on its own applies
# every weight twice; rebuild X first.
for key in feature_weights:
    X[key] = feature_weights[key] * X[key]

for group, weight in (
    (genre_columns, genre_weightage),
    (instrument_columns, instrument_weightage),
    (tag_columns, tags_weightage),
):
    for col in group:
        X[col] = X[col] * weight

In [162]:
# Pairwise cosine similarity between all rows of X, wrapped in a DataFrame
# with default integer row/column labels.
df_cosine = pd.DataFrame(
    cosine_similarity(X, dense_output=True)
)
# Lookup table: song ID -> row label in merged_df.
indices = pd.Series(data=merged_df.index, index=merged_df['ID'])

In [173]:
def get_similar_songs_id(id, top_n=15):
    """Return the names of the songs most similar to the song with the given ID.

    Notebook-level variant that reads the globals ``indices``, ``df_cosine``
    and ``merged_df`` built in the preceding cells. It has the same name as
    the file-reading definition earlier in the notebook (and replaces it once
    this cell runs), but returns ``'song name'`` values rather than IDs.

    Parameters
    ----------
    id
        Key to look up in ``indices`` (the song's ``'ID'`` value).
    top_n : int, default 15
        Maximum number of similar songs to return.

    Returns
    -------
    list
        Song names ordered from most to least similar, never including the
        query row itself.

    Raises
    ------
    KeyError
        If ``id`` is not a key of ``indices``.
    """
    index = indices[id]
    # Remove the query row by label. Skipping "the first sorted entry" is only
    # correct when the query happens to sort first, which tied scores can break.
    scores = df_cosine[index].drop(index)
    # mergesort is stable, so tied scores keep their original row order.
    best = scores.sort_values(ascending=False, kind='mergesort').head(top_n)
    return merged_df['song name'].iloc[best.index.to_list()].to_list()

In [174]:
# Spot check: show the neighbours found for the song with ID 706.
get_similar_songs_id(id=706)

['Helig - John Lunn, Eivør',
 'Destiny Is All - John Lunn, Eivør',
 'My England - John Lunn, Eivør',
 'Space Song (Beach House Cover) - Alles Club',
 'Goodbye Brother - Ramin Djawadi',
 'Lívstræðrir - John Lunn, Eivør',
 'These Are My Feelings - MrFIRA',
 'Goodbye - Apparat',
 'Chop Shop - Tyler Bates, Joel J Richard',
 'No Time for Caution - Hans Zimmer',
 "Don't Let Me Go - Cigarettes After Sex",
 'Mehabooba - Ananya Bhat',
 'Apocalypse - Cigarettes After Sex',
 'Time - Hans Zimmer',
 'Nee Himamazhayayi - Kailas Menon, K S Hari Shankar, Nithya Mammen']

# Testing individual features

In [141]:
# The 15 rows with the highest voice_male value.
merged_df[['song name', 'voice_male']].sort_values('voice_male', ascending=False).head(15)

Unnamed: 0,song name,voice_male
123,Oru Dinam - Anand Bhaskar,0.944735
31,"Despacito - Luis Fonsi, Daddy Yankee",0.927577
84,Lungi Dance - Yo Yo Honey Singh,0.845536
159,Take Me To Church - Hozier,0.84067
145,"Sanam Re - Arijit Singh, Mithoon",0.836867
2,"Ae Ajnabi - Udit Narayan, Mahalakshmi Iyer",0.824657
0,"1, 2, 3 (feat Jason Derulo & De La Ghetto) - S...",0.820218
38,Dura - Daddy Yankee,0.804959
125,Oru Vakkum Mindathe - Vineeth Sreenivasan,0.801271
82,Let Her Go - Passenger,0.785261


In [142]:
# The 15 rows with the lowest voice_male value.
merged_df[['song name', 'voice_male']].sort_values('voice_male', ascending=True).head(15)

Unnamed: 0,song name,voice_male
169,Trust Each Other - Ramin Djawadi,0.0
107,"My England - John Lunn, Eivør",0.0
167,Time - Hans Zimmer,0.004259
156,Still DRE (Instrumental) - Dr Dre,0.021924
53,HITMAN 2 Soundtrack - Results Screen,0.030313
26,"Chop Shop - Tyler Bates, Joel J Richard",0.040529
51,Goodbye Brother - Ramin Djawadi,0.042135
116,No Time for Caution - Hans Zimmer,0.059489
64,I Don't Do Personals - Ramin Djawadi,0.066082
19,Can We Kiss Forever (Instrumental) - Kinà,0.068232
