# CSE 144 Group 3
## Music Recommendation System (MRS)

In this notebook, we build the predictive model for our music recommendation system. Our approach combines a recurrent neural network (RNN) trained on audio features with BERT sentence-transformer embeddings of song lyrics.

<br>

The RNN is based on the following model:

https://github.com/taylorhawks/RNN-music-recommender/blob/master/cloud/model.ipynb


In [None]:
# import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
%config InlineBackend.figure_format="retina"
import numpy as np
import random
import torch
import os
# from torch import nn, optim
# import math
# from IPython import display
# import torchvision.datasets as datasets
# import torchvision.transforms as transforms
# from torch.utils.data import TensorDataset
# import torch.nn.functional as F
# from sklearn.preprocessing import MinMaxScaler
# import pdb
import plotly.graph_objects as go
import numpy as np

from skimage.util.shape import view_as_windows as viewW
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.decomposition import PCA

# import tensorflow as tf

# import keras.backend as K
from keras.models import Sequential, load_model
# from keras.optimizers import RMSprop
from keras.layers import Dense, SimpleRNN, Input
from keras.losses import *


### Load the data

In [None]:
# Load the preprocessed song features and the user listening history.
# Both paths are relative to the notebook's working directory.
song_features_data = pd.read_csv('misc/processed_music_info_extended.csv')
user_listening_data = pd.read_csv('misc/processed_user_listening_hist.csv')

# from google.colab import drive
# drive.mount('/content/drive')
# import pandas as pd
# song_features_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/music_info.csv')
# user_listening_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/user_listening_hist.csv')

### Set Random Seed

In [None]:
# One seed shared by every RNG the notebook has imported.
SEED = 24

# torch.manual_seed only seeds PyTorch. Python's `random` and NumPy's global
# RNG are seeded as well so that any code drawing from them is repeatable.
# NOTE(review): the Keras backend may need its own seeding call — confirm.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

### Read and Display Data

In [None]:
# Report the row count and the number of distinct track ids, then preview the frame.
print(f'# of rows of Song Data: {len(song_features_data)}')
print(f"# of unique songs: {len(song_features_data['track_id'].unique())}")
song_features_data.head()

In [None]:
# Report the row count and the number of distinct users, then preview the frame.
print(f'# of rows of User Listening Data: {len(user_listening_data)}')
print(f"# of unique users: {len(user_listening_data['user_id'].unique())}")
user_listening_data.head()

### Data Preprocessing


In [None]:
# Drop unnecessary columns.
# errors='ignore' keeps the cell idempotent: because the result is assigned
# back to the same name, re-running the cell would otherwise raise KeyError
# once the columns are already gone.
song_features_data = song_features_data.drop(
    columns=['year', 'time_signature', 'key'],
    errors='ignore',
)

In [None]:
# Convert song duration from milliseconds to minutes.
# The guard makes the cell safe to re-run: once `duration_ms` has been
# replaced there is nothing left to convert, so the step is skipped instead
# of raising KeyError.
if "duration_ms" in song_features_data.columns:
    song_features_data = (
        song_features_data
        .assign(duration_mins=lambda frame: frame["duration_ms"] / 60000)
        .drop(columns="duration_ms")
    )


song_features_data.info()

In [None]:
# Join song features with the listening history on the shared track_id column.
data = song_features_data.merge(user_listening_data, on='track_id')
data.head()

### Obtain total number of listens per song

In [None]:
# Sum playcount per song name.
# NOTE(review): grouping is by `name`, so different tracks that share a
# title are added together — confirm this is intended.
play_counts = data.groupby('name', as_index=False)['playcount'].sum()
play_counts

### Create playlists for input to RNN

In [None]:
# Order the merged rows by user_id; the bare `data` expression displays the sorted frame.
data = data.sort_values(['user_id'])
data

In [None]:
# Build one playlist per user: the first (up to) 20 track_ids of that user's rows.
# Playlists are keyed by track_id rather than by song name.
playlists = data.groupby('user_id')['track_id'].apply(lambda x: list(x.head(20)))
# Plain dict form: user_id -> list of track_ids.
playlist_dict = playlists.to_dict()
print(playlists)

In [None]:
# Remove the columns that are not carried into the playlists.
data_dict = data.drop(['artist', 'tags', 'playcount'], axis=1)
# Lookup table: (user_id, track_id) -> {column name: value} for the remaining columns.
data_dict = data_dict.set_index(['user_id', 'track_id']).to_dict('index')

In [None]:
# Replace every track_id in each playlist with [track_id, *feature values],
# looking the features up by (user_id, track_id). Tracks without a matching
# entry in data_dict are left out of the playlist.
songs_done = 0
updated_playlist_dict = {}
for user_id, songs in playlist_dict.items():
    updated_songs = []
    for song in songs:
        key = (user_id, song)
        if key not in data_dict:
            continue
        the_features = list(data_dict[key].values())
        updated_songs.append([song, *the_features])
        songs_done += 1
        # Progress indicator every 10000 processed songs.
        if songs_done % 10000 == 0:
            print(songs_done)
    updated_playlist_dict[user_id] = updated_songs

playlist_dict = updated_playlist_dict

In [None]:
# For every song keep elements 0-5 and 7-11 of its row (element 6 and everything
# from index 12 onward are dropped), giving 11 values per song, then stack all
# playlists into one array.
# NOTE(review): presumably the dropped entries are `mode`, `tempo` and
# `duration_mins` — confirm against the CSV column order.
arr = [
    [np.concatenate((song[0:6], song[7:12])) for song in playlist]
    for playlist in playlist_dict.values()
]

arr_np = np.array(arr)

In [None]:
# Tabular view of the playlists: one row per user, one column per playlist position.
# Note that this rebinds `playlists`, which previously held the per-user Series of track_ids.
playlists = pd.DataFrame.from_dict(playlist_dict, orient='index')
playlists.head()

### Train and Test Split

In [None]:
# Train and test splits for playlist
# X is every song but the last of each playlist, Y is every song but the first,
# so Y[:, t] is the song that follows X[:, t].

X = arr_np[:,:-1,:]
Y = arr_np[:,1:,:]
# First split: 75% goes on to the next split, the remaining 25% is the validation set.
x_train, x_val, y_train, y_val = train_test_split(X,Y,train_size=0.75,random_state=3000)
# Second split: 92% of that 75% is the final training set, the remaining 8% is the test set.
x_train, x_test, y_train, y_test = train_test_split(x_train,y_train,train_size=0.92,random_state=3000)


In [None]:
# Convert the test split to nested Python lists so the playlists can be extended.
x_test = x_test.tolist()
y_test = y_test.tolist()


# x_test lacks each playlist's last song and y_test lacks its first one.
# Put both back so every test playlist in x_test and y_test is complete again.
for i in range(len(x_test)):
    x_value = y_test[i][-1]   # last song, missing from x_test[i]
    y_value = x_test[i][0]    # first song, missing from y_test[i]
    x_test[i].append(x_value)
    y_test[i].insert(0, y_value)

In [None]:
# Original Playlists
def _identifier_rows(split):
    """Collect the three leading columns of every song in every playlist.

    `split` is indexed as split[user, song, column]. The result is a list with
    one entry per user, each a list with one `split[user, song, 0:3]` slice per
    song. Column 0 is the track id.
    NOTE(review): columns 1-2 are presumably name and spotify_id — confirm
    against the CSV column order.
    """
    n_users, n_songs = split.shape[0], split.shape[1]
    return [
        [split[user, song, 0:3] for song in range(n_songs)]
        for user in range(n_users)
    ]


# Each split is processed on its own. The previous version filled the
# validation lists inside the loop over the training users and relied on
# IndexError to stop, which would silently truncate the validation lists if
# that split ever became larger than the training split.
ops_x_train = _identifier_rows(x_train)
ops_y_train = _identifier_rows(y_train)
ops_x_val = _identifier_rows(x_val)
ops_y_val = _identifier_rows(y_val)
# The identifier lists for the test split are intentionally left empty,
# exactly as before.
ops_x_test, ops_y_test = [], []

# Keep only the numeric feature columns (everything after the identifiers)
# as float arrays for the model.
x_train = x_train[:, :, 3:].astype(np.float64)
y_train = y_train[:, :, 3:].astype(np.float64)
x_val = x_val[:, :, 3:].astype(np.float64)
y_val = y_val[:, :, 3:].astype(np.float64)

### Define the Model

In [None]:
# Single source of truth for the cached-model location (used for the
# existence check, loading and saving).
MODEL_PATH = 'misc/mae_optimized_model.keras'

if os.path.exists(MODEL_PATH):
    # Reuse the saved model so that a full re-run does not retrain.
    print("using saved model")
    model = load_model(MODEL_PATH)
else:
    print("training model")
    # Two stacked linear SimpleRNN layers followed by a linear Dense layer
    # that maps back to the 8 input features at every time step.
    model = Sequential()
    model.add(Input(shape=(None,8)))
    model.add(SimpleRNN(
        16,
        activation='linear',
        return_sequences=True,
        kernel_initializer='random_uniform',
    ))
    model.add(SimpleRNN(
        16,
        activation='linear',
        return_sequences=True,
        kernel_initializer='random_uniform',
    ))
    model.add(Dense(8, activation='linear', kernel_initializer='random_uniform',))

    # torch.cuda.get_device_name(0) raises when no CUDA device is present, so
    # only query it when CUDA is available; otherwise training on a CPU-only
    # machine would crash before it starts.
    # NOTE(review): `device` is not passed to Keras anywhere in this cell.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if device.type == "cuda":
        print(torch.cuda.get_device_name(0))
    else:
        print("CUDA not available; using CPU")

    model.compile(loss='mae', optimizer='adam')
    model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_val, y_val))
    model.save(MODEL_PATH)

In [None]:
# Descriptive alias for the model loaded or trained above (compiled with MAE loss and the Adam optimizer).
mae_optimized_model_adam = model

In [None]:
def predict_sample(sample,model):
    """Run `model` on one sequence and return the output for its last step.

    `sample` is wrapped into a batch of size one, passed to `model.predict`,
    and the last entry along the second axis of the first batch element is
    returned.
    """
    batch = np.array([sample])
    predictions = model.predict(batch)
    return predictions[0, -1]

In [None]:
# Persist the restored test playlists to disk (np.save appends the .npy extension).
np.save("misc/x_test", x_test)
np.save("misc/y_test", y_test)

### Run RNN

In [None]:
# print('Selecting a random index in our test dataset: ')
# # random_index = random.randint(0,len(x_test)-1)
# random_index = 2
# print(random_index)

# print('Input: ')
# print(x_test[random_index])

# print('\n','Output: ')
# predicted = predict_sample(x_test[random_index], mae_optimized_model_adam)
# print(predicted)

In [None]:
# Frame used for distance computations: the song data without the columns listed below.
distance_frame = song_features_data.drop(['artist','tags','tempo','duration_mins','mode'], axis=1)
distance_frame.head()

In [None]:
# Keep the first row for every track_id, then show how many distinct tracks remain.
distance_frame = distance_frame.drop_duplicates(subset='track_id', keep='first')
distance_frame.track_id.nunique()

In [None]:
# Write the de-duplicated frame to disk; the index is written too (to_csv default).
distance_frame.to_csv("misc/distance_frame.csv")

In [None]:
# Preview the frame that was just saved.
distance_frame.head()

In [None]:
def get_distances(data, p_vector):
    """Distance from every track in `data` to the predicted feature vector.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain `track_id`, `name` and `spotify_id`; the remaining
        columns are the numeric features, in the same order as `p_vector`.
    p_vector : array-like
        Predicted feature vector. Only the first len(p_vector) feature
        columns are compared.

    Returns
    -------
    pandas.DataFrame
        Indexed by song name with columns `id` (the track_id) and `distance`
        (sum of absolute per-feature differences), in the row order of `data`.

    Raises
    ------
    ValueError
        If `data` has fewer feature columns than `p_vector` has entries.

    Every track gets its own row. Results used to be collected in a dict keyed
    by song name, so tracks sharing a title silently overwrote each other.
    """
    features = data.drop(['name', 'spotify_id'], axis=1).set_index('track_id')
    target = np.asarray(p_vector, dtype=float).ravel()
    if features.shape[1] < target.size:
        raise ValueError(
            f"expected at least {target.size} feature columns, got {features.shape[1]}"
        )
    # Vectorised form of sum_n |p_vector[n] - features[n]| for every row at once.
    values = features.iloc[:, :target.size].to_numpy(dtype=float)
    distances = np.abs(values - target).sum(axis=1)
    return pd.DataFrame(
        {'id': features.index.to_numpy(), 'distance': distances},
        index=data['name'].to_numpy(),
    )

def distance_calc(dict, v1, name_list):
    """Dict-based distance helper, kept for backward compatibility.

    Parameters
    ----------
    dict : mapping
        track_id -> list of feature values. The parameter name shadows the
        builtin and is kept only so existing callers keep working.
    v1 : sequence
        Reference vector; only the first len(v1) features of each track are used.
    name_list : pandas.Series
        Song names, positionally aligned with the iteration order of `dict`.

    Returns
    -------
    dict
        name -> (track_id, distance), where distance is the sum of absolute
        per-feature differences. Because the result is keyed by name, tracks
        that share a name keep only the last entry.
    """
    names = name_list.to_list()
    distances = {}
    for position, (track_id, features) in enumerate(dict.items()):
        value = 0.0
        for n in range(len(v1)):
            value += abs(v1[n] - features[n])
        distances[names[position]] = (track_id, value)
    return distances

# No live cell defines `predicted` (the "Run RNN" cell above is commented
# out), so this line raised NameError on a fresh kernel. Rebuild the
# prediction here from one held-out test playlist.
random_index = 2  # same sample index as in the commented-out cell
# Each song row starts with 3 identifier fields followed by the 8 model
# features; keep the features and cast them to float for the model.
sample_features = np.asarray(x_test[random_index])[:, 3:].astype(np.float64)
predicted = predict_sample(sample_features, mae_optimized_model_adam)

distance_frame2 = get_distances(distance_frame, predicted)


In [None]:
POTENTIAL_N = 50  # number of closest songs requested from potential_songs


def potential_songs(frame, n):
    """Return the `n` rows of `frame` with the smallest `distance`.

    Ties at the cut-off are all kept (keep='all'), so the result can contain
    more than `n` rows.
    """
    return frame.nsmallest(n, columns='distance', keep='all')

# Shortlist the POTENTIAL_N songs closest to the predicted feature vector and display them.
potential_songs_data = potential_songs(distance_frame2, POTENTIAL_N)
potential_songs_data

In [39]:
# Load the precomputed lyric embeddings: the full-size vectors and a 3-D version.
# NOTE(review): the first column of each file is presumably the track id — confirm.
lyrics_embeddings_csv = pd.read_csv('misc/lyrics_embeddings.csv')
lyrics_embeddings_3d_csv = pd.read_csv('misc/lyrics_embeddings_3d.csv')

In [46]:
def get_embeddings(frame, frame3D):
    """Turn the two embedding tables into {first-column value: vector} dicts.

    For each frame, the first column is used as the key and the remaining
    columns of that row become the vector. If a key occurs more than once,
    the last row wins.

    The whole table is converted in one step instead of one `.iloc` lookup per
    row. That is much faster on large tables and keeps the numeric dtype of
    the embedding columns (row-wise `.iloc` on a mixed-dtype frame produced
    object-dtype vectors).

    Returns
    -------
    tuple(dict, dict)
        (embeddings built from `frame`, embeddings built from `frame3D`).
    """
    def _rows_by_first_column(table):
        keys = table.iloc[:, 0].tolist()
        vectors = table.iloc[:, 1:].to_numpy()
        return dict(zip(keys, vectors))

    return _rows_by_first_column(frame), _rows_by_first_column(frame3D)

# Build the lookup dicts for the full-size and the 3-D lyric embeddings.
lyrics_embeddings, lyrics_embeddings_3d = get_embeddings(lyrics_embeddings_csv, lyrics_embeddings_3d_csv) 


In [159]:
def get_candidates(original_playlist, index, p_songs):
    """Gather 3-D lyric embeddings for a playlist and for candidate songs.

    Reads the module-level `lyrics_embeddings_3d` mapping.

    Parameters
    ----------
    original_playlist : sequence of playlists; each track's first element is its id.
    index : position of the playlist to use.
    p_songs : DataFrame with an `id` column; only its first 100 rows are used.

    Returns
    -------
    (candidates, rnn_track_ids, cutoff)
        candidates: id -> 3-D embedding, playlist tracks first, then the
        candidate songs; rnn_track_ids: the playlist's ids in order;
        cutoff: number of distinct playlist ids in `candidates`.
    """
    rnn_track_ids = [track[0] for track in original_playlist[index]]
    candidates = {track_id: lyrics_embeddings_3d[track_id] for track_id in rnn_track_ids}

    # Entries before this position belong to the original playlist.
    cutoff = len(candidates)

    for _, song_row in p_songs.head(100).iterrows():
        song_id = song_row['id']
        candidates[song_id] = lyrics_embeddings_3d[song_id]

    print(len(candidates))

    return candidates, rnn_track_ids, cutoff

# candidates, rnn_track_ids, cutoff = get_candidates(ops_x_test, random_index, potential_songs_data)

In [49]:
# For reducing dimensions of the embeddings
def reduce_dims(lyrics_embeddings, playlist_track_ids=None, n_components=150):
    """Project the lyric embeddings onto their leading principal components.

    Parameters
    ----------
    lyrics_embeddings : dict
        track_id -> embedding vector. All vectors must have the same length;
        the length is taken from the data rather than hard-coded.
    playlist_track_ids : sequence, optional
        Track ids of the input playlist whose reduced embeddings are returned
        as a matrix. Defaults to the module-level `rnn_track_ids` for backward
        compatibility with existing one-argument calls.
    n_components : int, default 150
        Number of principal components to keep.

    Returns
    -------
    (reduced_embeddings_dict, og_embeddings)
        reduced_embeddings_dict: track_id -> reduced vector for every track;
        og_embeddings: array with one reduced vector per playlist track id.
    """
    if playlist_track_ids is None:
        # Resolved before the PCA fit so that a missing global fails fast.
        playlist_track_ids = rnn_track_ids

    track_ids = list(lyrics_embeddings.keys())
    raw_embeddings = np.stack(
        [np.asarray(lyrics_embeddings[track_id], dtype=np.float64) for track_id in track_ids]
    )

    dim_model = PCA(n_components=n_components, random_state=42)
    dim_model.fit(raw_embeddings)
    reduced_embeddings = dim_model.transform(raw_embeddings)
    reduced_embeddings_dict = dict(zip(track_ids, reduced_embeddings))

    og_embeddings = np.array([reduced_embeddings_dict[track_id] for track_id in playlist_track_ids])

    return reduced_embeddings_dict, og_embeddings

# No live cell defines `rnn_track_ids` (the get_candidates call that used to
# do so is commented out), so take the track ids straight from a held-out
# test playlist; the first element of every song row is its track id.
# NOTE(review): index 2 mirrors the sample index in the commented-out
# "Run RNN" cell — confirm this is the playlist to evaluate.
rnn_track_ids = [song[0] for song in x_test[2]]
reduced_embeddings_dict, og_embeddings = reduce_dims(lyrics_embeddings, rnn_track_ids)

At this stage, we compare the lyric embeddings of the candidate songs against those of the original input playlist and keep the best candidates.
### Cosine Similarity

In [50]:
def calc_cosine(reduced_embeddings_dict, potential_songs_data, reference_embeddings=None, top_k=10):
    """Keep the candidates whose lyrics are most similar to the input playlist.

    Parameters
    ----------
    reduced_embeddings_dict : dict
        track_id -> reduced embedding vector.
    potential_songs_data : pandas.DataFrame
        Candidate songs; must have an `id` column of track ids.
    reference_embeddings : array-like, optional
        Embeddings of the input playlist, one row per track. Defaults to the
        module-level `og_embeddings` for backward compatibility.
    top_k : int, default 10
        Number of candidates to return.

    Returns
    -------
    pandas.DataFrame
        The `top_k` rows of `potential_songs_data` with the highest mean
        cosine similarity to the reference embeddings, best first.
    """
    if reference_embeddings is None:
        reference_embeddings = og_embeddings

    candidate_ids = list(potential_songs_data['id'])
    if not candidate_ids:
        # Nothing to rank; return an empty frame with the same columns.
        return potential_songs_data.iloc[:0]

    candidate_matrix = np.array([reduced_embeddings_dict[track_id] for track_id in candidate_ids])
    # One similarity matrix for all candidates; row means give each
    # candidate's average similarity to the playlist.
    similarities = cosine_similarity(candidate_matrix, reference_embeddings).mean(axis=1)

    most_similar_indices = np.argsort(similarities)[::-1]
    selected_songs_cs = potential_songs_data.iloc[most_similar_indices[:top_k]]
    return selected_songs_cs

selected_songs_cs = calc_cosine(reduced_embeddings_dict, potential_songs_data, og_embeddings)

### Pairwise Distances

In [51]:
def calc_pairwise(reduced_embeddings_dict, selected_songs_cs, reference_embeddings=None, top_k=10):
    """Order candidates by mean Euclidean distance to the input playlist.

    Parameters
    ----------
    reduced_embeddings_dict : dict
        track_id -> reduced embedding vector.
    selected_songs_cs : pandas.DataFrame
        Candidate songs; must have an `id` column of track ids.
    reference_embeddings : array-like, optional
        Embeddings of the input playlist, one row per track. Defaults to the
        module-level `og_embeddings` for backward compatibility.
    top_k : int, default 10
        Number of candidates to return.

    Returns
    -------
    (selected_songs_pd, closest_candidates_indices)
        The `top_k` closest rows of `selected_songs_cs` (closest first) and
        their positional indices, which are also printed.
    """
    if reference_embeddings is None:
        reference_embeddings = og_embeddings

    candidate_ids = list(selected_songs_cs['id'])
    if not candidate_ids:
        # pairwise_distances cannot handle an empty candidate matrix.
        closest_candidates_indices = np.array([], dtype=int)
        print(closest_candidates_indices)
        return selected_songs_cs.iloc[:0], closest_candidates_indices

    candidate_embeddings = np.array([reduced_embeddings_dict[track_id] for track_id in candidate_ids])

    distances = pairwise_distances(candidate_embeddings, reference_embeddings, metric='euclidean')
    mean_distances = np.mean(distances, axis=1)
    closest_candidates_indices = np.argsort(mean_distances)[:top_k]
    selected_songs_pd = selected_songs_cs.iloc[closest_candidates_indices]

    print(closest_candidates_indices)
    return selected_songs_pd, closest_candidates_indices

selected_songs_pd, closest_candidates_indices = calc_pairwise(reduced_embeddings_dict, selected_songs_cs, og_embeddings)
selected_songs_pd

[8 7 6 4 3 5 2 1 9 0]


Unnamed: 0,id,distance
Forever Walking Alone,TRLULEP128F4250885,0.344285
Treacherous,TRGCHDC12903CE8BA0,0.320569
Madder Red,TRAXEWG12903CB2CC3,0.443406
An Aptly Fictional Description,TRJASCD128F147C8FB,0.435235
Nothin' Like Tomorrow,TRJNUOP128F933196D,0.451129
You & A Promise,TRJFSED128F42A0BCA,0.357444
Burndt Jamb,TROPJNO128E079464E,0.363579
Mirror Remains,TRPOXMV128F428965C,0.36655
Rowing,TRHZZKT128F425372C,0.366589
Scary Kids Scaring Kids,TRGYPIW12903CDEC3F,0.439857


In [52]:
# Predicted data
def get_recs(song_features_data, selected_songs_pd):
    """Return the rows of `song_features_data` whose track_id appears in selected_songs_pd['id'].

    Rows come back in the order of `song_features_data`, not in the order of
    `selected_songs_pd`.
    """
    is_selected = song_features_data['track_id'].isin(selected_songs_pd['id'])
    return song_features_data.loc[is_selected]

# Look up the full feature rows of the final recommendations and show the first 10.
rec_songs = get_recs(song_features_data, selected_songs_pd)
rec_songs.head(10)

Unnamed: 0,track_id,name,artist,spotify_id,tags,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins
2844,TRAXEWG12903CB2CC3,Madder Red,Yeasayer,0m7SrrXFJ0DIueUB5FGGFZ,"electronic, indie, experimental, indie_rock, p...",0.495,0.833,-8.123,0,0.0361,0.00938,0.0132,0.142,0.256,81.975,4.061333
13467,TROPJNO128E079464E,Burndt Jamb,Weezer,1ytYTPuKbW0NIXb2kgmSVq,"rock, alternative, alternative_rock, punk, gui...",0.591,0.666,-8.062,0,0.0463,0.0012,0.000686,0.088,0.451,97.696,2.6432
16836,TRGCHDC12903CE8BA0,Treacherous,Taylor Swift,0XfOV7qY3834QpFVwOb6CC,"pop, female_vocalists, singer_songwriter, coun...",0.705,0.621,-8.086,1,0.0334,0.101,4e-06,0.147,0.395,109.993,4.012883
34517,TRLULEP128F4250885,Forever Walking Alone,Dragonland,3JVGJTh5rhfSHVsdncD8pY,"power_metal, symphonic_metal",0.514,0.676,-7.965,0,0.0309,0.0505,0.000784,0.14,0.277,139.11,4.882667
37874,TRHZZKT128F425372C,Rowing,Soundgarden,0HjAux3k1rduZMGyCEpnau,"alternative_rock, grunge",0.515,0.624,-7.952,0,0.0423,0.0723,6.7e-05,0.113,0.472,115.95,5.1051
38374,TRJFSED128F42A0BCA,You & A Promise,Howie Day,0T4STwIhxP3Ji87xKDfDYN,"rock, alternative, singer_songwriter, acoustic...",0.562,0.684,-8.182,1,0.0285,0.0656,0.0321,0.111,0.471,89.026,6.414883
41290,TRGYPIW12903CDEC3F,Scary Kids Scaring Kids,Cap'n Jazz,62aNPbzpP3XP7ft2R0ADYI,"indie, punk, emo, screamo",0.387,0.649,-8.092,0,0.0296,0.0119,6e-06,0.117,0.299,109.587,4.815767
41546,TRJNUOP128F933196D,Nothin' Like Tomorrow,Supreme Beings of Leisure,4QQuGB9jW0JdyvP78dOH0D,"electronic, chillout, trip_hop, lounge",0.556,0.672,-8.125,0,0.146,0.186,0.000439,0.125,0.289,174.203,4.724
42829,TRJASCD128F147C8FB,An Aptly Fictional Description,The Number Twelve Looks Like You,2c0FZy21SMRu13s7jcDTYP,"hardcore, screamo, grindcore",0.378,0.723,-7.947,1,0.0645,0.0575,0.0167,0.209,0.418,107.347,5.540433
47716,TRPOXMV128F428965C,Mirror Remains,Bauhaus,0Btv4pmoFImcfL2opu4Ih4,post_punk,0.445,0.682,-8.123,1,0.0375,0.0142,0.0146,0.0923,0.475,80.392,4.969767


In [53]:
# Original playlist
def get_ogp(song_features_data, rnn_track_ids):
    """Return the rows of `song_features_data` whose track_id is in `rnn_track_ids`.

    Rows come back in the order of `song_features_data`.
    """
    in_playlist = song_features_data['track_id'].isin(rnn_track_ids)
    return song_features_data.loc[in_playlist]

# Feature rows of the songs in the input playlist (requires `rnn_track_ids` to be defined); show up to 19 of them.
og_songs = get_ogp(song_features_data, rnn_track_ids)
og_songs.head(19)

Unnamed: 0,track_id,name,artist,spotify_id,tags,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins
43,TRNNGMK128F423F034,Where Is My Mind?,Pixies,0DJU6KGwdkxEPCy3BZ3UZY,"rock, alternative, indie, alternative_rock, in...",0.514,0.435,-13.517,1,0.0353,0.0078,0.000859,0.0802,0.212,82.104,3.834883
44,TRAALAH128E078234A,Bitter Sweet Symphony,The Verve,0jLnevC3Vn34qVWrAa4X6x,"rock, alternative, indie, pop, alternative_roc...",0.383,0.907,-5.409,1,0.0405,0.0313,0.0,0.347,0.514,171.052,5.972217
95,TRWQINA128F9339E57,Buddy Holly,Weezer,0gOyllwzM7IvfuYZ903zNv,"rock, alternative, indie, alternative_rock, in...",0.556,0.92,-4.606,1,0.0367,0.00271,0.00021,0.117,0.771,121.138,2.653767
401,TRSEHKD128E0782311,Coffee & TV,Blur,0HFBn4Grwd2AHZk3H1ntNo,"rock, alternative, indie, alternative_rock, in...",0.738,0.786,-9.07,1,0.0363,0.0483,0.294,0.101,0.718,121.609,5.978217
448,TRNXEPE128F9339E47,My Name Is Jonas,Weezer,0YU04WSkTVomRgeDOWlEzX,"rock, alternative, indie, alternative_rock, in...",0.261,0.947,-3.031,1,0.0488,0.000197,0.00332,0.31,0.55,185.942,3.435333
694,TRRKODA128F146DE22,Robot Rock,Daft Punk,0mqH3csuodko0zRL8WCuce,"rock, electronic, dance, house, techno, electr...",0.59,0.787,-5.766,1,0.0512,7e-06,0.845,0.0293,0.598,111.926,4.795333
842,TRUKZDD128F4281931,In the Backseat,Arcade Fire,1iC9x6brahv6MqMTs5rRTi,"rock, alternative, indie, female_vocalists, al...",0.329,0.538,-10.044,1,0.03,0.478,0.0864,0.0705,0.264,169.981,6.336883
1063,TRUMION12903CB6373,Cousins,Vampire Weekend,1p5fFes80K9IPnZFqNca53,"rock, alternative, indie, pop, indie_rock, ame...",0.499,0.809,-5.132,1,0.206,0.00123,0.0,0.0707,0.808,176.11,2.4211
1209,TRVQBRZ128F423EEDC,Hotel Yorba,The White Stripes,0FhrVQDIomjSy7pNXPEDlN,"rock, alternative, indie, alternative_rock, in...",0.383,0.785,-2.546,1,0.0878,0.482,0.0,0.0797,0.748,195.105,2.172217
1249,TRZPVLJ128F148D2F7,Rock And Roll All Nite,Kiss,03PtniQc7l5ew6PD6Ydejh,"rock, metal, classic_rock, hard_rock, 80s, 70s",0.624,0.857,-8.993,1,0.0595,0.00978,0.0,0.0829,0.938,144.881,2.792217


In [54]:
def trim_recs(rec_songs):
    """Return `rec_songs` restricted to the song name and the eight audio feature columns."""
    kept_columns = [
        'name',
        'danceability',
        'energy',
        'loudness',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
    ]
    return rec_songs[kept_columns]

# Reduce the recommendations to the name and audio feature columns and show the first 10.
rec_songs = trim_recs(rec_songs)
rec_songs.head(10)

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
2844,Madder Red,0.495,0.833,-8.123,0.0361,0.00938,0.0132,0.142,0.256
13467,Burndt Jamb,0.591,0.666,-8.062,0.0463,0.0012,0.000686,0.088,0.451
16836,Treacherous,0.705,0.621,-8.086,0.0334,0.101,4e-06,0.147,0.395
34517,Forever Walking Alone,0.514,0.676,-7.965,0.0309,0.0505,0.000784,0.14,0.277
37874,Rowing,0.515,0.624,-7.952,0.0423,0.0723,6.7e-05,0.113,0.472
38374,You & A Promise,0.562,0.684,-8.182,0.0285,0.0656,0.0321,0.111,0.471
41290,Scary Kids Scaring Kids,0.387,0.649,-8.092,0.0296,0.0119,6e-06,0.117,0.299
41546,Nothin' Like Tomorrow,0.556,0.672,-8.125,0.146,0.186,0.000439,0.125,0.289
42829,An Aptly Fictional Description,0.378,0.723,-7.947,0.0645,0.0575,0.0167,0.209,0.418
47716,Mirror Remains,0.445,0.682,-8.123,0.0375,0.0142,0.0146,0.0923,0.475


In [55]:
def pipeline_helper(data, p_vector, n, lyrics_e, song_data):
    """Run the full recommendation chain for one predicted feature vector.

    Steps: distances to `p_vector` -> `n` closest songs -> reduced lyric
    embeddings -> cosine-similarity filter -> pairwise-distance ordering ->
    feature rows from `song_data` -> trimmed columns.

    NOTE(review): the second value returned by reduce_dims is discarded here.
    When called as below, calc_cosine and calc_pairwise read the module-level
    `og_embeddings`, so that global must already describe the playlist being
    evaluated.
    """
    shortlist = potential_songs(get_distances(data, p_vector), n)
    reduced_embeddings, _ = reduce_dims(lyrics_e)
    by_cosine = calc_cosine(reduced_embeddings, shortlist)
    by_distance, _ = calc_pairwise(reduced_embeddings, by_cosine)
    return trim_recs(get_recs(song_data, by_distance))

In [160]:
# def show_fig()
#     fig = go.Figure()

#     text_data = list(candidates.keys())
#     embeddings_3d = np.concatenate(list(candidates.values())).reshape(len(candidates), 3)

#     color_data = ['blue' if i < cutoff else 'red' for i in range(len(candidates))]
#     for i in closest_candidates_indices:
#         color_data[i] = 'green'
#     color_data[closest_candidates_indices[0]] = 'purple'

#     fig.add_trace(go.Scatter3d(
#         x=embeddings_3d[:, 0],
#         y=embeddings_3d[:, 1],
#         z=embeddings_3d[:, 2],
#         text=text_data,
#         mode='markers',
#         marker=dict(
#             size=5,
#             color=color_data,
#             colorscale='Viridis',
#             opacity=1
#         )
#     ))


#     fig.update_layout(
#         scene=dict(
#             xaxis=dict(title='x'),
#             yaxis=dict(title='y'),
#             zaxis=dict(title='z')
#         ),
#         width=1000,
#         height=800
#     )
#     fig.update_layout(legend_title_text = "Songs")

#     fig.show()


NameError: name 'candidates' is not defined

### Spotify API Evaluation
Get recommendations from Spotify by passing in minimum and maximum feature values, and compare the songs to our recommended playlist.


In [110]:
# from spotify import SpotifyAPI
# from dotenv import load_dotenv
# # Get the input playlist
# # Traverse each song in the playlist, record the min and max of every feature value,
# # and store them in a dict that is passed as parameters to the Spotify API recommendations call

# input_playlist = og_songs
# features = dict()
# feature_names = ["danceability","energy","loudness","speechiness","acousticness","instrumentalness","liveness", "valence"]

# for feature in feature_names:
#     features["min_" + feature] = input_playlist[feature].min()
#     features["max_" + feature] = input_playlist[feature].max()

# features["seed_genres"] = input_playlist['tags'].iloc[0].split(',')[:3]
# features["seed_genres"] = ','.join(features["seed_genres"])
# features


{'min_danceability': 0.383,
 'max_danceability': 0.733,
 'min_energy': 0.187,
 'max_energy': 0.954,
 'min_loudness': -17.221,
 'max_loudness': -2.903,
 'min_speechiness': 0.03,
 'max_speechiness': 0.352,
 'min_acousticness': 0.000205,
 'max_acousticness': 0.923,
 'min_instrumentalness': 0.0,
 'max_instrumentalness': 0.894,
 'min_liveness': 0.0508,
 'max_liveness': 0.71,
 'min_valence': 0.038,
 'max_valence': 0.977,
 'seed_genres': 'rock, alternative, indie'}

In [111]:
# env_path = os.path.join('misc', '.env')
# load_dotenv(dotenv_path=env_path)
# SPOTIFY_CREDS = [os.getenv('SPOTIFY_CLIENT_ID'), 
#                 os.getenv('SPOTIFY_CLIENT_SECRET')]

# if not all(SPOTIFY_CREDS):
#         print("Please set the SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET environment variables")
#         exit(1)

# spotify = SpotifyAPI(*SPOTIFY_CREDS)
# SPOTIFY_CREDS

['<redacted SPOTIFY_CLIENT_ID>', '<redacted SPOTIFY_CLIENT_SECRET>']

In [112]:
# recs_from_spotify = spotify.get_recommendations(features=features, limit=10)

In [113]:
# # SPOTIFY's PREDICTED RECOMMENDATIONS
# recs_from_spotify = recs_from_spotify.loc[:, ['name', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
#                                 'liveness', 'valence']]
# recs_from_spotify

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
0,Ain't Talkin' 'Bout Love - 2015 Remaster,0.518,0.938,-5.964,0.0416,0.021,0.000305,0.288,0.71
1,Pressure,0.622,0.842,-3.678,0.0609,0.00342,0.00014,0.0575,0.724
2,The Zephyr Song,0.725,0.803,-4.083,0.0337,0.013,3.7e-05,0.0891,0.403
3,Reptilia,0.488,0.65,-5.11,0.0336,0.000597,0.714,0.101,0.77
4,Halo,0.421,0.823,-3.327,0.0446,0.00078,3.2e-05,0.204,0.155
5,Rebel Yell,0.531,0.864,-4.948,0.0611,0.000753,0.00046,0.354,0.485
6,Burnin' for You,0.532,0.728,-9.079,0.0352,0.106,0.0108,0.0849,0.69
7,Hellfire,0.529,0.847,-5.513,0.0482,0.244,0.0,0.205,0.69
8,Rag Doll,0.579,0.954,-3.947,0.0558,0.583,0.0,0.139,0.583
9,The Passenger,0.501,0.846,-8.098,0.0458,0.0796,0.0,0.241,0.739


In [114]:
# OUR PREDICTED RECOMMENDATIONS
# rec_songs

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
3072,The Days,0.59,0.595,-8.415,0.0471,0.0423,0.000151,0.097,0.587
4998,Reconstruction Site,0.59,0.549,-8.365,0.0338,0.234,7.7e-05,0.129,0.304
26660,Temples Of Gold,0.604,0.598,-8.452,0.0276,0.132,9e-06,0.103,0.345
29639,My Culture,0.621,0.834,-8.391,0.0824,0.107,5.9e-05,0.183,0.452
29811,Stupid,0.543,0.486,-8.383,0.0302,0.147,3e-06,0.0714,0.462
36552,Assim Assado,0.557,0.619,-8.431,0.0538,0.0218,0.0,0.0578,0.54
38374,You & A Promise,0.562,0.684,-8.182,0.0285,0.0656,0.0321,0.111,0.471
44381,Happier Times,0.513,0.612,-8.396,0.0295,0.312,0.158,0.119,0.485
46628,What Whorse You Wrote Id On,0.546,0.608,-8.483,0.0257,0.0106,0.0,0.0877,0.38
47026,(This Is) The Dream of Evan and Chan (Superpit...,0.617,0.722,-8.421,0.0308,0.0442,0.127,0.11,0.465


In [115]:
# def calculate_euclidean_distance(v1, v2):
#     return np.linalg.norm(v1 - v2)

# def calculate_score(pd1, pd2):
#     if pd1.shape[1] != pd2.shape[1]:
#         raise ValueError("Dataframes must have the same number of features.")
    
#     for _, x in pd1.iterrows():
#         fx = np.array(x[1:].values)
#         dist = list()
#         for _, y in pd2.iterrows():
#             fy = np.array(y[1:].values)
#             dist.append(calculate_euclidean_distance(fx, fy))
#         print(np.mean(dist))


In [116]:
# calculate_score(rec_songs, recs_from_spotify)

3.216405762942128
3.22111601121845
3.270662810756221
3.194585374275433
3.2188667168347473
3.2309139168002345
3.0407262432556807
3.2210300470317703
3.2886804440513684
3.2219872553590463
