# CSE 144 Group 3
## Music Recommendation System (MRS)

In this notebook, we write the predictive model for our music recommendation system. Our work leverages modern tools including recurrent neural networks (RNN) and BERT sentence transformers...

<br>

Our work leverages this RNN model:

https://github.com/taylorhawks/RNN-music-recommender/blob/master/cloud/model.ipynb


In [1]:
# import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
%config InlineBackend.figure_format="retina"
import numpy as np
import random
import torch
import os
# from torch import nn, optim
# import math
# from IPython import display
# import torchvision.datasets as datasets
# import torchvision.transforms as transforms
# from torch.utils.data import TensorDataset
# import torch.nn.functional as F
# from sklearn.preprocessing import MinMaxScaler
# import pdb
import plotly.graph_objects as go
import numpy as np

from skimage.util.shape import view_as_windows as viewW
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.decomposition import PCA

# import tensorflow as tf

# import keras.backend as K
from keras.models import Sequential, load_model
# from keras.optimizers import RMSprop
from keras.layers import Dense, SimpleRNN, Input
from keras.losses import *


### Load the data

In [2]:
song_features_data = pd.read_csv('misc/processed_music_info_extended.csv')
user_listening_data = pd.read_csv('misc/processed_user_listening_hist.csv')

# from google.colab import drive
# drive.mount('/content/drive')
# import pandas as pd
# song_features_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/music_info.csv')
# user_listening_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/user_listening_hist.csv')

### Set Random Seed

In [3]:
torch.manual_seed(24)

<torch._C.Generator at 0x1b658d84b90>

### Read and Display Data

In [4]:
print('# of rows of Song Data: ' + str(len(song_features_data)))
print('# of unique songs: ' + str(len(song_features_data['track_id'].unique())))
song_features_data.head()

# of rows of Song Data: 50683
# of unique songs: 50683


Unnamed: 0,track_id,name,artist,spotify_id,tags,year,duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,TRIOREW128F424EAF0,Mr. Brightside,The Killers,09ZQ5TmUG8TSL56n0knqrj,"rock, alternative, indie, alternative_rock, in...",2004,222200,0.355,0.918,1,-4.36,1,0.0746,0.00119,0.0,0.0971,0.24,148.114,4
1,TRRIVDJ128F429B0E8,Wonderwall,Oasis,06UfBBDISthj1ZJAtX4xjj,"rock, alternative, indie, pop, alternative_roc...",2006,258613,0.409,0.892,2,-4.373,1,0.0336,0.000807,0.0,0.207,0.651,174.426,4
2,TROUVHL128F426C441,Come as You Are,Nirvana,0keNu0t0tqsWtExGM3nT1D,"rock, alternative, alternative_rock, 90s, grunge",1991,218920,0.508,0.826,4,-5.783,0,0.04,0.000175,0.000459,0.0878,0.543,120.012,4
3,TRUEIND128F93038C4,Take Me Out,Franz Ferdinand,0ancVQ9wEcHVd0RrGICTE4,"rock, alternative, indie, alternative_rock, in...",2004,237026,0.279,0.664,9,-8.851,1,0.0371,0.000389,0.000655,0.133,0.49,104.56,4
4,TRLNZBD128F935E4D8,Creep,Radiohead,01QoK9DA7VTeTSE3MNzp4I,"rock, alternative, indie, alternative_rock, in...",2008,238640,0.515,0.43,7,-9.935,1,0.0369,0.0102,0.000141,0.129,0.104,91.841,4


In [5]:
print('# of rows of User Listening Data: ' + str(len(user_listening_data)))
print('# of unique users: ' + str(len(user_listening_data['user_id'].unique())))
user_listening_data.head()

# of rows of User Listening Data: 806745
# of unique users: 25343


Unnamed: 0,track_id,user_id,playcount
0,TRLATHU128F92FC275,5a905f000fc1ff3df7ca807d57edb608863db05d,11
1,TRMKFPN128F42858C3,5a905f000fc1ff3df7ca807d57edb608863db05d,2
2,TRGAOLV128E0789D40,5a905f000fc1ff3df7ca807d57edb608863db05d,2
3,TREAQSX128E07818CA,5a905f000fc1ff3df7ca807d57edb608863db05d,2
4,TRUMDRI128F424FEFC,5a905f000fc1ff3df7ca807d57edb608863db05d,3


### Data Preprocessing


In [6]:
# Drop unnecessary columns
song_features_data = song_features_data.drop(columns=['year', 'time_signature', 'key'])

In [7]:
# Convert song duration from milliseconds to minutes
song_features_data["duration_mins"] = song_features_data["duration_ms"] / 60000
song_features_data.drop("duration_ms", axis=1, inplace=True)


song_features_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50683 entries, 0 to 50682
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track_id          50683 non-null  object 
 1   name              50683 non-null  object 
 2   artist            50683 non-null  object 
 3   spotify_id        50683 non-null  object 
 4   tags              49556 non-null  object 
 5   danceability      50683 non-null  float64
 6   energy            50683 non-null  float64
 7   loudness          50683 non-null  float64
 8   mode              50683 non-null  int64  
 9   speechiness       50683 non-null  float64
 10  acousticness      50683 non-null  float64
 11  instrumentalness  50683 non-null  float64
 12  liveness          50683 non-null  float64
 13  valence           50683 non-null  float64
 14  tempo             50683 non-null  float64
 15  duration_mins     50683 non-null  float64
dtypes: float64(10), int64(1), object(5)
memo

In [8]:
data = pd.merge(song_features_data, user_listening_data, on='track_id')
data.head()

Unnamed: 0,track_id,name,artist,spotify_id,tags,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins,user_id,playcount
0,TRIOREW128F424EAF0,Mr. Brightside,The Killers,09ZQ5TmUG8TSL56n0knqrj,"rock, alternative, indie, alternative_rock, in...",0.355,0.918,-4.36,1,0.0746,0.00119,0.0,0.0971,0.24,148.114,3.703333,fe31db6d197a667d265ff5a35d80d60f3660f729,2
1,TRRIVDJ128F429B0E8,Wonderwall,Oasis,06UfBBDISthj1ZJAtX4xjj,"rock, alternative, indie, pop, alternative_roc...",0.409,0.892,-4.373,1,0.0336,0.000807,0.0,0.207,0.651,174.426,4.310217,67874d1a189c83326c529e554be6f7acf55effae,12
2,TRRIVDJ128F429B0E8,Wonderwall,Oasis,06UfBBDISthj1ZJAtX4xjj,"rock, alternative, indie, pop, alternative_roc...",0.409,0.892,-4.373,1,0.0336,0.000807,0.0,0.207,0.651,174.426,4.310217,e3ee8846c9a5a0916700a9e7abfc1c5b2fcb8e36,5
3,TRRIVDJ128F429B0E8,Wonderwall,Oasis,06UfBBDISthj1ZJAtX4xjj,"rock, alternative, indie, pop, alternative_roc...",0.409,0.892,-4.373,1,0.0336,0.000807,0.0,0.207,0.651,174.426,4.310217,cbb6b8dccf0af0d221dfd4684072c04bb0346f30,2
4,TRRIVDJ128F429B0E8,Wonderwall,Oasis,06UfBBDISthj1ZJAtX4xjj,"rock, alternative, indie, pop, alternative_roc...",0.409,0.892,-4.373,1,0.0336,0.000807,0.0,0.207,0.651,174.426,4.310217,2cdf67cd70a64964cb914835af0043fcc28a8f48,12


### Obtain total number of listens per song

In [9]:
play_counts = data.groupby('name')['playcount'].sum().reset_index()
play_counts

Unnamed: 0,name,playcount
0,#1 Zero,13
1,#16,110
2,#17,7
3,#24,5
4,$20 for Boban,43
...,...,...
23579,慟哭と去りぬ,134
23580,我、闇とて･･･,7
23581,朔-saku-,51
23582,蜷局,368


### Create playlists for input to RNN

In [10]:
data = data.sort_values(['user_id'])
data

Unnamed: 0,track_id,name,artist,spotify_id,tags,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins,user_id,playcount
306346,TRTVXIH128F426625A,Come Round Soon,Sara Bareilles,0jkVXytWSisMUtrBEej9mi,"pop, female_vocalists, singer_songwriter, soul...",0.338,0.819,-4.495,0,0.0776,0.077700,0.000000,0.1590,0.545,74.751,3.552000,0000f88f8d76a238c251450913b0d070e4a77d19,2
417455,TRWUFEW128F14782F3,Forever My Friend,Ray LaMontagne,0Ev7atdl0qS2n39OO7051O,"folk, singer_songwriter, soul, blues, acoustic...",0.493,0.524,-13.553,1,0.0423,0.334000,0.014100,0.3570,0.379,176.233,5.788883,0000f88f8d76a238c251450913b0d070e4a77d19,2
32466,TRNXEPE128F9339E47,My Name Is Jonas,Weezer,0YU04WSkTVomRgeDOWlEzX,"rock, alternative, indie, alternative_rock, in...",0.261,0.947,-3.031,1,0.0488,0.000197,0.003320,0.3100,0.550,185.942,3.435333,0000f88f8d76a238c251450913b0d070e4a77d19,2
698954,TRMKCCV128F92EB22E,Light On,David Cook,1BnoZbPDh9dbYqabvM6qZg,"rock, alternative_rock, male_vocalists",0.448,0.830,-4.156,0,0.0332,0.067300,0.000000,0.1130,0.362,131.991,3.816883,0000f88f8d76a238c251450913b0d070e4a77d19,3
227171,TRJGJTH128F4291A81,"Oh My God, Whatever, Etc.",Ryan Adams,0sUzPqm1gdsabzX5htMvf7,"rock, indie, folk, singer_songwriter, acoustic...",0.572,0.395,-10.630,1,0.0304,0.700000,0.000250,0.1260,0.483,79.552,2.532667,0000f88f8d76a238c251450913b0d070e4a77d19,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
802077,TRSEFCM128F429354D,Set It Up,Xavier Rudd,1sF9FiOQivhgedQnS1j3fK,"acoustic, 00s",0.469,0.385,-11.300,0,0.0270,0.503000,0.001390,0.1150,0.116,130.767,4.141550,fffbab4b8416fc41d05fcbdcf0e6735c4f37cb39,2
417463,TRWUFEW128F14782F3,Forever My Friend,Ray LaMontagne,0Ev7atdl0qS2n39OO7051O,"folk, singer_songwriter, soul, blues, acoustic...",0.493,0.524,-13.553,1,0.0423,0.334000,0.014100,0.3570,0.379,176.233,5.788883,fffbab4b8416fc41d05fcbdcf0e6735c4f37cb39,8
649627,TRDVGIH128F429353C,Come Let Go,Xavier Rudd,258CEuV9zzGk2PraoCH2yx,"reggae, male_vocalists",0.547,0.546,-8.634,1,0.0470,0.114000,0.037200,0.3810,0.280,140.477,6.870433,fffbab4b8416fc41d05fcbdcf0e6735c4f37cb39,28
553208,TROXFVJ128F1465265,Bottom Of the Barrel,Amos Lee,1VWGfrhpY8IiNmqMHavRXS,"folk, soul, acoustic, guitar",0.609,0.346,-12.703,1,0.1460,0.761000,0.000000,0.1100,0.550,178.137,2.006433,fffbab4b8416fc41d05fcbdcf0e6735c4f37cb39,4


In [11]:
# Changed name to track_id
playlists = data.groupby('user_id')['track_id'].apply(lambda x: list(x.head(20)))
playlist_dict = playlists.to_dict()
print(playlists)

user_id
0000f88f8d76a238c251450913b0d070e4a77d19    [TRTVXIH128F426625A, TRWUFEW128F14782F3, TRNXE...
0005eb11fd1dad47e6e6719a4db30340073a9e38    [TRGOJNK128F92F2A03, TRQPSHM128F92F29ED, TRTUW...
000d80cd9b58a8f77b33aa613dcfc5cbf1daf5e8    [TRDYYKS128F4275626, TRBHLYP12903D0D107, TRABF...
000e9296161b73a1821aaed3d7f50d95e8665bf6    [TROPEIV128F428F5A8, TRIAZQY128F934D58D, TRMKA...
00100482b3f3074549c751e718c57ed211b35991    [TRSNCIW128F14557BC, TRJKPFL12903CCE490, TRWJN...
                                                                  ...                        
fff7352d8ca192c451ce4fa00d18e33e261ecad3    [TRDRVJA128F4267831, TRCKWGF12903CD2DCD, TRXUW...
fff759a45a3a68de552740e8285a97d5f65d4e58    [TRDJZFF128F92D2627, TRULONW128F9302209, TRBNY...
fff9bd021bf6e07936883b9bb045207fcf372a2c    [TROHXCJ128F935A6AC, TRUMJNK12903CF465A, TRXYM...
fffb0b218640d86e5cb99d41cd3ecad977142da5    [TRZGGHL12903CDBF1F, TRCAUIX128F4277AD0, TRYIK...
fffbab4b8416fc41d05fcbdcf0e6735c4f37cb39    [TRGPCUN

In [12]:
# Changed track_id to name
data_dict = data.drop(['artist', 'tags', 'playcount'], axis=1)
# Changed name to track_id
data_dict = data_dict.set_index(['user_id', 'track_id']).to_dict('index')

In [13]:
songs_done = 0
updated_playlist_dict = {}
for user_id, songs in playlist_dict.items():
    updated_songs = []
    for song in songs:
        key = (user_id, song)
        if key in data_dict:
            the_features = list(data_dict[key].values())
            updated_songs.append([song] + the_features)
            songs_done += 1
            if songs_done % 10000 == 0:
                print(songs_done)
    updated_playlist_dict[user_id] = updated_songs

playlist_dict = updated_playlist_dict

10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
480000
490000
500000


In [14]:
arr = []
for user_id, playlist in playlist_dict.items():
    arr2 = []
    for song in playlist:
        arr2.append(np.concatenate((song[0:6], song[7:12])))
    arr.append(arr2)

arr_np = np.array(arr)

In [15]:
playlists = pd.DataFrame.from_dict(playlist_dict, orient='index')
playlists.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0000f88f8d76a238c251450913b0d070e4a77d19,"[TRTVXIH128F426625A, Come Round Soon, 0jkVXytW...","[TRWUFEW128F14782F3, Forever My Friend, 0Ev7at...","[TRNXEPE128F9339E47, My Name Is Jonas, 0YU04WS...","[TRMKCCV128F92EB22E, Light On, 1BnoZbPDh9dbYqa...","[TRJGJTH128F4291A81, Oh My God, Whatever, Etc....","[TRFQYFT128F14840BC, Nobody Girl, 2YqjAMK5eeSk...","[TRKSXHR128F1455E4D, Dear Chicago, 2J8P81JjKem...","[TRGJTIY128F4296A0E, All You Need Is Love, 0BW...","[TRZLJOC128F14840BE, Enemy Fire, 13gnIRFWtBQN1...","[TRSMEUG128F14856D2, Within You, 0VRC5T7fDBY1S...","[TRZYESA128F148D67F, Please Do Not Let Me Go, ...","[TRRUNEV128F148D719, Burning Photographs, 2Mco...","[TRDKRLP128F4291A80, Halloweenhead, 04N3X9vfSz...","[TRFTUIW128E0784B9F, Bubble Toes, 1CFwwYZ58s34...","[TRKGCIA128F92C315D, Joe's Head, 0A2BgEzGWU9HB...","[TRFUCYR128F92DC67F, California Waiting, 0txCP...","[TROZZNY128F14782F7, All the Wild Horses, 0FFm...","[TRTWOCA128F14840B8, La Cienega Just Smiled, 0...","[TRQSEMJ128F4294F24, Pearls On A String, 02WVv...","[TRUNKTP12903CD1EFB, Blue Sky, 08SPbOlgCODbnWE..."
0005eb11fd1dad47e6e6719a4db30340073a9e38,"[TRGOJNK128F92F2A03, The Technicolor Phase, 27...","[TRQPSHM128F92F29ED, The Airway, 3Cy5wM1kAWdQ3...","[TRTUWMO128F92F2A09, Dear Vienna, 2LBdBoz94BqE...","[TRRNWAK128F92F29FB, Super Honeymoon, 0aMWS9ld...","[TRYEGSH12903CD2DCE, Overboard, 0cfsbkanGUO3yz...","[TRCKWGF12903CD2DCD, Never Let You Go, 7mP4fGw...","[TRNFVQI128F931BAEA, The Saltwater Room, 1eX8F...","[TRTKLFX12903CD2DC2, First Dance, 0OQuXXwwYt2j...","[TRPGPDK12903CCC651, Bring Me To Life, 0rJ8HF2...","[TRJDMHS128F92F2A0C, I'll Meet You There, 2XGM...","[TRLVQME128F931BAF3, Vanilla Twilight, 0hXBVbr...","[TRUGOGT128F92F29E9, Captains and Cruise Ships...","[TRCXWLU128F92F2A0D, This Is The Future, 17jG9...","[TRRVJCK12903CD2DCB, U Smile, 0KDJBhhe2OYnnoJt...","[TRCJAHJ128E07815B6, Stacy's Mom, 0b5Z4MPCgSFm...","[TRPWIGO128F931BAEB, Dental Care, 1IyackM7hvB1...","[TRNEITZ128F92F29EA, Designer Skyline, 30KmLL3...","[TRMIHFS128F92F2A01, Early Birdie, 1TvtrJ6uyfQ...","[TRRLGDR128F933A7C9, Injection, 0it4CBT8IGSbXv...","[TRLNFKN128F931BAF2, The Tip Of The Iceberg, 1..."
000d80cd9b58a8f77b33aa613dcfc5cbf1daf5e8,"[TRDYYKS128F4275626, Music Is Happiness, 5eWkK...","[TRBHLYP12903D0D107, 4X4, 21SudxOkg2z2LMBrghl7...","[TRABFDT12903CADD73, Up Up & Away, 0InFAWpnO2z...","[TRLNVSC12903CADD67, Simple As..., 04nE0pNbhPQ...","[TRKOCXI128F9316B54, Harmony One, 1BtLEUri7ROn...","[TRSEFCM128F429354D, Set It Up, 1sF9FiOQivhged...","[TRUWANM128F1485EE2, LDN, 016gjTKLZX8Sgaos4DRq...","[TRXKEMH128F423381D, Superfresh, 0mCoxFFYs0TRZ...","[TREMDON128F427C701, Crimewave (Crystal Castle...","[TRHPKWO128F92E01D5, The Lightning Strike, 1rE...","[TRPONOG128F4275608, The Adjustor, 6sC8fTO6Ja6...","[TRJGDTG128F421CE22, Lights & Music, 0FezhHZVm...","[TROTYPC128E07940AB, Door Peep, 0ceGoYvdbcsRll...","[TRPXIWX128F429831F, One Minute to Midnight, 0...","[TROINZB128F932F740, Crazy in Love, 0klMKiGV38...","[TROUAEG128F429354A, Message Stick, 0jFN4WAx76...","[TRQEBRP12903CADD6C, Sky Might Fall, 2Pq2jkcG8...","[TROTWMO128F42B9238, Iconography, 04gW4W5ziYM3...","[TRJYECB128F4230F29, Second Chances, 1WPZR8Kf1...","[TRJLGXB128F93043EA, Colourful, 21rILkLpA1vsYZ..."
000e9296161b73a1821aaed3d7f50d95e8665bf6,"[TROPEIV128F428F5A8, Fatal, 5HeBXKvt8Kc9wY7rrk...","[TRIAZQY128F934D58D, El Pueblo Unido, 6M3ONz42...","[TRMKAZB128F92F2F3E, Can't Keep, 08SE6CEP3gjL9...","[TRPHDFT128F92C5A75, So Com Voce, 1f0V4eqYAmy1...","[TRNXBBR128F425ECE3, We Came Along This Road, ...","[TRKPWGR128E078EE06, Where Did You Sleep Last ...","[TRLPOFY128F425ECE8, Darker With the Day, 1PKj...","[TRCHYZB128F425ECE1, The Sorrowful Wife, 3DFrC...","[TRXEAZB128E078EDCE, Something In The Way, 7hh...","[TRFVSOZ128F4281933, I'm Sleeping in a Submari...","[TRDMUWU128E078EDDB, Dumb, 13noTim30TG19L0rg9f...","[TRDRFVY128F4281937, Headlights Look Like Diam...","[TRIPLBA128F427200F, My Moon My Man, 0Bl1KVabX...","[TRJSAID128F934D596, Beautiful Drug, 50t5tH0xK...","[TRMYAYJ128F934D0AF, Until the Morning, 20F3Fc...","[TRWGIOT128F425ECDE, Sweetheart Come, 0pcV8SPE...","[TRLRCIA128F425ECD7, Fifteen Feet of Pure Whit...","[TRIAGDA128F4296176, Recycled Air, 0k0UEpGDB2x...","[TRIDPWO128F423DBC6, Faust Arp, 5SdmtFbNOD7Qej...","[TRPFLRB128F14A895D, No Cars Go, 0nev4XL4Y6hrD..."
00100482b3f3074549c751e718c57ed211b35991,"[TRSNCIW128F14557BC, Col, 4XZ9hQzKr4hUf2IRzwqx...","[TRJKPFL12903CCE490, A Well Deserved Break, 2t...","[TRWJNEC128E079654F, Part of the Process, 06yd...","[TRACWHF128F14557BB, Enjoy The Wait, 03n4bUnpU...","[TRAZCMI128F14557B9, Howling, 2PFP9QAfVE4cmPuS...","[TRUEXGL128F14557BD, Who Can You Trust?, 3e2UE...","[TREECSZ128F14557BE, Almost Done, 13ccsvjo5S9Q...","[TRUAJOJ128F14557B6, Post Houmous, 0BrSfR3QBDY...","[TRASVEM128E0796553, Trigger Hippie, 0oWC3y01a...","[TROXRVT128E079650A, Aqualung, 0NQEKTasUwXVu03...","[TRZJHGG128E079655A, Never An Easy Way, 1c1KOD...","[TRIXKKQ12903CCE495, Coming Down Gently, 1THrj...","[TRORPWW12903CCE48E, Love Is Rare, 07ZOef7Bqy9...","[TRYIASQ128E079650E, Undress Me Now, 2cZu9PrRr...","[TRDNHAW128F429DB9A, The Ballad of Michael Val...","[TRXYEKR128E079654C, Otherwise, 0NTSwjegwCGXjm...","[TRHZMPR128F42A52CB, Challengers, 33ZcFxD1Ohwj...","[TRXZMLY128E0796512, Public Displays of Affect...","[TRJSQQT128F149F9B4, Street Justice, 0lJRL3H6x...","[TRXCZNS128F428A15E, Next To You, 0rUmVbfsJQzW..."


### Train and Test Split

In [16]:
# Train and test splits for playlist
X = arr_np[:,:-1,:]
Y = arr_np[:,1:,:]
x_train, x_val, y_train, y_val = train_test_split(X,Y,train_size=0.9,random_state=3000)
x_train, x_test, y_train, y_test = train_test_split(x_train,y_train,train_size=0.9,random_state=3000)


In [17]:
spotify_data_x = x_test[-10:, 1:6, :]
spotify_data_y = y_test[-10:, :5, :]


In [18]:
# Original Playlists
ops_x_train, ops_y_train, ops_x_val, ops_y_val, ops_x_test, ops_y_test = [], [], [], [], [], []

# This only works based on size if val and test sets switch in size switch them in these loops
for user in range(np.ma.size(x_train, axis=0)):
    names_x_train, names_y_train, names_x_val, names_y_val, names_x_test, names_y_test = [], [], [], [], [], []
    for song in range(np.ma.size(x_train, axis=1)):
        names_x_train.append(x_train[user, song, 0:3])
        names_y_train.append(y_train[user, song, 0:3])
        try:
            names_x_val.append(x_val[user, song, 0:3])
            names_y_val.append(y_val[user, song, 0:3])
        except IndexError:
            continue
        try:
            names_x_test.append(x_test[user, song, 0:3])
            names_y_test.append(y_test[user, song, 0:3])
        except IndexError:
            continue

    ops_x_train.append(names_x_train)
    ops_y_train.append(names_y_train)
    if not names_x_val:
        continue
    ops_x_val.append(names_x_val)
    ops_y_val.append(names_y_val)
    if not names_x_test:
        continue
    ops_x_test.append(names_x_test)
    ops_y_test.append(names_y_test)
x_train = x_train[:, :, 3:].astype(np.float64)
y_train = y_train[:, :, 3:].astype(np.float64)
x_val = x_val[:, :, 3:].astype(np.float64)
y_val = y_val[:, :, 3:].astype(np.float64)
x_test = x_test[:, :, 3:].astype(np.float64)
y_test = y_test[:, :, 3:].astype(np.float64)

### Define the Model

In [19]:
if os.path.exists('misc/mae_optimized_model.keras'):
    print("using saved model")
    model = load_model('misc/mae_optimized_model.keras')
else:
    print("training model")
    model = Sequential()
    model.add(Input(shape=(None,8)))
    model.add(SimpleRNN(
        16,
        activation='linear',
        return_sequences=True,
        kernel_initializer='random_uniform',
    ))
    model.add(SimpleRNN(
        16,
        activation='linear',
        return_sequences=True,
        kernel_initializer='random_uniform',
    ))
    model.add(Dense(8, activation='linear', kernel_initializer='random_uniform',))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(torch.cuda.get_device_name(0))

    
    model.compile(loss='mae', optimizer='adam')
    model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val))
    model.save('misc/mae_optimized_model.keras')

using saved model


In [20]:
mae_optimized_model_adam = model

In [21]:
def predict_sample(sample,model):
    return (model.predict(np.array([sample]))[0,-1])

In [22]:
np.save("misc/x_test", x_test)
np.save("misc/y_test", y_test)

### Run RNN

In [23]:
print('Selecting a random index in our test dataset: ')
random_index = random.randint(0,len(x_test)-1)
print(random_index)

print('Input: ')
print(x_test[random_index])

print('\n','Output: ')
predicted = predict_sample(x_test[random_index], mae_optimized_model_adam)
print(predicted)

Selecting a random index in our test dataset: 
128
Input: 
[[ 4.6700e-01  7.3400e-01 -7.6070e+00  3.3300e-02  9.1700e-04  8.8600e-03
   2.5700e-01  3.0600e-01]
 [ 5.9700e-01  9.7400e-01 -4.5260e+00  7.7600e-02  6.9600e-03  0.0000e+00
   7.8600e-02  8.4500e-01]
 [ 4.1300e-01  8.7300e-01 -4.1510e+00  5.3600e-02  2.6400e-04  2.4900e-03
   2.0400e-01  4.8000e-01]
 [ 5.9000e-01  7.7400e-01 -7.7740e+00  2.9500e-02  3.6000e-01  4.8500e-02
   9.5600e-02  1.8600e-01]
 [ 6.3800e-01  7.9000e-01 -7.6760e+00  4.8700e-02  3.4200e-02  5.2700e-04
   6.4000e-02  9.0500e-01]
 [ 9.0200e-01  6.0100e-01 -6.1180e+00  5.4000e-02  1.5100e-01  4.7300e-04
   8.5100e-02  9.0500e-01]
 [ 2.9600e-01  9.6100e-01 -6.3730e+00  1.9900e-01  7.5700e-05  1.2800e-06
   8.9500e-02  3.8100e-01]
 [ 5.2800e-01  3.9700e-01 -1.2024e+01  9.2600e-02  5.6200e-03  3.6400e-06
   4.3000e-01  5.7900e-01]
 [ 4.4400e-01  3.5700e-01 -2.1436e+01  6.3200e-02  1.3600e-03  1.5700e-03
   8.3200e-02  7.8800e-01]
 [ 2.7700e-01  9.9000e-01 -4.824

In [24]:
print(type(x_test[random_index]))

<class 'numpy.ndarray'>


In [24]:
distance_frame = song_features_data.drop(['artist','tags','tempo','duration_mins','mode'], axis=1)
distance_frame.head()

Unnamed: 0,track_id,name,spotify_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
0,TRIOREW128F424EAF0,Mr. Brightside,09ZQ5TmUG8TSL56n0knqrj,0.355,0.918,-4.36,0.0746,0.00119,0.0,0.0971,0.24
1,TRRIVDJ128F429B0E8,Wonderwall,06UfBBDISthj1ZJAtX4xjj,0.409,0.892,-4.373,0.0336,0.000807,0.0,0.207,0.651
2,TROUVHL128F426C441,Come as You Are,0keNu0t0tqsWtExGM3nT1D,0.508,0.826,-5.783,0.04,0.000175,0.000459,0.0878,0.543
3,TRUEIND128F93038C4,Take Me Out,0ancVQ9wEcHVd0RrGICTE4,0.279,0.664,-8.851,0.0371,0.000389,0.000655,0.133,0.49
4,TRLNZBD128F935E4D8,Creep,01QoK9DA7VTeTSE3MNzp4I,0.515,0.43,-9.935,0.0369,0.0102,0.000141,0.129,0.104


In [25]:
distance_frame.drop_duplicates(subset='track_id', keep='first', inplace=True)
distance_frame.track_id.nunique()

50683

In [79]:
distance_frame.to_csv("misc/distance_frame.csv")

In [42]:
def get_distances(data, p_vector):
    names = data['name']
    distance_frame = data.drop(['name', 'spotify_id'], axis=1)
    distance_dict = distance_frame.set_index(['track_id']).to_dict('index')
    for key in distance_dict:
        distance_dict[key] = list(distance_dict[key].values())
    distance_dict = distance_calc(distance_dict, p_vector, names)
    return pd.DataFrame.from_dict(distance_dict, orient='index', columns=['id', 'distance'])

def distance_calc(dict, v1, name_list):
    distances = {}
    i = 0
    name_list = name_list.to_list()
    for id in dict.keys():
        v2 = dict[id]
        value = 0.0
        for n in range(len(v1)):
            value += np.linalg.norm(v1[n] - v2[n])
        distances[name_list[i]] = (id, value)
        i += 1
    return distances

distance_frame2 = get_distances(distance_frame, predicted)


In [48]:
POTENTIAL_N = 50 #defines size
def potential_songs(frame, n):
    return frame.nsmallest(n, columns='distance', keep='all')

potential_songs_data = potential_songs(distance_frame2, POTENTIAL_N)

In [49]:
lyrics_embeddings_csv = pd.read_csv('misc/lyrics_embeddings.csv')
lyrics_embeddings_3d_csv = pd.read_csv('misc/lyrics_embeddings_3d.csv')

In [81]:
def get_embeddings(frame, frame3D):
    lyrics_embeddings = dict()
    lyrics_embeddings_3d  = dict()
    for _, row in frame.iterrows():
        lyrics_embeddings[row[0]] = np.array(row[1:])

    for _, row in frame3D.iterrows():
        lyrics_embeddings_3d[row[0]] = np.array(row[1:])

    return lyrics_embeddings, lyrics_embeddings_3d

lyrics_embeddings, lyrics_embeddings_3d = get_embeddings(lyrics_embeddings_csv, lyrics_embeddings_3d_csv) 



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [52]:
def get_candidates(original_playlist, index, p_songs):
    candidates = dict()
    rnn_track_ids = []
    for track in original_playlist[index]:
        rnn_track_ids.append(track[0])
        candidates[track[0]] = lyrics_embeddings_3d[track[0]]

    cutoff = len(candidates)

    for _, row in p_songs.head(100).iterrows():
        candidates[row['id']] = lyrics_embeddings_3d[row['id']]

    print(len(candidates))

    return candidates, rnn_track_ids, cutoff

candidates, rnn_track_ids, cutoff = get_candidates(ops_x_test, random_index, potential_songs_data)

69


In [53]:
# For reducing dimensions of the embeddings
def reduce_dims(lyrics_embeddings):
    raw_embeddings = np.concatenate(list(lyrics_embeddings.values())).reshape(len(lyrics_embeddings), 768)
    track_ids = list(lyrics_embeddings.keys())
    dim_model = PCA(n_components=150, random_state=42)
    dim_model.fit(raw_embeddings)
    reduced_embeddings = dim_model.transform(raw_embeddings)
    reduced_embeddings_dict = {track_ids[i]: reduced_embeddings[i] for i in range(len(track_ids))}

    og_embeddings = np.array([reduced_embeddings_dict[track_id] for track_id in rnn_track_ids])

    return reduced_embeddings_dict, og_embeddings

reduced_embeddings_dict, og_embeddings = reduce_dims(lyrics_embeddings)

At this stage, we must compare the embeddings in the predicted list against those in the original input list and find the best candidates
### Cosine Similarity

In [58]:
def calc_cosine(reduced_embeddings_dict, potential_songs_data):
    similarities = list()

    for track_id in potential_songs_data['id']:

        candidate_embedding = reduced_embeddings_dict[track_id].reshape(1, -1)
        similarity = cosine_similarity(candidate_embedding, og_embeddings)
        similarities.append(np.mean(similarity))

    similarities = np.array(similarities)
    most_similar_indices = np.argsort(similarities)[::-1]
    selected_songs_cs = potential_songs_data.iloc[most_similar_indices[:10]]
    return selected_songs_cs

selected_songs_cs = calc_cosine(reduced_embeddings_dict, potential_songs_data)

10


### Pairwise Distances

In [66]:
def calc_pairwise(reduced_embeddings_dict, selected_songs_cs):
    candidate_embeddings = np.array([reduced_embeddings_dict[track_id] for track_id in selected_songs_cs['id']])

    distances = pairwise_distances(candidate_embeddings, og_embeddings, metric='euclidean')
    mean_distances = np.mean(distances, axis=1)
    closest_candidates_indices = np.argsort(mean_distances)[:10]
    selected_songs_pd = selected_songs_cs.iloc[closest_candidates_indices]

    print(closest_candidates_indices)
    return selected_songs_pd, closest_candidates_indices

selected_songs_pd, closest_candidates_indices = calc_pairwise(reduced_embeddings_dict, selected_songs_cs)
selected_songs_pd

[5 9 4 8 3 1 0 2 7 6]


Unnamed: 0,id,distance
Trance-Former,TRJXOTB128F4275351,0.277169
OK,TRCPZFE128EF34A94B,0.288674
Suicide on My Mind,TRVIYGB128F42464E9,0.217319
Hooked,TRGFXCY128F42A6AD7,0.265555
Tongue Tied,TRZBMMA128F42872D2,0.291028
War Inside My Head,TRISTCR128F428FEFB,0.263776
Prezent,TRERUNO128F427B749,0.301072
Suppose,TREIVOG128F92D5331,0.308889
Red Dirt Road,TROXWBT128F9305E67,0.254445
TRUST,TRHNIIF128F42884A7,0.274137


In [68]:
#Predicted Data
def get_recs(song_features_data, selected_songs_pd):
    return song_features_data[song_features_data['track_id'].isin(selected_songs_pd['id'])]

rec_songs = get_recs(song_features_data, selected_songs_pd)
rec_songs.head(10)

Unnamed: 0,track_id,name,artist,spotify_id,tags,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins
886,TRZBMMA128F42872D2,Tongue Tied,Grouplove,060CTIdsvB1FCDrYSougHk,"rock, electronic, alternative, indie, pop, ind...",0.56,0.936,-5.835,1,0.0439,0.00847,0.0,0.161,0.371,112.96,3.63355
13800,TRCPZFE128EF34A94B,OK,Farin Urlaub,2Kjm6WHTwilOTKKLm50PrJ,"rock, punk, punk_rock, german",0.623,0.748,-5.888,1,0.0633,0.00998,0.014,0.145,0.452,94.478,4.3211
18104,TRISTCR128F428FEFB,War Inside My Head,Dream Theater,0Q1YgE1Woe6PpWHseZqxFI,"progressive_rock, progressive_metal",0.478,0.938,-5.816,0,0.118,7.8e-05,9.8e-05,0.138,0.431,101.421,2.133333
25790,TRHNIIF128F42884A7,TRUST,L'Arc~en~Ciel,5ROfpAhDOiDkyn17SNDtAG,japanese,0.507,0.895,-5.892,1,0.0432,0.00216,0.0026,0.0745,0.393,95.965,4.484433
33033,TROXWBT128F9305E67,Red Dirt Road,Brooks & Dunn,0VfvJTraX71EEuGT3IRBaf,country,0.55,0.819,-5.745,1,0.0402,0.0622,0.000291,0.114,0.453,136.127,4.36955
37517,TRVIYGB128F42464E9,Suicide on My Mind,Angtoria,4AOl7tgOtRfn8nzEUtQavT,"gothic_metal, symphonic_metal",0.467,0.832,-5.762,0,0.0331,0.00466,0.00325,0.175,0.423,90.003,3.857333
43822,TREIVOG128F92D5331,Suppose,Secondhand Serenade,12J6BLv1QlN7fSFLEUytJg,"rock, alternative, indie, acoustic, emo, beaut...",0.519,0.788,-5.751,0,0.0465,0.014,0.0,0.0877,0.374,152.99,3.7791
44116,TRERUNO128F427B749,Prezent,Kombajn Do Zbierania Kur Po Wioskach,09xnX6V0QYGkPZR2vKsSLR,"alternative_rock, polish",0.456,0.84,-5.77,1,0.0347,0.002,0.0187,0.0984,0.362,148.129,3.806517
47701,TRGFXCY128F42A6AD7,Hooked,Seabound,0iSGwIjRnrEPOBq0OykeLi,"electronic, synthpop",0.618,0.875,-5.809,0,0.0595,0.00185,0.0144,0.101,0.447,133.003,4.948217
48829,TRJXOTB128F4275351,Trance-Former,Dope Stars Inc.,5BHpgmP3soBN4SeeGxd8dw,industrial,0.494,0.874,-5.821,0,0.0918,6.3e-05,0.00282,0.0777,0.364,164.995,4.177767


In [70]:
#Original Playlist
def get_ogp(song_features_data, rnn_track_ids):
    return song_features_data[song_features_data['track_id'].isin(rnn_track_ids)]

og_songs = get_ogp(song_features_data, rnn_track_ids)
og_songs.head(19)

Unnamed: 0,track_id,name,artist,spotify_id,tags,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins
44,TRAALAH128E078234A,Bitter Sweet Symphony,The Verve,0jLnevC3Vn34qVWrAa4X6x,"rock, alternative, indie, pop, alternative_roc...",0.383,0.907,-5.409,1,0.0405,0.0313,0.0,0.347,0.514,171.052,5.972217
522,TRUOJJS128F145C256,A Rush of Blood to the Head,Coldplay,04ZAwhY76rVwALw4Xyhmkl,"rock, alternative, indie, pop, alternative_roc...",0.536,0.49,-7.176,1,0.0245,0.178,2e-06,0.255,0.136,138.331,5.832883
553,TRPRQXU128E0789D38,White Shadows,Coldplay,0WWz2AaqxLoO0fa9ou6Fqc,"rock, alternative, indie, pop, alternative_roc...",0.491,0.783,-8.997,0,0.0484,0.00521,0.0361,0.0951,0.26,127.437,5.470317
820,TRTGUWJ128F146EC54,Hands Open,Snow Patrol,0MBS2kSgIyisxYQSgC8BlO,"rock, alternative, indie, alternative_rock, in...",0.518,0.969,-2.053,1,0.0489,0.000301,7e-06,0.135,0.564,123.993,3.260667
2498,TRMYZFA128F146DE4E,Rollin' & Scratchin',Daft Punk,3Wi8fiEg2RkoPaC3PGIQEv,"electronic, dance, 90s, house, techno, electro...",0.823,0.787,-4.862,1,0.0417,0.0011,0.639,0.0524,0.207,130.038,7.4771
3844,TRULOVL128F92DCF53,If There's a Rocket Tie Me To It,Snow Patrol,172bN0rExrcjJ6eNZQY1PR,"rock, alternative, indie, alternative_rock, in...",0.452,0.695,-6.383,1,0.0313,0.0004,0.000444,0.229,0.0801,139.927,4.32755
6818,TRCQRWT128F92E494F,Desert Song,Edward Sharpe & The Magnetic Zeros,3Azd3uB3lOVls6aXjF2q6o,"indie, female_vocalists, experimental, folk, p...",0.48,0.722,-8.684,0,0.0356,0.664,0.259,0.366,0.324,118.879,4.504433
13142,TRIHPDV128F932B5DC,Long Forgotten Sons,Rise Against,112Kmd9k5UqZn1r2idDUhZ,"punk, hardcore, punk_rock",0.47,0.979,-3.312,0,0.0684,0.000127,0.00107,0.103,0.424,134.93,4.026
13749,TRZDDRC128F9346B9C,Alexithymia,Anberlin,37oN3JitYjpvTfWSIJJ9ia,"rock, alternative, alternative_rock, punk, emo",0.446,0.867,-4.886,0,0.0473,0.00592,0.0,0.171,0.723,153.975,3.371333
13903,TRYMFCL128F92E016D,The Resistance,Anberlin,1n1pmnMZQckWYCJMxwDGUZ,"rock, alternative, punk, metalcore, emo, post_...",0.457,0.933,-4.83,0,0.106,0.00172,0.0257,0.0523,0.556,192.01,3.282667


Unnamed: 0,track_id,name,artist,spotify_id,tags,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins
3072,TRAXJTE128F42563E1,The Days,Avicii,08nmBd9QzcaE0L9DoFzKRq,"electronic, dance, chillout, house, male_vocal...",0.59,0.595,-8.415,1,0.0471,0.0423,0.000151,0.097,0.587,126.998,4.607767
4998,TRXCYYZ128F426267C,Reconstruction Site,The Weakerthans,0aaLrvZs6YQGFP5dhg0Ysa,"alternative, indie_rock",0.59,0.549,-8.365,1,0.0338,0.234,7.7e-05,0.129,0.304,91.026,2.7451
26660,TRVXLHQ12903CFB25B,Temples Of Gold,Kamelot,1NzmukZZfJTeO6xacS8KKM,"progressive_metal, power_metal, symphonic_metal",0.604,0.598,-8.452,0,0.0276,0.132,9e-06,0.103,0.345,114.983,4.160433
29639,TRFPMJE128F4226ED6,My Culture,1 Giant Leap,2Daig2XMdbEMZKhA6SCqXq,"electronic, chillout, house, trip_hop, lounge,...",0.621,0.834,-8.391,1,0.0824,0.107,5.9e-05,0.183,0.452,92.487,5.653333
29811,TRXHEPX128F42AF0B3,Stupid,Sarah McLachlan,0AJ3G74BJAiysZ3khyv3kl,"pop, female_vocalists, singer_songwriter, pian...",0.543,0.486,-8.383,0,0.0302,0.147,3e-06,0.0714,0.462,155.083,3.398


In [78]:
def trim_recs(rec_songs):
    rec_songs = rec_songs.loc[:, ['name', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
                                    'liveness', 'valence']]
    return rec_songs

rec_songs = trim_recs(rec_songs)
rec_songs.head(10)

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
886,Tongue Tied,0.56,0.936,-5.835,0.0439,0.00847,0.0,0.161,0.371
13800,OK,0.623,0.748,-5.888,0.0633,0.00998,0.014,0.145,0.452
18104,War Inside My Head,0.478,0.938,-5.816,0.118,7.8e-05,9.8e-05,0.138,0.431
25790,TRUST,0.507,0.895,-5.892,0.0432,0.00216,0.0026,0.0745,0.393
33033,Red Dirt Road,0.55,0.819,-5.745,0.0402,0.0622,0.000291,0.114,0.453
37517,Suicide on My Mind,0.467,0.832,-5.762,0.0331,0.00466,0.00325,0.175,0.423
43822,Suppose,0.519,0.788,-5.751,0.0465,0.014,0.0,0.0877,0.374
44116,Prezent,0.456,0.84,-5.77,0.0347,0.002,0.0187,0.0984,0.362
47701,Hooked,0.618,0.875,-5.809,0.0595,0.00185,0.0144,0.101,0.447
48829,Trance-Former,0.494,0.874,-5.821,0.0918,6.3e-05,0.00282,0.0777,0.364


In [80]:
def pipeline_helper(data, p_vector, n, lyrics_embeddings):
    frame = get_distances(data, p_vector)
    potential_songs_data = potential_songs(frame, n)
    # candidates, rnn_track_ids, cutoff = get_candidates(original_playlist, index, potential_songs_data)
    reduced_embeddings_dict, _ = reduce_dims(lyrics_embeddings)
    selected_songs_cs = calc_cosine(reduced_embeddings_dict, potential_songs_data)
    selected_songs_pd, _ = calc_pairwise(reduced_embeddings_dict, selected_songs_cs)
    rec_songs = get_recs(song_features_data, selected_songs_pd)
    return trim_recs(rec_songs) 

In [72]:
fig = go.Figure()

text_data = list(candidates.keys())
embeddings_3d = np.concatenate(list(candidates.values())).reshape(len(candidates), 3)

color_data = ['blue' if i < cutoff else 'red' for i in range(len(candidates))]
for i in closest_candidates_indices:
    color_data[i] = 'green'
color_data[closest_candidates_indices[0]] = 'purple'

fig.add_trace(go.Scatter3d(
    x=embeddings_3d[:, 0],
    y=embeddings_3d[:, 1],
    z=embeddings_3d[:, 2],
    text=text_data,
    mode='markers',
    marker=dict(
        size=5,
        color=color_data,
        colorscale='Viridis',
        opacity=1
    )
))


fig.update_layout(
    scene=dict(
        xaxis=dict(title='x'),
        yaxis=dict(title='y'),
        zaxis=dict(title='z')
    ),
	width=1000,
    height=800
)
fig.update_layout(legend_title_text = "Songs")

fig.show()


### Spotify API Evaluation
Get recommendations from Spotify by passing in minimum and maximum feature values, and compare the songs to our recommended playlist.


In [110]:
# from spotify import SpotifyAPI
# from dotenv import load_dotenv
# # Get the input playlist
# # Traverse along each song in playlist and preserve min max of feature values and 
# # Store in list that gets passed to parameters for Spotify APIcommendations

# input_playlist = og_songs
# features = dict()
# feature_names = ["danceability","energy","loudness","speechiness","acousticness","instrumentalness","liveness", "valence"]

# for feature in feature_names:
#     features["min_" + feature] = input_playlist[feature].min()
#     features["max_" + feature] = input_playlist[feature].max()

# features["seed_genres"] = input_playlist['tags'].iloc[0].split(',')[:3]
# features["seed_genres"] = ','.join(features["seed_genres"])
# features


{'min_danceability': 0.383,
 'max_danceability': 0.733,
 'min_energy': 0.187,
 'max_energy': 0.954,
 'min_loudness': -17.221,
 'max_loudness': -2.903,
 'min_speechiness': 0.03,
 'max_speechiness': 0.352,
 'min_acousticness': 0.000205,
 'max_acousticness': 0.923,
 'min_instrumentalness': 0.0,
 'max_instrumentalness': 0.894,
 'min_liveness': 0.0508,
 'max_liveness': 0.71,
 'min_valence': 0.038,
 'max_valence': 0.977,
 'seed_genres': 'rock, alternative, indie'}

In [111]:
# env_path = os.path.join('misc', '.env')
# load_dotenv(dotenv_path=env_path)
# SPOTIFY_CREDS = [os.getenv('SPOTIFY_CLIENT_ID'), 
#                 os.getenv('SPOTIFY_CLIENT_SECRET')]

# if not all(SPOTIFY_CREDS):
#         print("Please set the SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET environment variables")
#         exit(1)

# spotify = SpotifyAPI(*SPOTIFY_CREDS)
# SPOTIFY_CREDS

['1fa1ca9358074af48f6230e7b13c0737', '880a92c5194f496b929d0d4d96ec3a59']

In [112]:
# recs_from_spotify = spotify.get_recommendations(features=features, limit=10)

In [113]:
# # SPOTIFY's PREDICTED RECOMMENDATIONS
# recs_from_spotify = recs_from_spotify.loc[:, ['name', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
#                                 'liveness', 'valence']]
# recs_from_spotify

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
0,Ain't Talkin' 'Bout Love - 2015 Remaster,0.518,0.938,-5.964,0.0416,0.021,0.000305,0.288,0.71
1,Pressure,0.622,0.842,-3.678,0.0609,0.00342,0.00014,0.0575,0.724
2,The Zephyr Song,0.725,0.803,-4.083,0.0337,0.013,3.7e-05,0.0891,0.403
3,Reptilia,0.488,0.65,-5.11,0.0336,0.000597,0.714,0.101,0.77
4,Halo,0.421,0.823,-3.327,0.0446,0.00078,3.2e-05,0.204,0.155
5,Rebel Yell,0.531,0.864,-4.948,0.0611,0.000753,0.00046,0.354,0.485
6,Burnin' for You,0.532,0.728,-9.079,0.0352,0.106,0.0108,0.0849,0.69
7,Hellfire,0.529,0.847,-5.513,0.0482,0.244,0.0,0.205,0.69
8,Rag Doll,0.579,0.954,-3.947,0.0558,0.583,0.0,0.139,0.583
9,The Passenger,0.501,0.846,-8.098,0.0458,0.0796,0.0,0.241,0.739


In [114]:
# OUR PREDICTED RECOMMENDATIONS
# rec_songs

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
3072,The Days,0.59,0.595,-8.415,0.0471,0.0423,0.000151,0.097,0.587
4998,Reconstruction Site,0.59,0.549,-8.365,0.0338,0.234,7.7e-05,0.129,0.304
26660,Temples Of Gold,0.604,0.598,-8.452,0.0276,0.132,9e-06,0.103,0.345
29639,My Culture,0.621,0.834,-8.391,0.0824,0.107,5.9e-05,0.183,0.452
29811,Stupid,0.543,0.486,-8.383,0.0302,0.147,3e-06,0.0714,0.462
36552,Assim Assado,0.557,0.619,-8.431,0.0538,0.0218,0.0,0.0578,0.54
38374,You & A Promise,0.562,0.684,-8.182,0.0285,0.0656,0.0321,0.111,0.471
44381,Happier Times,0.513,0.612,-8.396,0.0295,0.312,0.158,0.119,0.485
46628,What Whorse You Wrote Id On,0.546,0.608,-8.483,0.0257,0.0106,0.0,0.0877,0.38
47026,(This Is) The Dream of Evan and Chan (Superpit...,0.617,0.722,-8.421,0.0308,0.0442,0.127,0.11,0.465


In [115]:
# def calculate_euclidean_distance(v1, v2):
#     return np.linalg.norm(v1 - v2)

# def calculate_score(pd1, pd2):
#     if pd1.shape[1] != pd2.shape[1]:
#         raise ValueError("Dataframes must have the same number of features.")
    
#     for _, x in pd1.iterrows():
#         fx = np.array(x[1:].values)
#         dist = list()
#         for _, y in pd2.iterrows():
#             fy = np.array(y[1:].values)
#             dist.append(calculate_euclidean_distance(fx, fy))
#         print(np.mean(dist))


In [116]:
# calculate_score(rec_songs, recs_from_spotify)

3.216405762942128
3.22111601121845
3.270662810756221
3.194585374275433
3.2188667168347473
3.2309139168002345
3.0407262432556807
3.2210300470317703
3.2886804440513684
3.2219872553590463
