In [73]:
%matplotlib inline
%config InlineBackend.figure_format="retina"
import numpy as np
import pandas as pd
from keras.models import load_model
import torch
import os

In [74]:
# Pick the first CUDA GPU when PyTorch reports one is available, otherwise use the CPU.
# NOTE(review): `device` is not referenced by any other cell visible here — confirm it is still needed.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Execute the mrs.ipynb notebook from this kernel; its printed dataset summary follows as output.
# Presumably this is what provides get_embeddings, predict_sample and pipeline_helper,
# which later cells call without defining — TODO confirm.
%run mrs.ipynb

# of rows of Song Data: 50683
# of unique songs: 50683
# of rows of User Listening Data: 806745
# of unique users: 25343
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50683 entries, 0 to 50682
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track_id          50683 non-null  object 
 1   name              50683 non-null  object 
 2   artist            50683 non-null  object 
 3   spotify_id        50683 non-null  object 
 4   tags              49556 non-null  object 
 5   danceability      50683 non-null  float64
 6   energy            50683 non-null  float64
 7   loudness          50683 non-null  float64
 8   mode              50683 non-null  int64  
 9   speechiness       50683 non-null  float64
 10  acousticness      50683 non-null  float64
 11  instrumentalness  50683 non-null  float64
 12  liveness          50683 non-null  float64
 13  valence           50683 non-null  float64
 14  tempo        

In [61]:
# Prerequisite: the BERT model notebook and the mrs model notebook must have been run
# before this cell (presumably they are what writes the files under misc/ — TODO confirm).

# Trained Keras model used for prediction.
model = load_model("misc/mae_optimized_model.keras")

# NumPy arrays: held-out test split and the Spotify comparison data.
x_test = np.load("misc/x_test.npy")
y_test = np.load("misc/y_test.npy")
spotify_input = np.load("misc/spotify_data.npy")
compare_spotify = np.load("misc/compare_spotify.npy")

# CSV tables: song distance frame and the lyrics embeddings (full and 3-D variants).
distance_frame = pd.read_csv('misc/distance_frame.csv')
lyrics_embeddings_csv = pd.read_csv('misc/lyrics_embeddings.csv')
lyrics_embeddings_3d_csv = pd.read_csv('misc/lyrics_embeddings_3d.csv')



In [None]:
# Turn the two CSV-loaded embedding frames into the embedding objects used downstream.
# get_embeddings is not defined in this notebook (presumably it comes from `%run mrs.ipynb`),
# so the exact return types are not visible here — TODO confirm.
# Only lyrics_embeddings is consumed by the cells visible below; lyrics_embeddings_3d is not.
lyrics_embeddings, lyrics_embeddings_3d = get_embeddings(lyrics_embeddings_csv, lyrics_embeddings_3d_csv)

In [41]:
# Size parameter handed to pipeline_helper; presumably the number of candidate songs
# considered before the final recommendation is chosen — TODO confirm against the helper.
POTENTIAL_N = 50

# Smoke test on the first held-out sample: predict with the loaded model, then let
# pipeline_helper turn the prediction into a recommendation using the distance frame
# and the lyrics embeddings. Both helpers are defined outside this notebook.
predicted = predict_sample(x_test[0], model)
recommendation = pipeline_helper(distance_frame, predicted, POTENTIAL_N, lyrics_embeddings)  
print(recommendation)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step
[8 7 6 5 9 1 4 0 2 3]
                   name  danceability  energy  loudness  speechiness  \
2       Come as You Are         0.508   0.826    -5.783       0.0400   
7          Karma Police         0.360   0.505    -9.129       0.0260   
12       Feel Good Inc.         0.809   0.720    -7.776       0.1360   
18         Chasing Cars         0.558   0.568    -5.731       0.0267   
20  When You Were Young         0.466   0.981    -3.944       0.1120   
24            Starlight         0.555   0.880    -3.888       0.0312   
27         No Surprises         0.255   0.393   -10.654       0.0278   
34       Clint Eastwood         0.659   0.712    -7.602       0.1730   
40             Paranoid         0.413   0.772    -8.626       0.0778   
47     Heart-Shaped Box         0.609   0.842    -5.861       0.0342   

    acousticness  instrumentalness  liveness  valence  
2       0.000175          0.000459    0.0878    0.543  


### Spotify API Evaluation
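Get recommendations from Spotify by seeding it with the input playlist's track IDs, then compare the audio features of the returned songs with those of our recommended playlist. (An earlier variant that passed minimum and maximum feature values to Spotify is kept, commented out, in the next cell.)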

In [46]:
from spotify import SpotifyAPI
from dotenv import load_dotenv
# Get the input playlist.
# Traverse each song in the playlist, recording the min and max of every feature value,
# and store them in a dict that is passed as parameters to the Spotify API recommendations call.

# input_playlist = og_songs
# features = dict()
# feature_names = ["danceability","energy","loudness","speechiness","acousticness","instrumentalness","liveness", "valence"]

# for feature in feature_names:
#     features["min_" + feature] = input_playlist[feature].min()
#     features["max_" + feature] = input_playlist[feature].max()

# features["seed_genres"] = input_playlist['tags'].iloc[0].split(',')[:3]
# features["seed_genres"] = ','.join(features["seed_genres"])
# features

In [63]:
# Peek at the first entry of each Spotify comparison array. Per the output below,
# compare_spotify[0] is a 5x8 float matrix and spotify_input[0] holds five ID strings
# (presumably one audio-feature row per Spotify track ID, in the same order — TODO confirm).
print(compare_spotify[0])
print(spotify_input[0])

[[ 4.9800e-01  2.6400e-01 -2.0461e+01  4.1600e-02  4.3800e-01  4.8400e-01
   1.2400e-01  9.1800e-01]
 [ 6.2700e-01  4.6800e-01 -1.4815e+01  6.7700e-02  1.5500e-01  2.8100e-02
   9.6300e-02  4.1300e-01]
 [ 4.8900e-01  4.7500e-01 -7.8540e+00  2.4200e-02  1.7100e-01  2.6400e-02
   2.0900e-01  4.7400e-01]
 [ 3.1500e-01  5.7900e-01 -6.8260e+00  3.0900e-02  9.1100e-03  7.8900e-03
   1.5000e-01  4.0400e-01]
 [ 5.2300e-01  5.5000e-01 -7.1960e+00  2.7000e-02  5.2700e-01  7.4400e-01
   3.6700e-01  1.8900e-01]]
['0BYHHSxMS1zDQp85t4sxL0' '0d8roVOosN85HzEK0Oft3L'
 '1u2g7up9g5p7eXeuNyAXjt' '27ezR6SR0Q2YlpLXiMKP4y'
 '1XmIOdIP1Bnbd7ALE0dIi8']


In [64]:
# Same predict -> pipeline_helper flow as used for x_test[0], now fed with the first
# compare_spotify entry so the result can be compared with Spotify's own recommendations.
# Note: this overwrites the `predicted` value produced for x_test[0].
predicted = predict_sample(compare_spotify[0], model)
our_recs = pipeline_helper(distance_frame, predicted, POTENTIAL_N, lyrics_embeddings) 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 405ms/step
[8 7 6 5 9 1 4 0 2 3]


In [49]:
# Load the Spotify client credentials from misc/.env into the process environment.
env_path = os.path.join('misc', '.env')
load_dotenv(dotenv_path=env_path)
SPOTIFY_CREDS = [os.getenv('SPOTIFY_CLIENT_ID'), 
                os.getenv('SPOTIFY_CLIENT_SECRET')]

# Raise so the failure is reported in this cell and later cells do not run with a
# half-configured client. exit() is aimed at the interpreter/kernel and is not a
# reliable way to abort a notebook cell.
if not all(SPOTIFY_CREDS):
    raise RuntimeError("Please set the SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET environment variables")

spotify = SpotifyAPI(*SPOTIFY_CREDS)

# SECURITY: never end this cell with a bare `SPOTIFY_CREDS` expression (or print it):
# that writes the client id and secret into the saved notebook output.
# NOTE(review): an earlier revision of this cell did exactly that — treat those
# credentials as compromised and rotate them in the Spotify developer dashboard.

In [68]:
# Show the seed IDs that are about to be sent to Spotify.
# NOTE(review): the saved output of this cell is "<class 'numpy.ndarray'>", which this
# statement cannot print — the output looks stale (perhaps from an earlier
# print(type(...)) version); re-run the cell before sharing.
print(spotify_input[0])

<class 'numpy.ndarray'>


In [69]:
# Ask Spotify for 10 recommendations seeded with the IDs of the first input playlist.
# list(...) converts the NumPy array of IDs into a plain Python list before it is passed on.
# The result is indexed with .loc in the next cell, so it is expected to be a DataFrame.
recs_from_spotify = spotify.get_recommendations(seed_tracks=list(spotify_input[0]), limit=10)

In [70]:
# SPOTIFY's PREDICTED RECOMMENDATIONS
recs_from_spotify = recs_from_spotify.loc[:, ['name', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
                                'liveness', 'valence']]
recs_from_spotify

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
0,Backlines,0.515,0.777,-5.348,0.0367,0.0448,0.00172,0.112,0.512
1,Heavy Metal Drummer - 2022 Remaster,0.663,0.691,-7.516,0.036,0.0199,0.00949,0.37,0.816
2,Sycamore Down,0.717,0.0971,-17.715,0.0414,0.88,8e-06,0.0933,0.234
3,We Will Become Silhouettes - Remastered,0.606,0.555,-8.418,0.0319,0.0798,0.0712,0.12,0.155
4,Dinner Bells,0.516,0.634,-7.427,0.0304,0.775,0.319,0.138,0.133
5,My Girls,0.508,0.953,-6.031,0.0806,0.256,0.0445,0.929,0.555
6,Autumn Fallin',0.539,0.275,-11.801,0.0354,0.824,1e-06,0.0761,0.0902
7,All I Want,0.531,0.829,-7.015,0.0364,0.0521,0.882,0.106,0.952
8,Why Is the Night Sad?,0.394,0.312,-13.854,0.0315,0.891,0.32,0.0823,0.242
9,Don't Die in Me,0.582,0.434,-14.961,0.065,0.554,0.142,0.109,0.2


In [71]:
def calculate_euclidean_distance(v1, v2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        v1: Array-like of numbers (NumPy array, list, tuple, or the ``.values``
            of a pandas row).
        v2: Array-like of numbers, broadcast-compatible with ``v1``.

    Returns:
        The L2 norm of ``v1 - v2`` as a NumPy float.

    Raises:
        ValueError: If either input contains values that cannot be converted to float.
    """
    # Coerce to float arrays so plain Python sequences work (list - list is a TypeError)
    # and object-dtype pandas row values are handled with native float arithmetic.
    a = np.asarray(v1, dtype=float)
    b = np.asarray(v2, dtype=float)
    return np.linalg.norm(a - b)

def calculate_score(pd1, pd2):
    """Print and return, for each row of ``pd1``, its mean Euclidean distance to every row of ``pd2``.

    The first column of both frames is skipped by position (in this notebook it is the
    song ``name``); all remaining columns are treated as numeric features. Features are
    used as-is, without scaling.

    Args:
        pd1: DataFrame whose rows are scored (one score per row).
        pd2: DataFrame the rows of ``pd1`` are compared against. Must have the same
            number of columns as ``pd1``.

    Returns:
        list[float]: One mean distance per row of ``pd1``, in row order. Each value is
        also printed on its own line, as before. An entry is ``nan`` when ``pd2`` has
        no rows.

    Raises:
        ValueError: If the frames have different column counts, or a feature column
            cannot be converted to float.
    """
    if pd1.shape[1] != pd2.shape[1]:
        raise ValueError("Dataframes must have the same number of features.")

    # Drop the leading (non-feature) column and work on plain float matrices.
    left = pd1.iloc[:, 1:].to_numpy(dtype=float)
    right = pd2.iloc[:, 1:].to_numpy(dtype=float)

    # Broadcast (n1, 1, f) - (1, n2, f) -> (n1, n2, f); the norm over the feature axis
    # gives the full n1 x n2 matrix of pairwise distances in one step.
    pairwise = np.linalg.norm(left[:, None, :] - right[None, :, :], axis=2)
    scores = pairwise.mean(axis=1).tolist()

    for score in scores:
        print(score)
    return scores

In [72]:
# Compare our recommendations with Spotify's: one value is printed per song in our_recs,
# the mean Euclidean distance from that song to all of Spotify's recommended songs,
# computed over every column after the first (`name`).
# NOTE(review): the features are not scaled, so loudness (roughly -18 to -5 in the table
# above) dominates the 0-1 features in this distance — consider normalising before scoring.
calculate_score(our_recs, recs_from_spotify)

4.4775354849364515
3.597193169750189
3.6756047293838163
4.543599866804323
6.144331656953964
6.199067034583981
3.906910969220018
3.5808191696047507
3.528674871040298
4.410782000871417
