# Mock Spotify Song Recommender Model & Explainer
_Author: Jonathan Finger_

## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

## Helper Functions

In [2]:
def trackname_from_index(index):
    return df[df.index == index]["track_name"].values.astype(str)[0]

def artist_from_index(index):
    return df[df.index == index]["artist_name"].values.astype(str)[0]

def index_from_trackid(trackid):
    return df[df['track_id'] == trackid].index.values.astype(int)[0]

## Mock Data v2
Spotify song sample from kaggle challenge ()†

In [3]:
DATA_PATH = "./data/"
pd.options.display.max_columns = None
df = pd.read_csv(DATA_PATH+'SpotifyAudioFeaturesApril2019.csv')

In [4]:
df

Unnamed: 0,artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
0,YG,2RM4jf1Xa9zPgMGRDiht8O,"Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj",0.005820,0.743,238373,0.339,0.000,1,0.0812,-7.678,1,0.4090,203.927,4,0.1180,15
1,YG,1tHDG53xJNGsItRA3vfVgs,BAND DRUM (feat. A$AP Rocky),0.024400,0.846,214800,0.557,0.000,8,0.2860,-7.259,1,0.4570,159.009,4,0.3710,0
2,R3HAB,6Wosx2euFPMT14UXiWudMy,Radio Silence,0.025000,0.603,138913,0.723,0.000,9,0.0824,-5.890,0,0.0454,114.966,4,0.3820,56
3,Chris Cooq,3J2Jpw61sO7l6Hc7qdYV91,Lactose,0.029400,0.800,125381,0.579,0.912,5,0.0994,-12.118,0,0.0701,123.003,4,0.6410,0
4,Chris Cooq,2jbYvQCyPgX3CdmAzeVeuS,Same - Original mix,0.000035,0.783,124016,0.792,0.878,7,0.0332,-10.277,1,0.0661,120.047,4,0.9280,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130658,Calum Scott,0cvfSKcm9VeduwyYPrxtLx,Come Back Home,0.006780,0.601,190539,0.801,0.000,11,0.0991,-5.174,1,0.0323,131.049,4,0.2890,57
130659,Saint Claire,43MP9F7UzvfilSrw2SqZGJ,Enough for You,0.918000,0.387,194583,0.249,0.000,9,0.1030,-13.233,1,0.0437,94.039,4,0.3460,60
130660,Mike Stud,4TWlUuFk81NGUNKwndyS5Q,Do It,0.330000,0.717,139191,0.532,0.000,8,0.0997,-8.351,0,0.2060,156.977,4,0.5460,47
130661,D Savage,5iGBXzOoRo4sBTy8wdzMyK,No Smoke,0.007900,0.772,180013,0.510,0.000,4,0.1310,-9.670,0,0.1200,120.049,4,0.0755,50


## Model preparation

In [5]:
features = [
 'danceability',
 'duration_ms',
 'energy',
 'key',
 'liveness',
 'loudness',
#  'instrumentalness',
#  'acousticness',
#  'mode',
#  'speechiness',
#  'tempo',
#  'time_signature',
#  'valence',
#  'popularity'
           ]

In [6]:
df1 = df.copy()
df1 = df1.head(75000)

In [7]:
cosine_simil = cosine_similarity(df1[features])

In [8]:
test_trackid = index_from_trackid('2jbYvQCyPgX3CdmAzeVeuS')

In [9]:
#Create list of similar songs
similar_songs =  list(enumerate(cosine_simil[test_trackid]))

In [10]:
sorted_similar_songs = sorted(similar_songs,key=lambda x:x[1],reverse=True)

In [11]:
## Print titles of first 10 songs
i=0
for song in sorted_similar_songs:
    if i == 0:
        i = i+1
    else:
        print(f'No. {i} "{trackname_from_index(song[0])}" by {artist_from_index(song[0])})')
        i=i+1
        if i>10:
            break

No. 1 "Sunshine Dancer 2" by Daniel Miller)
No. 2 "Grateful - Original mix" by Fishman)
No. 3 "Horchata" by FrankJavCee)
No. 4 "Sunshine Dancer - Remix" by Daniel Miller)
No. 5 "Love" by Jd Flames)
No. 6 "Don't Play" by Dullastar)
No. 7 "Waves (ASOT 850 - Part 2)" by Kryder)
No. 8 "Fortnite" by Cmoney2423)
No. 9 "Bass Way" by Beeralit)
No. 10 "Bunky" by Dropping Ugly)
