In [0]:
from joblib import load
import numpy as np
import pandas as pd

In [3]:
# Raw CSV of per-track features, fetched over HTTP from GitHub on every run.
datapath = 'https://raw.githubusercontent.com/Build-Spotify-flow/Data-science/master/Data/Dataset.csv'
# read_csv accepts a URL directly; the whole file is loaded into memory as `df`.
df = pd.read_csv(datapath)
# Preview the first rows to confirm the columns that were loaded.
df.head()

Unnamed: 0,artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
0,YG,2RM4jf1Xa9zPgMGRDiht8O,"Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj",0.00582,0.743,238373,0.339,0.0,1,0.0812,-7.678,1,0.409,203.927,4,0.118,15
1,YG,1tHDG53xJNGsItRA3vfVgs,BAND DRUM (feat. A$AP Rocky),0.0244,0.846,214800,0.557,0.0,8,0.286,-7.259,1,0.457,159.009,4,0.371,0
2,R3HAB,6Wosx2euFPMT14UXiWudMy,Radio Silence,0.025,0.603,138913,0.723,0.0,9,0.0824,-5.89,0,0.0454,114.966,4,0.382,56
3,Chris Cooq,3J2Jpw61sO7l6Hc7qdYV91,Lactose,0.0294,0.8,125381,0.579,0.912,5,0.0994,-12.118,0,0.0701,123.003,4,0.641,0
4,Chris Cooq,2jbYvQCyPgX3CdmAzeVeuS,Same - Original mix,3.5e-05,0.783,124016,0.792,0.878,7,0.0332,-10.277,1,0.0661,120.047,4,0.928,0


In [4]:
# Text/identifier columns that are excluded from the numeric feature frame.
non_numeric = ['artist_name', 'track_id', 'track_name']
# Keep every remaining column. The 'Unnamed: 0' column read from the CSV is
# NOT dropped here, so it stays in `num` alongside the audio features.
num = df.drop(columns=non_numeric)
num.head()

Unnamed: 0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
0,0.00582,0.743,238373,0.339,0.0,1,0.0812,-7.678,1,0.409,203.927,4,0.118,15
1,0.0244,0.846,214800,0.557,0.0,8,0.286,-7.259,1,0.457,159.009,4,0.371,0
2,0.025,0.603,138913,0.723,0.0,9,0.0824,-5.89,0,0.0454,114.966,4,0.382,56
3,0.0294,0.8,125381,0.579,0.912,5,0.0994,-12.118,0,0.0701,123.003,4,0.641,0
4,3.5e-05,0.783,124016,0.792,0.878,7,0.0332,-10.277,1,0.0661,120.047,4,0.928,0


### Using the pickled models!

In [0]:
# Load the pickled model that `recommendations` queries via model.query.
# joblib.load unpickles the file, which can execute arbitrary code -- only load
# artifacts from a source you trust.
# NOTE(review): '/content/...' is a hard-coded absolute path; the file must
# already exist there before this cell runs.
model = load('/content/NNmodel.joblib')

In [0]:
# Load the pickled scaler whose transform() is applied to `num`.
# Same caveats as any joblib/pickle load: trusted files only, and the
# hard-coded absolute path must exist before this cell runs.
scaler = load('/content/scaler.joblib')

In [0]:
# Apply the loaded scaler to every column of `num`; rows of `scaled` are later
# selected by position inside `recommendations`.
# NOTE(review): `num` still contains the 'Unnamed: 0' column from the CSV --
# confirm the scaler and model were fitted with that column included.
scaled = scaler.transform(num)

In [0]:
def get_index(song_id):
  '''
  Returns the index number of the song to be used in KDTree model

  song_id = a list (or other iterable) of track_id strings. A single bare
            string is also accepted and treated as a one-element list.

  Single Example: get_index(['3J2Jpw61sO7l6Hc7qdYV91'])
  
  Multiple Ex: get_index(['3J2Jpw61sO7l6Hc7qdYV91', '6Wosx2euFPMT14UXiWudMy',
                          '2jbYvQCyPgX3CdmAzeVeuS', '7BQaRTHk44DkMhIVNcXy2D'])

  returns a list with one index per requested track_id, in input order. When a
  track_id occurs more than once in the dataset, the first match is used.

  raises IndexError if a track_id is not present in df['track_id']
  '''
  # A bare string would otherwise be iterated character by character and every
  # character would be looked up as if it were a track_id.
  if isinstance(song_id, str):
    song_id = [song_id]

  song_index = []
  for track in song_id:
    matches = df[df['track_id'] == track].index
    if len(matches) == 0:
      # Same exception type as before, but naming the id that was not found.
      raise IndexError('track_id {!r} not found in dataset'.format(track))
    song_index.append(matches[0])
  return song_index

In [0]:
# Seed track for the single-song example (this id is row 0 in the df.head() preview).
song_id = ['2RM4jf1Xa9zPgMGRDiht8O']

In [69]:
# Translate the seed track_id into its DataFrame index, then display it.
song_index = get_index(song_id)
song_index

[0]

In [0]:
def recommendations(song_index, k=11):
  '''
  Query the loaded model for the nearest neighbours of the seed song(s).

  song_index = position(s) in `scaled` of the song(s) used as seed

  k = number of neighbours requested per seed; the default of 11 leaves
      10 recommendations, because the first result is the input song itself

  returns (distance, indices): the first and second items of model.query's result
  '''
  seed_rows = scaled[song_index]
  result = model.query(seed_rows, k=k)
  return result[0], result[1]

In [0]:
# Query with the default k=11 for the single seed song.
dist, ind = recommendations(song_index)

In [72]:
# Distance (similarity) from the original song
dist

array([[0.        , 1.51707785, 1.53074399, 1.61287789, 1.67284757,
        1.71847884, 1.73613316, 1.76147125, 1.77136484, 1.77275347,
        1.8076944 ]])

In [73]:
# Indices of recommended songs
ind

array([[     0,  53717,  51244,  24890,  43424,  92336,  39349, 116877,
         19909,  24230,  57691]])

In [74]:
# Full stats on recommended songs ([1:] to exclude first entry which is always
# the song we are recommending from)
df.iloc[ind[0][1:]]

Unnamed: 0,artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
53717,Swank Davis,2ogGsvbGjJz71Ppc9mTExA,Rapper Actor,0.145,0.699,168840,0.405,0.000977,1,0.0858,-9.815,1,0.432,181.94,4,0.292,0
51244,Paul Ma$$on,56GWUZPELOP1nK4hQwfVVV,God Given,0.0394,0.617,264986,0.431,0.0,2,0.0703,-12.907,1,0.413,173.915,4,0.127,17
24890,Gyyps,02s1Voowwhr0qTSOrMVEXk,PRETEND,0.234,0.771,182884,0.411,1e-06,1,0.114,-6.952,1,0.37,171.945,4,0.31,19
43424,Don Macki,1rG9U7m2wqM0AVmZeIsdtz,Pass It,0.239,0.744,238621,0.529,0.0,1,0.112,-9.066,1,0.389,174.045,4,0.217,0
92336,T-Pain,3UDOPqusSZZ7NqyWyMMk50,That's Yo Money,0.0776,0.656,244898,0.459,1.2e-05,1,0.136,-9.405,1,0.366,164.018,4,0.103,28
39349,HBK,7oDuAatXvqWD8DAa87KRJ2,Pull Mercedes Up (feat. Payroll Giovanni),0.0362,0.779,190877,0.558,0.0,1,0.0705,-4.679,1,0.324,169.96,4,0.222,16
116877,Ca$his,0MiAP4EQGIuikH23RTyP53,What,0.00497,0.753,152704,0.502,0.0,1,0.0903,-8.804,1,0.313,169.834,4,0.219,7
19909,Caash,2lD4dQpcac8H2Rq5nttJJh,Bands,0.0242,0.723,127957,0.593,0.0152,2,0.105,-8.655,1,0.387,180.024,4,0.142,0
24230,Secret Circle,2MwIkkJujEmXa7o9Nc6HeI,Ounce of It,0.0184,0.638,247978,0.542,0.0,1,0.0847,-9.344,1,0.476,178.007,4,0.31,0
57691,Rah-C,06oDi0OztS33sxfYtxJPO2,Waterfall,0.187,0.598,175984,0.542,0.0,2,0.118,-9.151,1,0.334,178.491,4,0.258,14


In [75]:
# Just the track_ids of the recommendations, as a NumPy array
df['track_id'].iloc[ind[0][1:]].values

array(['2ogGsvbGjJz71Ppc9mTExA', '56GWUZPELOP1nK4hQwfVVV',
       '02s1Voowwhr0qTSOrMVEXk', '1rG9U7m2wqM0AVmZeIsdtz',
       '3UDOPqusSZZ7NqyWyMMk50', '7oDuAatXvqWD8DAa87KRJ2',
       '0MiAP4EQGIuikH23RTyP53', '2lD4dQpcac8H2Rq5nttJJh',
       '2MwIkkJujEmXa7o9Nc6HeI', '06oDi0OztS33sxfYtxJPO2'], dtype=object)

In [76]:
# The same track_ids as a plain Python list
df['track_id'].iloc[ind[0][1:]].to_list()

['2ogGsvbGjJz71Ppc9mTExA',
 '56GWUZPELOP1nK4hQwfVVV',
 '02s1Voowwhr0qTSOrMVEXk',
 '1rG9U7m2wqM0AVmZeIsdtz',
 '3UDOPqusSZZ7NqyWyMMk50',
 '7oDuAatXvqWD8DAa87KRJ2',
 '0MiAP4EQGIuikH23RTyP53',
 '2lD4dQpcac8H2Rq5nttJJh',
 '2MwIkkJujEmXa7o9Nc6HeI',
 '06oDi0OztS33sxfYtxJPO2']

In [77]:
# As a dict/json, keyed by each song's DataFrame index label.
# NOTE(review): the displayed dict is ordered by key, so the similarity ranking
# is not visible in this form -- use the list above when order matters.
df['track_id'].iloc[ind[0][1:]].to_dict()

{19909: '2lD4dQpcac8H2Rq5nttJJh',
 24230: '2MwIkkJujEmXa7o9Nc6HeI',
 24890: '02s1Voowwhr0qTSOrMVEXk',
 39349: '7oDuAatXvqWD8DAa87KRJ2',
 43424: '1rG9U7m2wqM0AVmZeIsdtz',
 51244: '56GWUZPELOP1nK4hQwfVVV',
 53717: '2ogGsvbGjJz71Ppc9mTExA',
 57691: '06oDi0OztS33sxfYtxJPO2',
 92336: '3UDOPqusSZZ7NqyWyMMk50',
 116877: '0MiAP4EQGIuikH23RTyP53'}

### Using 2 Songs

In [78]:
# Two seed tracks this time (rows 0 and 1 of the df.head() preview).
song_id = ['2RM4jf1Xa9zPgMGRDiht8O', '1tHDG53xJNGsItRA3vfVgs']
# One index per seed, in the same order as song_id.
song_index = get_index(song_id)
song_index

[0, 1]

In [0]:
# Each seed is queried independently; results come back with one row per seed.
dist, ind = recommendations(song_index)

In [80]:
# Distance (similarity) from the original songs, one row per seed song
dist

array([[0.        , 1.51707785, 1.53074399, 1.61287789, 1.67284757,
        1.71847884, 1.73613316, 1.76147125, 1.77136484, 1.77275347,
        1.8076944 ],
       [0.        , 1.19710936, 1.2072914 , 1.23143485, 1.2427973 ,
        1.27617768, 1.29284688, 1.39535769, 1.40511457, 1.41506049,
        1.43229805]])

In [81]:
# Indices of recommended songs, one row per seed song
ind

array([[     0,  53717,  51244,  24890,  43424,  92336,  39349, 116877,
         19909,  24230,  57691],
       [     1,  82704,  79610,  81094, 100569,  84494,  67682,  87994,
         21132,  54058,  50984]])

In [82]:
# Full stats on recommended songs ([1:] to exclude first entry which is always
# the song we are recommending from) for the first input
df.iloc[ind[0][1:]]

Unnamed: 0,artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
53717,Swank Davis,2ogGsvbGjJz71Ppc9mTExA,Rapper Actor,0.145,0.699,168840,0.405,0.000977,1,0.0858,-9.815,1,0.432,181.94,4,0.292,0
51244,Paul Ma$$on,56GWUZPELOP1nK4hQwfVVV,God Given,0.0394,0.617,264986,0.431,0.0,2,0.0703,-12.907,1,0.413,173.915,4,0.127,17
24890,Gyyps,02s1Voowwhr0qTSOrMVEXk,PRETEND,0.234,0.771,182884,0.411,1e-06,1,0.114,-6.952,1,0.37,171.945,4,0.31,19
43424,Don Macki,1rG9U7m2wqM0AVmZeIsdtz,Pass It,0.239,0.744,238621,0.529,0.0,1,0.112,-9.066,1,0.389,174.045,4,0.217,0
92336,T-Pain,3UDOPqusSZZ7NqyWyMMk50,That's Yo Money,0.0776,0.656,244898,0.459,1.2e-05,1,0.136,-9.405,1,0.366,164.018,4,0.103,28
39349,HBK,7oDuAatXvqWD8DAa87KRJ2,Pull Mercedes Up (feat. Payroll Giovanni),0.0362,0.779,190877,0.558,0.0,1,0.0705,-4.679,1,0.324,169.96,4,0.222,16
116877,Ca$his,0MiAP4EQGIuikH23RTyP53,What,0.00497,0.753,152704,0.502,0.0,1,0.0903,-8.804,1,0.313,169.834,4,0.219,7
19909,Caash,2lD4dQpcac8H2Rq5nttJJh,Bands,0.0242,0.723,127957,0.593,0.0152,2,0.105,-8.655,1,0.387,180.024,4,0.142,0
24230,Secret Circle,2MwIkkJujEmXa7o9Nc6HeI,Ounce of It,0.0184,0.638,247978,0.542,0.0,1,0.0847,-9.344,1,0.476,178.007,4,0.31,0
57691,Rah-C,06oDi0OztS33sxfYtxJPO2,Waterfall,0.187,0.598,175984,0.542,0.0,2,0.118,-9.151,1,0.334,178.491,4,0.258,14


In [83]:
# Full stats on recommended songs ([1:] to exclude first entry which is always
# the song we are recommending from) for the second input
df.iloc[ind[1][1:]]

Unnamed: 0,artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
82704,Yung Finnatic$,76wqmxKVKXradVxmfGpZfX,"Audi (feat. Skeemo, Jay Critch & Yoshi Lite)",0.257,0.731,212500,0.624,0.0,8,0.257,-8.856,1,0.386,164.943,4,0.292,0
79610,Kenny Muney,0x1MJ34QQHGK9nTmOdSe33,Ian Going,0.0567,0.763,215856,0.497,1.8e-05,10,0.297,-11.091,1,0.436,145.057,4,0.313,10
81094,Mark Battles,0D8Qknpc7ZtnOU4pEwXGos,Amber Rose,0.0575,0.726,222877,0.593,0.0,7,0.231,-6.883,1,0.434,132.019,4,0.422,3
100569,Mukesh,3eUyRzsgH4Vjraw3oVqVDg,Uttalakkadi,0.0381,0.837,218630,0.605,1.8e-05,7,0.232,-7.96,1,0.327,145.973,4,0.418,0
84494,Kacee Playaa,51jT8cWCrANwOvDkzqKpF4,Credits,0.112,0.849,121783,0.728,0.0,7,0.219,-5.128,1,0.441,153.087,4,0.266,0
67682,'Lgado,7vBFNgj6XAYYcp6pKuvjp2,Shinin',0.0143,0.837,170580,0.529,0.0,9,0.111,-9.108,1,0.431,150.044,4,0.33,7
87994,G2g,4uO9h2EWJqr1ViWEON957k,Rainin' (feat. Jae Mo),0.00903,0.726,209580,0.54,0.000108,8,0.114,-8.431,1,0.392,165.04,4,0.281,3
21132,Chevy Woods,1Dfwu69pIy52gVUzbJ6Bwk,On Me,0.0716,0.911,171628,0.529,0.0,8,0.344,-8.743,1,0.356,140.997,4,0.4,14
54058,M.I. Abaga,1SE0FG6lc3MMZoHj65DTVq,Soup,0.113,0.821,226000,0.622,0.0,7,0.165,-6.278,1,0.361,138.037,4,0.471,0
50984,Beatking,7A5WzjLKRSglnolIcKDJHm,Head Down,0.0439,0.855,182520,0.501,0.0,9,0.13,-5.837,1,0.44,160.086,4,0.523,15
