In [4]:
import pandas as pd
import numpy as np
from joblib import dump
from sklearn.neighbors import NearestNeighbors
from joblib import dump

In [None]:
"""
Primary:
- id (Id of track generated by Spotify)
Numerical:
- acousticness (Ranges from 0 to 1)
- danceability (Ranges from 0 to 1)
- energy (Ranges from 0 to 1)
- duration_ms (Integer typically ranging from 200k to 300k)
- instrumentalness (Ranges from 0 to 1)
- valence (Ranges from 0 to 1)
- popularity (Ranges from 0 to 100)
- tempo (Float typically ranging from 50 to 150)
- liveness (Ranges from 0 to 1)
- loudness (Float typically ranging from -60 to 0)
- speechiness (Ranges from 0 to 1)
- year (Ranges from 1921 to 2020)
Dummy:
- mode (0 = Minor, 1 = Major)
- explicit (0 = No explicit content, 1 = Explicit content)
Categorical:
- key (All keys on octave encoded as values ranging from 0 to 11, starting on C as 0, C# as 1 and so on…)
- artists (List of artists mentioned)
- release_date (Date of release mostly in yyyy-mm-dd format, however precision of date may vary)
- name (Name of the song)"""

In [13]:
def song_model(input):
    df = pd.read_csv('data/edited_data.csv')
    # adding direct url to data set by adding url prefix and id
    url = 'http://open.spotify.com/track/' + df['id']
    df['url'] = url
    
    # reordering columns, leaving out ID and release date
    df = df[['artists',  'name', 'url', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy',
       'explicit', 'instrumentalness', 'key', 'liveness', 'loudness',
       'mode', 'popularity', 'speechiness', 'tempo', 'valence']]
    
   # target set will be both artist and name
    y_set = ['artists', 'name', 'url']

    # droping target from data matrix
    df_data = df.drop(y_set, axis=1)

    # set target
    df_target = df[y_set]

    # fit on data, 12 neighbors
    nn = NearestNeighbors(algorithm='brute', leaf_size =15, n_neighbors=12, n_jobs=-1)
    nn.fit(df_data)

    # sample a song(index) from df_data to use as our query point 
    input_index=input

    # vectorize 
    data_vect = [df_data.iloc[input_index].values]
    neigh_dist, neigh_indices = nn.kneighbors(data_vect)
    indexs = neigh_indices.flat[0:12].tolist()

    #adding url to each track
    output = df_target.iloc[indexs]
    return (output)

In [22]:
input = 'Golfing Papa'
index = df.loc[df.isin([input]).any(axis=1)].index.tolist()
index = index[0]
model = song_model(index) 
model

Unnamed: 0,artists,name,url
2,MAMIE SMITH,Golfing Papa,http://open.spotify.com/track/11m7laMUgmOKqI3o...
76845,IGNACIO CORSINI,Amigazo - Remasterizado,http://open.spotify.com/track/4jZqFCvnJGRmpX5u...
41957,OLGA SVENDSEN,Jeg har en ven,http://open.spotify.com/track/1Tx1znywMQIFw7XB...
20807,FRANCISCO CANARO,El Hijo de Julián - Instrumental (Remasterizado),http://open.spotify.com/track/2vcuvAEPQpAzP76l...
77006,SEWERYN GOSZCZYŃSKI,Chapter 3.12 - Zamek kaniowski,http://open.spotify.com/track/4Vwtssoxgp8pLoCI...
24127,LEELA BAI,Mat Josh Men Aa Tu Hosh Men Aa,http://open.spotify.com/track/11tbdjA0CzMCaGap...
76935,FRANCISCO CANARO,Tus Besos Fueron Mios - Remasterizado,http://open.spotify.com/track/5Mbw0ksTVAn1A6ul...
43884,"GIACOMO PUCCINI', 'GIUSEPPE ANTONICELLI', 'MET...",La Bohème: Act I - Ehi! Rodolfo!,http://open.spotify.com/track/14CTJThAB8YQa2hs...
4120,"LENA HORNE', 'LENNIE HAYTON & HIS ORCHESTRA",It's All Right With Me - Remastered - April 1992,http://open.spotify.com/track/5kJcvEF9W4zfglzq...
21880,VAL ROSING,Wonderful You,http://open.spotify.com/track/33VyqPeK0dpKY58H...


In [17]:
input = 'Golfing Papa'
index = df.loc[df.isin([input]).any(axis=1)].index.tolist()
index = index[0]
index

2

In [19]:
type(index)

int

In [20]:
type(174385)

int

In [8]:
song_model(174385)

Unnamed: 0,artists,name,url
174381,ALESSIA CARA,A Little More,http://open.spotify.com/track/4pPFI9jsguIh3wC7...
174383,ALESSIA CARA,A Little More,http://open.spotify.com/track/52YtxLVUyvtiGPxw...
174377,ALESSIA CARA,A Little More,http://open.spotify.com/track/3N3Wi5Un7iT8amLe...
174385,ALESSIA CARA,A Little More,http://open.spotify.com/track/7tue2Wemjd0FZzRt...
174379,ALESSIA CARA,A Little More,http://open.spotify.com/track/45XnLMuqf3vRfskE...
157514,"ROY ORBISON', 'ALEX ORBISON', 'CHUCK TURNER","Go, Go, Go (Down The Line) - Remastered 2015",http://open.spotify.com/track/5H7orZTIjEINJRPg...
91653,CLIFF BENNETT & THE REBEL ROUSERS,Said I Weren't Gonna Tell Nobody - Mono; 2009 ...,http://open.spotify.com/track/0QdfbadoCPZw95ml...
122369,"PHIL COLLINS', '*NSYNC","Trashin' The Camp - From ""Tarzan""/Soundtrack V...",http://open.spotify.com/track/0ZtBhi59lpoOk7Zx...
148417,HARRY NILSSON,Bath,http://open.spotify.com/track/2iPyjxb7vNKfoGvr...
162467,TITO PUENTE,Batacumba,http://open.spotify.com/track/1XKSQTNh2ear1TYP...


In [11]:
# reading csv
df = pd.read_csv('data/edited_data.csv')

In [12]:
df

Unnamed: 0.1,Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0,0.991000,MAMIE SMITH,0.598,168333,0.224,0,0cS0A1fUEUd1EW3FcF8AEI,0.000522,5,0.3790,-12.628,0,Keep A Song In Your Soul,12,1920,0.0936,149.976,0.6340,1920
1,1,0.643000,SCREAMIN' JAY HAWKINS,0.852,150200,0.517,0,0hbkKFIJm7Z05H8Zl9w30f,0.026400,5,0.0809,-7.261,0,I Put A Spell On You,7,1920-01-05,0.0534,86.889,0.9500,1920
2,2,0.993000,MAMIE SMITH,0.647,163827,0.186,0,11m7laMUgmOKqI3oYzuhne,0.000018,0,0.5190,-12.098,1,Golfing Papa,4,1920,0.1740,97.600,0.6890,1920
3,3,0.000173,OSCAR VELAZQUEZ,0.730,422087,0.798,0,19Lc5SfJJ5O1oaxY0fpwfh,0.801000,2,0.1280,-7.311,1,True House Music - Xavier Santos & Carlos Gomi...,17,1920-01-01,0.0425,127.997,0.0422,1920
4,4,0.295000,MIXE,0.704,165224,0.707,1,2hJjbsLCytGsnAHfdsLejp,0.000246,10,0.4020,-6.036,0,Xuniverxe,2,1920-10-01,0.0768,122.076,0.2990,1920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174384,174384,0.009170,"DJ COMBO', 'SANDER-7', 'TONY T",0.792,147615,0.866,0,46LhBf6TvYjZU2SMvGZAbn,0.000060,6,0.1780,-5.089,0,The One,0,2020-12-25,0.0356,125.972,0.1860,2020
174385,174385,0.795000,ALESSIA CARA,0.429,144720,0.211,0,7tue2Wemjd0FZzRtDrQFZd,0.000000,4,0.1960,-11.665,1,A Little More,0,2021-01-22,0.0360,94.710,0.2280,2021
174386,174386,0.806000,ROGER FLY,0.671,218147,0.589,0,48Qj61hOdYmUCFJbpQ29Ob,0.920000,4,0.1130,-12.393,0,Together,0,2020-12-09,0.0282,108.058,0.7140,2020
174387,174387,0.920000,TAYLOR SWIFT,0.462,244000,0.240,1,1gcyHQpBQ1lfXGdhZmWrHP,0.000000,0,0.1130,-12.077,1,champagne problems,69,2021-01-07,0.0377,171.319,0.3200,2021


In [4]:
# adding direct url to data set by adding url prefix and id
url = 'http://open.spotify.com/track/' + df['id']
df['url'] = url

In [5]:
# reordering columns, leaving out ID and release date
df = df[['artists',  'name', 'url', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy',
       'explicit', 'instrumentalness', 'key', 'liveness', 'loudness',
       'mode', 'popularity', 'speechiness', 'tempo', 'valence']]

In [16]:
def org_df():
    df = pd.read_csv('data/edited_data.csv')
    url = 'http://open.spotify.com/track/' + df['id']
    df['url'] = url
    df = df[['artists',  'name', 'url', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy',
       'explicit', 'instrumentalness', 'key', 'liveness', 'loudness',
       'mode', 'popularity', 'speechiness', 'tempo', 'valence']]
    return df

In [18]:
df = org_df()

In [19]:
df

Unnamed: 0,artists,name,url,year,acousticness,danceability,duration_ms,energy,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,valence
0,MAMIE SMITH,Keep A Song In Your Soul,http://open.spotify.com/track/0cS0A1fUEUd1EW3F...,1920,0.991000,0.598,168333,0.224,0,0.000522,5,0.3790,-12.628,0,12,0.0936,149.976,0.6340
1,SCREAMIN' JAY HAWKINS,I Put A Spell On You,http://open.spotify.com/track/0hbkKFIJm7Z05H8Z...,1920,0.643000,0.852,150200,0.517,0,0.026400,5,0.0809,-7.261,0,7,0.0534,86.889,0.9500
2,MAMIE SMITH,Golfing Papa,http://open.spotify.com/track/11m7laMUgmOKqI3o...,1920,0.993000,0.647,163827,0.186,0,0.000018,0,0.5190,-12.098,1,4,0.1740,97.600,0.6890
3,OSCAR VELAZQUEZ,True House Music - Xavier Santos & Carlos Gomi...,http://open.spotify.com/track/19Lc5SfJJ5O1oaxY...,1920,0.000173,0.730,422087,0.798,0,0.801000,2,0.1280,-7.311,1,17,0.0425,127.997,0.0422
4,MIXE,Xuniverxe,http://open.spotify.com/track/2hJjbsLCytGsnAHf...,1920,0.295000,0.704,165224,0.707,1,0.000246,10,0.4020,-6.036,0,2,0.0768,122.076,0.2990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174384,"DJ COMBO', 'SANDER-7', 'TONY T",The One,http://open.spotify.com/track/46LhBf6TvYjZU2SM...,2020,0.009170,0.792,147615,0.866,0,0.000060,6,0.1780,-5.089,0,0,0.0356,125.972,0.1860
174385,ALESSIA CARA,A Little More,http://open.spotify.com/track/7tue2Wemjd0FZzRt...,2021,0.795000,0.429,144720,0.211,0,0.000000,4,0.1960,-11.665,1,0,0.0360,94.710,0.2280
174386,ROGER FLY,Together,http://open.spotify.com/track/48Qj61hOdYmUCFJb...,2020,0.806000,0.671,218147,0.589,0,0.920000,4,0.1130,-12.393,0,0,0.0282,108.058,0.7140
174387,TAYLOR SWIFT,champagne problems,http://open.spotify.com/track/1gcyHQpBQ1lfXGdh...,2021,0.920000,0.462,244000,0.240,1,0.000000,0,0.1130,-12.077,1,69,0.0377,171.319,0.3200


In [6]:
# Previewing datset
df.head(5)

Unnamed: 0,artists,name,url,year,acousticness,danceability,duration_ms,energy,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,valence
0,MAMIE SMITH,Keep A Song In Your Soul,http://open.spotify.com/track/0cS0A1fUEUd1EW3F...,1920,0.991,0.598,168333,0.224,0,0.000522,5,0.379,-12.628,0,12,0.0936,149.976,0.634
1,SCREAMIN' JAY HAWKINS,I Put A Spell On You,http://open.spotify.com/track/0hbkKFIJm7Z05H8Z...,1920,0.643,0.852,150200,0.517,0,0.0264,5,0.0809,-7.261,0,7,0.0534,86.889,0.95
2,MAMIE SMITH,Golfing Papa,http://open.spotify.com/track/11m7laMUgmOKqI3o...,1920,0.993,0.647,163827,0.186,0,1.8e-05,0,0.519,-12.098,1,4,0.174,97.6,0.689
3,OSCAR VELAZQUEZ,True House Music - Xavier Santos & Carlos Gomi...,http://open.spotify.com/track/19Lc5SfJJ5O1oaxY...,1920,0.000173,0.73,422087,0.798,0,0.801,2,0.128,-7.311,1,17,0.0425,127.997,0.0422
4,MIXE,Xuniverxe,http://open.spotify.com/track/2hJjbsLCytGsnAHf...,1920,0.295,0.704,165224,0.707,1,0.000246,10,0.402,-6.036,0,2,0.0768,122.076,0.299


In [15]:
# finding a song I want to use as an input
drake = df[df['artists'] == 'DRAKE'].sort_values('popularity', ascending=False)

In [16]:
drake

Unnamed: 0,artists,name,url,year,acousticness,danceability,duration_ms,energy,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,valence
19542,DRAKE,God's Plan,http://open.spotify.com/track/6DCZcSspjsKoFjzj...,2018,0.0332,0.754,198973,0.449,1,0.000083,7,0.552,-9.211,1,83,0.1090,77.169,0.357
58286,DRAKE,Toosie Slide,http://open.spotify.com/track/127QTOFJsJQp5LbJ...,2020,0.3210,0.834,247059,0.454,1,0.000006,1,0.114,-9.750,0,83,0.2010,81.618,0.837
19360,DRAKE,Passionfruit,http://open.spotify.com/track/5mCPDVBb16L4XQwD...,2017,0.2560,0.809,298941,0.463,1,0.085000,11,0.109,-11.377,1,79,0.0396,111.980,0.364
93860,DRAKE,Toosie Slide,http://open.spotify.com/track/466cKvZn1j45IpxD...,2020,0.2890,0.830,247059,0.490,1,0.000003,1,0.113,-8.820,0,78,0.2090,81.604,0.845
38914,DRAKE,In My Feelings,http://open.spotify.com/track/2G7V7zsVDxg1yRsu...,2018,0.0589,0.835,217925,0.626,1,0.000060,1,0.396,-5.833,1,78,0.1250,91.030,0.350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140945,DRAKE,The Resistance,http://open.spotify.com/track/0llA0pYA6GpGk7fT...,2010,0.8220,0.547,225360,0.741,1,0.000000,0,0.117,-8.127,1,49,0.1630,82.589,0.637
156478,DRAKE,Say What's Real,http://open.spotify.com/track/7mPoCVGP752A5DtH...,2009,0.6320,0.462,230717,0.773,1,0.000000,8,0.212,-5.375,1,48,0.2880,86.157,0.455
156388,DRAKE,Congratulations,http://open.spotify.com/track/3SnXwQUrvSacFziU...,2009,0.0185,0.333,332530,0.859,1,0.000000,6,0.220,-2.286,1,48,0.1480,86.882,0.125
156645,DRAKE,Thank Me Now,http://open.spotify.com/track/3cBUv0RIoEyAm2b7...,2010,0.2820,0.495,328573,0.772,1,0.000000,8,0.119,-4.851,1,47,0.4250,71.295,0.748


In [17]:
# target set will be both artist and name
y_set = ['artists', 'name', 'url']

# droping target from data matrix
df_data = df.drop(y_set, axis=1)

# set target
df_target = df[y_set]

In [18]:
# fit on data, 12 neighbors
nn = NearestNeighbors(algorithm='brute', leaf_size =15, n_neighbors=12, n_jobs=-1)
nn.fit(df_data)

NearestNeighbors(algorithm='brute', leaf_size=15, metric='minkowski',
                 metric_params=None, n_jobs=-1, n_neighbors=12, p=2,
                 radius=1.0)

In [19]:
# sample a song(index) from df_data to use as our query point 
input_index = 58286 # Drake - Tootsie slide

# vectorize 
data_vect = [df_data.iloc[input_index].values]
data_vect

[array([ 2.02000e+03,  3.21000e-01,  8.34000e-01,  2.47059e+05,
         4.54000e-01,  1.00000e+00,  6.15000e-06,  1.00000e+00,
         1.14000e-01, -9.75000e+00,  0.00000e+00,  8.30000e+01,
         2.01000e-01,  8.16180e+01,  8.37000e-01])]

In [20]:
# Query Using kneighbors 
neigh_dist, neigh_indices = nn.kneighbors(data_vect)

In [21]:
# top 12 closest data vectors to our reference vector, data_vect
neigh_dist

array([[ 0.        ,  5.08601625, 30.97209819, 40.26932408, 45.41215041,
        50.32443327, 50.39740189, 52.72150335, 55.27282865, 55.93455796,
        57.05886579, 57.85963648]])

In [22]:
# these are the corresponding indicies of the most similar vectors 
neigh_indices

array([[ 58286,  93860,  55254, 107239,  15938,  56532, 155227,  33730,
         92082, 105990, 120108,  70300]], dtype=int64)

In [23]:
# breaking down the array to a list
indexs = neigh_indices.flat[0:12].tolist()
indexs

[58286,
 93860,
 55254,
 107239,
 15938,
 56532,
 155227,
 33730,
 92082,
 105990,
 120108,
 70300]

In [24]:
# result metrics
df_data.iloc[indexs]

Unnamed: 0,year,acousticness,danceability,duration_ms,energy,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,valence
58286,2020,0.321,0.834,247059,0.454,1,6e-06,1,0.114,-9.75,0,83,0.201,81.618,0.837
93860,2020,0.289,0.83,247059,0.49,1,3e-06,1,0.113,-8.82,0,78,0.209,81.604,0.845
55254,2005,0.619,0.52,247053,0.379,0,2e-06,0,0.108,-9.906,1,57,0.0279,85.934,0.208
107239,2004,0.118,0.68,247040,0.659,0,0.0,0,0.107,-6.855,1,53,0.211,91.259,0.411
15938,2000,0.59,0.266,247040,0.333,0,0.0,10,0.121,-9.876,1,62,0.031,109.479,0.0796
56532,2011,0.0301,0.226,247027,0.261,0,0.0677,2,0.0628,-11.276,1,57,0.0339,108.915,0.084
155227,2002,0.518,0.553,247027,0.53,0,0.0,4,0.232,-5.332,0,49,0.0246,84.017,0.162
33730,1992,0.038,0.691,247040,0.497,0,6e-06,11,0.0859,-13.248,1,51,0.0473,103.893,0.701
92082,2011,0.846,0.414,247053,0.295,0,0.0,10,0.079,-9.271,0,53,0.033,125.833,0.218
105990,1998,0.19,0.694,247040,0.368,0,0.0,8,0.14,-7.264,1,41,0.0655,60.117,0.465


In [25]:
# result target
df_target.iloc[indexs]

Unnamed: 0,artists,name,url
58286,DRAKE,Toosie Slide,http://open.spotify.com/track/127QTOFJsJQp5LbJ...
93860,DRAKE,Toosie Slide,http://open.spotify.com/track/466cKvZn1j45IpxD...
55254,"BRAD PAISLEY', 'DOLLY PARTON",When I Get Where I'm Going (feat. Dolly Parton),http://open.spotify.com/track/3VLCtStwYsAL4LKZ...
107239,"CASSIDY', 'R. KELLY",Hotel (feat. R. Kelly),http://open.spotify.com/track/4hHXhCRSnOKd6nMG...
15938,FAITH HILL,"Where Are You Christmas - From ""Dr. Seuss' How...",http://open.spotify.com/track/1msuiw6pnXYfxZ7E...
56532,M83,Outro,http://open.spotify.com/track/2QVmiA93GVhWNTWQ...
155227,KELLY CHEN,記事本,http://open.spotify.com/track/3FbzN8826gMAaMxU...
33730,THE BELLAMY BROTHERS,Old Hippie,http://open.spotify.com/track/5fv9qtXwNV6Xx3P9...
92082,STAIND,Something to Remind You,http://open.spotify.com/track/4wOQ8upbW1GzHCWj...
105990,DESTINY'S CHILD,"No, No, No, Pt. 1",http://open.spotify.com/track/2pdzseh7ELZCKlXX...


In [27]:
# creates the model into a pickle file
dump(nn, 'model.joblib', compress=True)

['model.joblib']

In [16]:
name = df['name'].tolist(0)

TypeError: tolist() takes 1 positional argument but 2 were given