## Create a model to find similar songs

### Load Libraries

In [1]:
import spotipy
import spotipy.util as util
import sys
import requests
from dotenv import load_dotenv
import os

In [2]:
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
import pandas as pd
import json

In [4]:
from flask import jsonify

In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KDTree
from sklearn.neighbors import NearestNeighbors

In [6]:
load_dotenv() # load environment variables

True

### Load Spotify variables

In [7]:
client_credentials_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

### Create model to get 4 nearest neighbours

In [8]:
# Assign any track ID
track_id = '0Gc6TVm52BwZD07Ki6tIvf'

In [9]:
neighbors=4        # you can change the number
max_distance=5.0   # you can change the number

rel_artists = sp.artist_related_artists(sp.track(track_id=track_id)['artists'][0]['id'])['artists']
artist_log = []
for a in rel_artists:
    artist_log.append(a['id'])
feat_log = []
for artist in artist_log:
    for track in sp.artist_top_tracks(artist)['tracks']:
        feat_log.append(sp.audio_features(track['id'])[0])
            
catalog = pd.DataFrame.from_dict(feat_log)
    
root = pd.DataFrame.from_dict(sp.audio_features(tracks=[track_id]))

merged_df = root.append(catalog, ignore_index=True)
    
dropped_df = merged_df.drop(columns=['uri', 'track_href', 'id', 'duration_ms', 'time_signature', 'mode', 'loudness', 'type', 'analysis_url'])
scaled_df = StandardScaler().fit_transform(dropped_df)
trans_array = scaled_df.copy()
trans_array[:,0] = [u*2.4 for u in trans_array[:,0]] # acousticness
trans_array[:,1] = [((u*u)**0.5)*u for u in trans_array[:,1]] # danceability
trans_array[:,2] = [u*1.7 for u in trans_array[:,2]] # energy
trans_array[:,3] = [u*1.4 for u in trans_array[:,3]] # instrumentalness
trans_array[:,4] = [u*0.9 for u in trans_array[:,4]] # key
trans_array[:,5] = [u*1.0 for u in trans_array[:,5]] # liveness
trans_array[:,6] = [u*1.0 for u in trans_array[:,6]] # speechiness
trans_array[:,7] = [u*1.1 for u in trans_array[:,7]] # tempo
trans_array[:,8] = [u*2.5 for u in trans_array[:,8]] # valence

knn = NearestNeighbors()
knn.fit(trans_array)

rec = knn.kneighbors(trans_array[[0]], n_neighbors=neighbors+1)

In [10]:
print('Original song:', root.loc[0,'id'])

Original song: 0Gc6TVm52BwZD07Ki6tIvf


### We need to print out and save the response 

In [11]:
predict_response = []
for n in range(1,neighbors+1):
    if rec[0][0][n] <= max_distance:
        pred_dict = (merged_df.loc[rec[1][0][n],'id'], rec[0][0][n])
    predict_response.append(pred_dict)

In [12]:
pred = pd.DataFrame(predict_response, columns=['recommendation', 'distance'])

### Here we can see the Nearest Neighbours ID's and the distances from Original song

In [13]:
pred

Unnamed: 0,recommendation,distance
0,758VqyHhAhzX6vmQ8h0exw,3.470148
1,4yKTDPH6iRBHmA44AipmIk,3.775737
2,1EGjgdJadGF3q0aJ8RAHeV,4.314166
3,0klbnFP6UwMYpUuKjhleKT,4.439575


### Now we need to decode the ID's for easy read

#### We want to see the 'artist name', 'song name', 'direct link to the song' and if the song is explicit or not.

In [14]:
# This gives us the artist name
sp.track('758VqyHhAhzX6vmQ8h0exw')['artists'][0]['name']

'Georges Moustaki'

In [15]:
# Song (track) name
sp.track('758VqyHhAhzX6vmQ8h0exw')['name']

'Le facteur'

In [16]:
sp.track('758VqyHhAhzX6vmQ8h0exw')['id']

'758VqyHhAhzX6vmQ8h0exw'

In [17]:
# Direct link
sp.track('758VqyHhAhzX6vmQ8h0exw')['external_urls']['spotify']

'https://open.spotify.com/track/758VqyHhAhzX6vmQ8h0exw'

In [18]:
# Explicit 'True' or 'False'
sp.track('758VqyHhAhzX6vmQ8h0exw')['explicit']

False

In [19]:
# Track image
sp.track('758VqyHhAhzX6vmQ8h0exw')['album']['images'][1]['url']

'https://i.scdn.co/image/ab67616d00001e02480094ab52afdba5b0a93a65'

In [20]:
# Preview track url. Not all of the tracks have previews!
sp.track('24upABZ8A0sAepfu91sEYr')['preview_url']

'https://p.scdn.co/mp3-preview/dcad19ab187c730cb3ad678695db21baf0137ac2?cid=1544e44f656f402894c4b6b4c0efdf9b'

### Let's have it all in one place

In [21]:
df_predict_tracks = pd.DataFrame() # create dataframe

feat_search_artist = []
feat_search_song = []
feat_search_id = []
feat_search_url = []
feat_search_explicit = []
feat_search_preview = []
feat_search_image = []

for ii in pred['recommendation']:
    artist_name = sp.track(ii)['artists'][0]['name']
    song_name = sp.track(ii)['name']
    song_id = sp.track(ii)['id']
    url_link = sp.track(ii)['external_urls']['spotify']
    explicit = sp.track(ii)['explicit']
    preview = sp.track(ii)['preview_url']
    image = sp.track(ii)['album']['images'][1]['url']
    feat_search_artist.append(artist_name)
    feat_search_song.append(song_name)
    feat_search_id.append(song_id)
    feat_search_url.append(url_link)
    feat_search_explicit.append(explicit)
    feat_search_preview.append(preview)
    feat_search_image.append(image)
    

# Save the results
df_predict_tracks['artist_name'] = feat_search_artist
df_predict_tracks['song_name'] = feat_search_song
df_predict_tracks['song_id'] = feat_search_id
df_predict_tracks['url'] = feat_search_url
df_predict_tracks['explicit'] = feat_search_explicit
df_predict_tracks['preview'] = feat_search_preview
df_predict_tracks['image'] = feat_search_image

In [22]:
# Replace the None velue for the missing preview with the url desired
def get_rid_of_nulls(value):
        if pd.isnull(value):
            return 'http://bit.ly/2nXRRfX'
        else:
            return value

In [23]:
# Apply the function
df_predict_tracks['preview'] = df_predict_tracks['preview'].apply(get_rid_of_nulls)

In [24]:
# Change the index count from 0 to 1
df_predict_tracks.index +=1

In [25]:
df_predict_tracks

Unnamed: 0,artist_name,song_name,song_id,url,explicit,preview,image
1,Georges Moustaki,Le facteur,758VqyHhAhzX6vmQ8h0exw,https://open.spotify.com/track/758VqyHhAhzX6vm...,False,http://bit.ly/2nXRRfX,https://i.scdn.co/image/ab67616d00001e02480094...
2,Yves Montand,Les feuilles mortes,4yKTDPH6iRBHmA44AipmIk,https://open.spotify.com/track/4yKTDPH6iRBHmA4...,False,http://bit.ly/2nXRRfX,https://i.scdn.co/image/ab67616d00001e021ea3cc...
3,Nino Ferrer,Si tu m'aimes encore,1EGjgdJadGF3q0aJ8RAHeV,https://open.spotify.com/track/1EGjgdJadGF3q0a...,False,http://bit.ly/2nXRRfX,https://i.scdn.co/image/ab67616d00001e02b5d839...
4,Julien Clerc,Ce n'est rien,0klbnFP6UwMYpUuKjhleKT,https://open.spotify.com/track/0klbnFP6UwMYpUu...,False,https://p.scdn.co/mp3-preview/a9dff6a2669a2153...,https://i.scdn.co/image/ab67616d00001e02f7dcbd...


### Check the json print out

In [26]:
print(json.dumps(json.loads(df_predict_tracks.to_json(orient='index')), indent=2))

{
  "1": {
    "artist_name": "Georges Moustaki",
    "song_name": "Le facteur",
    "song_id": "758VqyHhAhzX6vmQ8h0exw",
    "url": "https://open.spotify.com/track/758VqyHhAhzX6vmQ8h0exw",
    "explicit": false,
    "preview": "http://bit.ly/2nXRRfX",
    "image": "https://i.scdn.co/image/ab67616d00001e02480094ab52afdba5b0a93a65"
  },
  "2": {
    "artist_name": "Yves Montand",
    "song_name": "Les feuilles mortes",
    "song_id": "4yKTDPH6iRBHmA44AipmIk",
    "url": "https://open.spotify.com/track/4yKTDPH6iRBHmA44AipmIk",
    "explicit": false,
    "preview": "http://bit.ly/2nXRRfX",
    "image": "https://i.scdn.co/image/ab67616d00001e021ea3cc67be2692a612e2bbf2"
  },
  "3": {
    "artist_name": "Nino Ferrer",
    "song_name": "Si tu m'aimes encore",
    "song_id": "1EGjgdJadGF3q0aJ8RAHeV",
    "url": "https://open.spotify.com/track/1EGjgdJadGF3q0aJ8RAHeV",
    "explicit": false,
    "preview": "http://bit.ly/2nXRRfX",
    "image": "https://i.scdn.co/image/ab67616d00001e02b5d8391ce7d