In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
from dotenv import load_dotenv
import os

# Pull variables from a local .env file (if one is present) into the process environment.
load_dotenv()

# Spotify API credentials; each is None when the variable is not set.
client_id = os.environ.get("CLIENT_ID")
client_secret = os.environ.get("CLIENT_SECRET")

# Module-level Spotify client shared by the lookup helpers below.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

In [4]:
import pandas as pd
# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.options.display.max_columns = None

### Function for finding a song using the Spotify API

In [5]:
def find_song(name, year):
    """Look up one track on Spotify and return its metadata and audio features.

    Runs ``sp.search`` for a single track matching ``name`` and ``year``, then
    fetches that track's audio features with ``sp.audio_features``.

    Parameters
    ----------
    name : str
        Track title to search for; stored unchanged in the ``name`` column.
    year : str or int
        Release year used in the search query; stored unchanged in the
        ``year`` column.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame with ``name``, ``year``, ``explicit``, ``duration_ms``
        and ``popularity`` followed by one column per audio-feature key, or
        ``None`` when the search finds no track or no audio features are
        returned for the track that was found.
    """
    results = sp.search(q='track: {} year: {}'.format(name, year), limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]
    features = sp.audio_features(track['id'])
    # The feature list can be empty or hold a None entry for the track;
    # report "not found" instead of failing on None.items().
    if not features or features[0] is None:
        return None

    row = {
        'name': name,
        'year': year,
        'explicit': int(track['explicit']),
        'duration_ms': track['duration_ms'],
        'popularity': track['popularity'],
    }
    # Audio-feature keys that repeat a key above (e.g. duration_ms) overwrite
    # the search value while keeping its column position.
    row.update(features[0])

    return pd.DataFrame([row])

In [34]:
# Smoke test: look up one known track and display the resulting one-row frame.
find_song("Baby", "2010")

Unnamed: 0,name,year,explicit,duration_ms,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,time_signature
0,Baby,2010,0,214240,84,0.728,0.859,5,-5.237,0,0.137,0.0401,0,0.111,0.535,65.043,audio_features,6epn3r7S14KUqlReYr77hA,spotify:track:6epn3r7S14KUqlReYr77hA,https://api.spotify.com/v1/tracks/6epn3r7S14KU...,https://api.spotify.com/v1/audio-analysis/6epn...,4


### Read in Data

In [7]:
# Load the track-level dataset from the working directory; later cells scale its
# feature columns and read its name / artists / year columns for recommendations.
data = pd.read_csv("data.csv")

In [44]:
# Preview the first rows to check which columns are present.
data.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,0.038,101.665


In [43]:
# Remove the release_date column. errors="ignore" keeps this cell idempotent:
# it overwrites `data`, so a second run would otherwise raise KeyError because
# the column is already gone.
data = data.drop(columns="release_date", errors="ignore")

In [10]:
# Load the second CSV (presumably genre-level aggregates, judging by the file name).
# NOTE(review): `genre_data` is not referenced by any other cell visible here — confirm it is still needed.
genre_data = pd.read_csv("data_by_genres.csv")

### Create Recommendation Function

In [56]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import cdist

In [37]:
# Single scaler instance shared by the fit/transform cells below and by song_recommendation.
scaler = StandardScaler()

In [49]:
# Feature columns used to fit the scaler and to measure distance between songs.
# The order matters: the same column order is used for the dataset and for the
# looked-up song.
attributes = [
    "valence",
    "year",
    "acousticness",
    "danceability",
    "duration_ms",
    "energy",
    "explicit",
    "instrumentalness",
    "key",
    "liveness",
    "loudness",
    "mode",
    "popularity",
    "speechiness",
    "tempo",
]

In [50]:
# Fit the scaler on the dataset's feature columns so that song_recommendation can
# apply the same scaling to both the dataset and the looked-up song.
scaler.fit(data[attributes])

In [115]:
# Display the scaled feature matrix for inspection only; the result is not stored
# (song_recommendation recomputes it on every call).
scaler.transform(data[attributes])

array([[-1.7828247 , -2.15247016,  1.27618658, ..., -1.25680847,
        -0.37970638, -1.16930675],
       [ 1.65068832, -2.15247016,  0.61134711, ..., -1.21099271,
         1.94548067, -1.82117959],
       [-1.858821  , -2.15247016,  1.22034007, ..., -1.21099271,
        -0.3962973 , -0.21240379],
       ...,
       [ 0.41194856,  1.66730194, -1.06670771, ...,  2.04192615,
        -0.10749235, -0.81976118],
       [-1.26756976,  1.66730194, -1.30876246, ...,  1.7670316 ,
         1.28798856, -1.36140375],
       [ 0.43094764,  1.66730194, -0.98426761, ...,  1.95029463,
         0.05903135, -0.71220119]])

In [104]:
# NOTE(review): `distances` is not assigned in any cell visible here, so this cell
# fails with NameError on a fresh-kernel "Restart & Run All" unless it is defined
# elsewhere. It looks like leftover scratch work that song_recommendation now
# covers — confirm and remove.
index = list(np.argsort(distances)[:, :20][0])

In [122]:
def song_recommendation(song_name, song_year, data, number_of_songs = 5):
    """Recommend the songs in ``data`` that are closest to a given Spotify track.

    The track is looked up with ``find_song``; both it and ``data`` are scaled
    with the module-level ``scaler`` over the ``attributes`` columns, and rows
    of ``data`` are ranked by Euclidean distance to the track.

    Parameters
    ----------
    song_name : str
        Title of the reference song. Rows of ``data`` whose ``name`` equals
        this value are excluded from the result.
    song_year : str or int
        Release year passed to ``find_song``.
    data : pandas.DataFrame
        Candidate songs; must contain every column in ``attributes`` plus
        ``name``, ``artists`` and ``year``.
    number_of_songs : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``artist`` and ``year`` for the closest songs,
        nearest first, keeping the row labels of ``data``.

    Raises
    ------
    ValueError
        If ``find_song`` returns ``None`` for the requested song.
    """
    song_data = find_song(song_name, song_year)
    if song_data is None:
        # find_song signals "no result" with None; fail with a clear message
        # instead of an opaque TypeError on None[...] below.
        raise ValueError(
            "Could not find {!r} ({}) on Spotify".format(song_name, song_year)
        )

    scaled_data = scaler.transform(data[attributes])
    scaled_song = scaler.transform(song_data[attributes])
    dist = cdist(scaled_song, scaled_data, 'euclidean')

    # Rank every candidate, drop rows carrying the query title, and only then
    # truncate, so removing the query song does not shorten the result.
    order = np.argsort(dist[0])
    is_other_song = data['name'].to_numpy()[order] != song_name
    index = order[is_other_song][:number_of_songs]
    rec = data.iloc[index]

    return pd.DataFrame({
        'name': rec['name'],
        'artist': rec['artists'],
        'year': rec['year'],
    })

In [121]:
# Example: request up to five recommendations for one reference track.
song_recommendation("God's Plan", "2016", data, 5)

Unnamed: 0,name,artist,year
139964,Planet God Damn (feat. Njomza),"['Mac Miller', 'Njomza']",2016
91581,Wya?,['Wifisfuneral'],2017
18869,Jungle,['A Boogie Wit da Hoodie'],2016
74927,Nike Ticks,['YNG Martyr'],2019
18455,Only,"['Nicki Minaj', 'Drake', 'Lil Wayne', 'Chris B...",2014
