### Notebook 3

In [35]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
from dotenv import load_dotenv
load_dotenv()
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import NearestNeighbors
from sklearn import preprocessing
import xgboost

# OAuth scope string handed to SpotifyOAuth when building the client.
# NOTE(review): credentials are presumably picked up from the environment
# populated by load_dotenv() above -- confirm against the project's .env setup.
scope = "user-library-read"
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(scope=scope),
)

In [59]:
def song_to_df(sp, key):
    """Fetch the audio features for ``key`` and return them as a DataFrame.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(key)`` (a ``spotipy.Spotify``
        instance in this notebook).
    key : str
        Track id forwarded unchanged to ``sp.audio_features``.

    Returns
    -------
    pandas.DataFrame
        One row per returned feature record, indexed by ``id``, with the
        bookkeeping columns ``type``, ``uri``, ``track_href`` and
        ``analysis_url`` removed.

    Raises
    ------
    ValueError
        If the client returns nothing, or a ``None`` entry, for ``key``.
    """
    features = sp.audio_features(key)
    # A missing record would otherwise surface as an opaque KeyError from
    # the column drop below, so fail early with a readable message.
    if not features or any(entry is None for entry in features):
        raise ValueError(f"No audio features available for track: {key!r}")

    return (
        pd.DataFrame(features)
        .drop(columns=['type', 'uri', 'track_href', 'analysis_url'])
        .set_index('id')
    )

def make_genre_prediction(sp,key, ohe, model):
    """Predict the genre of track ``key`` with a fitted encoder and model.

    The track's audio features are fetched through ``song_to_df``. The
    categorical columns are passed through ``ohe.transform``, joined
    positionally onto the numeric columns, and the resulting frame
    (indexed by ``id``) is handed to ``model.predict``.

    Returns whatever ``model.predict`` returns for that frame.
    """
    categorical = ['key', 'mode', 'time_signature']
    numeric = ['danceability','energy','loudness','speechiness','acousticness',
               'instrumentalness','liveness','valence','tempo','duration_ms']

    features = song_to_df(sp,key)
    # The encoded columns keep their default integer labels here.
    encoded = pd.DataFrame(ohe.transform(features[categorical]))
    # reset_index gives both frames a 0..n-1 index so the join lines up by position.
    numeric_part = features[numeric].reset_index()
    model_input = numeric_part.join(encoded).set_index('id')
    return model.predict(model_input)

def song_artist_from_key(sp,key):
    """Return ``(track name, name of the first listed artist)`` for ``key``.

    ``sp`` must expose ``track(key)`` returning a mapping with a ``name``
    entry and an ``artists`` list of mappings that each carry ``name``.
    """
    track = sp.track(key)
    return (track['name'], track['artists'][0]['name'])

def song_id_from_query(sp, query):
    """Return the id of the top track hit for ``query``, or ``None``.

    Parameters
    ----------
    sp : object
        Client exposing ``search(q, limit, offset, type)`` (a
        ``spotipy.Spotify`` instance in this notebook).
    query : str
        Free-text search string.

    Returns
    -------
    str or None
        ``id`` of the first returned track, or ``None`` when the search
        reports no matches.
    """
    # Search once and reuse the response; the same request used to be sent
    # twice (once to test for hits, once to read the id).
    tracks = sp.search(query, limit=1, offset=0, type='track')['tracks']
    if tracks['total'] > 0 and tracks['items']:
        return tracks['items'][0]['id']
    return None

In [37]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this project.
# The context manager closes the handle even if unpickling raises.
with open('pickled_files/all_songs_genre_predicted.pickle', 'rb') as infile:
    all_files = pickle.load(infile)

# The pickled container is read positionally: song table, fitted genre model,
# and the one-hot encoder used when predicting genres.
all_songs = all_files[0]
best_model = all_files[1]
ohe_make_genre_pred = all_files[2]

In [38]:
# Split the column names of all_songs by dtype: object columns are treated as
# categorical, everything else as numerical.
categorical_columns = all_songs.select_dtypes(include='object').columns.tolist()
numerical_columns = all_songs.select_dtypes(exclude='object').columns.tolist()

In [39]:
# Preview the first rows of the loaded song table.
all_songs.head()

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,predicted_genre
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2JCHrnxgfagUZQeJRAx7Hc,0.671,0.522,0,-8.144,0,0.0779,0.542,0.000278,0.335,0.134,79.975,262533.0,4,hip-hop
2G7woUgHMt6QBU7esQ5rsL,0.564,0.932,11,-10.068,1,0.0554,0.013,0.169,0.349,0.619,112.937,146250.0,4,rock
4eqrNsKy9Mvg1fvfsTVIAE,0.791,0.583,1,-5.247,1,0.369,0.0393,0.0,0.0577,0.694,149.693,169147.0,4,hip-hop
5PU9EPOh36Cw0f5VGcAacw,0.458,0.819,0,-5.957,0,0.263,0.174,0.0,0.407,0.712,83.326,281053.0,4,hip-hop
66ace8nbTVjgHTed107cDn,0.431,0.947,11,-5.067,1,0.0649,2.7e-05,0.794,0.349,0.422,96.243,215627.0,4,alt-metal


In [85]:
# Unfitted nearest-neighbour index using cosine distance; it is fitted on the
# transformed song matrix in a later cell.
neigh = NearestNeighbors(
    metric='cosine',
    n_neighbors=10,
    radius=0.4,
)

In [76]:
# Work on a copy: the scaling step assigns into X_knn in place, and a plain
# alias would silently overwrite the raw values held in all_songs.
X_knn = all_songs.copy()

In [77]:
# NOTE(review): MMScaler is never fitted or referenced in the visible cells;
# it looks like a leftover duplicate of the scaler below -- confirm and remove.
MMScaler = preprocessing.MinMaxScaler()

# Fit the scaler on the numerical columns, then write the scaled values back
# into X_knn. The fitted instance is reused later to scale single query rows.
MinMaxScaler = preprocessing.MinMaxScaler()
MinMaxScaler.fit(X_knn[numerical_columns])
X_knn[numerical_columns] = MinMaxScaler.transform(X_knn[numerical_columns])


In [78]:
# One-hot encode the categorical columns (first level of each dropped) and
# join them positionally onto the scaled numerical columns, re-indexed by id.
# NOTE(review): `sparse=` and `get_feature_names` belong to the older
# scikit-learn API this notebook was written against -- newer releases are
# believed to rename them; confirm before upgrading.
ohe_knn = OneHotEncoder(sparse=False, drop='first')
X_knn_ohe = ohe_knn.fit_transform(X_knn[categorical_columns])
X_knn_transformed = (
    X_knn[numerical_columns]
    .reset_index()
    .join(
        pd.DataFrame(
            X_knn_ohe,
            columns=ohe_knn.get_feature_names(categorical_columns),
        )
    )
    .set_index('id')
)


In [79]:
# Fit the neighbour index on the scaled + one-hot-encoded song matrix.
neigh.fit(X_knn_transformed)

NearestNeighbors(metric='cosine', n_neighbors=10, radius=0.25)

In [80]:
# Show which columns were classified as categorical.
categorical_columns

['key', 'mode', 'time_signature', 'predicted_genre']

In [81]:
def knn_preprocessing(
    sp,
    key,
    num_col = numerical_columns,
    cat_col = categorical_columns,
    mmScaler = MinMaxScaler,
    bm = best_model,
    ohe_knn = ohe_knn,
    ohe_make_genre_pred = ohe_make_genre_pred,
):
    """Turn track ``key`` into a feature row for the neighbour index.

    Steps: fetch the audio features via ``song_to_df``, attach the genre
    predicted by ``make_genre_prediction``, one-hot encode ``cat_col`` with
    ``ohe_knn``, scale ``num_col`` with ``mmScaler``, and join both parts
    into a single frame indexed by ``id``.

    Note that the keyword defaults are bound to the module-level objects as
    they exist when this ``def`` is executed, not when the function is called.

    Returns a DataFrame holding the scaled numerical columns followed by the
    one-hot columns named by ``ohe_knn.get_feature_names(cat_col)``.
    """
    features = song_to_df(sp, key)
    predicted = make_genre_prediction(sp, key, ohe_make_genre_pred, bm)
    features['predicted_genre'] = predicted[0]

    encoded = ohe_knn.transform(features[cat_col])
    features[num_col] = mmScaler.transform(features[num_col])

    encoded_frame = pd.DataFrame(encoded, columns=ohe_knn.get_feature_names(cat_col))
    # reset_index aligns both frames on a positional index before the join.
    return features[num_col].reset_index().join(encoded_frame).set_index('id')

In [82]:
def make_song_recommendations(sp, kneighs, query, n_neighbors=15):
    """Recommend songs similar to the top search hit for ``query``.

    Parameters
    ----------
    sp : object
        Spotify client forwarded to the lookup helpers.
    kneighs : object
        Fitted neighbour index exposing
        ``kneighbors(X, n_neighbors=..., return_distance=...)``.
    query : str
        Free-text search string.
    n_neighbors : int, default 15
        Number of neighbours requested from ``kneighs``.

    Returns
    -------
    list of (str, str) or str
        ``(song title, artist)`` tuples in neighbour order, without
        duplicates and without the queried song itself. The string
        ``"No results found"`` is returned when the query is empty,
        whitespace-only, ``None``, or has no search hit.
    """
    if not query or query.isspace():
        return "No results found"

    song_id = song_id_from_query(sp, query)
    if song_id is None:
        return "No results found"

    song_plus_artist = song_artist_from_key(sp, song_id)
    song_to_rec = knn_preprocessing(sp, song_id)

    # Query the index that was passed in rather than a module-level global.
    nbrs = kneighs.kneighbors(
        song_to_rec, n_neighbors=n_neighbors, return_distance=False
    )

    playlist = []
    for position in nbrs[0]:
        # One Spotify lookup per neighbour; the result is reused for both
        # checks and for the append.
        candidate = song_artist_from_key(sp, X_knn_transformed.index[position])
        if candidate != song_plus_artist and candidate not in playlist:
            playlist.append(candidate)
    return playlist

In [84]:
# Example run: recommendations for the top search hit of a free-text query.
make_song_recommendations(sp, neigh, "yellow submarine the beatles")

[('Dark Charade', 'Rishloo'),
 ('Thrown Away', 'Papa Roach'),
 ('Wasp / Behind the Wall of Sleep / Bassically / N.I.B. - 2014 Remaster',
  'Black Sabbath'),
 ('Pink Maggit - Squarepusher Remix', 'Deftones'),
 ('Mountain Cry', 'Blues Traveler'),
 ('Lateralus', 'Tool'),
 ('A New Nite / Rosecrans Groove', 'DJ Quik'),
 ('Universe', 'Alien Ant Farm'),
 ('Care Not - Demo', 'Drowning Pool'),
 ('Free Bird', 'Lynyrd Skynyrd'),
 ('Runaway', 'Kanye West'),
 ('Yacha - Peaking Lights Dub Mix', 'Sinkane'),
 ('Shout - Live At The Richfield Coliseum/1983',
  'Tom Petty and the Heartbreakers')]