In [1]:
import requests
import json
from pprint import pprint
import pandas as pd
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity


#import spotify keys
from config import client_id
from config import client_secret

# Creating Access Token for Spotify API

In [2]:
#generate access token for spotify api
#from: https://stmorse.github.io/journal/spotify-api.html

AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST
auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': client_id,
    'client_secret': client_secret,
})

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [3]:
#from: https://stmorse.github.io/journal/spotify-api.html

#base URL for Spotify API search
search_url = 'https://api.spotify.com/v1/search'

#In order to access, we send a properly formed GET request to the API server, with our access_token in the header. Let’s save this header info now, in the following very specific format:
headers = {
    'Authorization': 'Bearer {token}'.format(token=access_token)
}

# Requesting Input from User

In [4]:
# need to add instructions or code to remove punctuations for search to work
search_track = input("Please name a song: ")

Please name a song:  9 to 5


In [5]:
search_artist = input("Please name that song's artist: ")

Please name that song's artist:  dolly parton


In [6]:
# region_preference = input("Please select a region: ")

# Searching for Matching Track Using Spotify API

In [7]:
response = requests.get(search_url + '?q=track%3A' + search_track + '%20artist%3A' + search_artist + '&type=track%2Cartist&limit=1', headers=headers).json()

In [8]:
pprint(response)

{'artists': {'href': 'https://api.spotify.com/v1/search?query=track%3A9+to+5+artist%3Adolly+parton&type=artist&offset=0&limit=1',
             'items': [],
             'limit': 1,
             'next': None,
             'offset': 0,
             'previous': None,
             'total': 0},
 'tracks': {'href': 'https://api.spotify.com/v1/search?query=track%3A9+to+5+artist%3Adolly+parton&type=track&offset=0&limit=1',
            'items': [{'album': {'album_type': 'album',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/32vWCbZh0xZ4o9gkz4PsEU'},
                                              'href': 'https://api.spotify.com/v1/artists/32vWCbZh0xZ4o9gkz4PsEU',
                                              'id': '32vWCbZh0xZ4o9gkz4PsEU',
                                              'name': 'Dolly Parton',
                                              'type': 'artist',
                                              'uri': 'spotify:ar

In [9]:
pprint(response['tracks']['items'][0]["id"])

'4w3tQBXhn5345eUXDGBWZG'


In [10]:
track_id = response['tracks']['items'][0]["id"]
artist_names = response['tracks']['items'][0]["artists"][0]['name']
track_name = response['tracks']['items'][0]["name"]

# Pulling Track Features from Spotify API

In [11]:
#base URL for Spotify API
base_url = 'https://api.spotify.com/v1/'

track_response = requests.get(base_url + 'audio-features/' + track_id, headers=headers).json()

In [12]:
#empty lists to store song data
danceability = []
energy = []
key = []
loudness = []
mode = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
id_num = []
duration_mins = []
time_signature = []

#fill in track info for each audio feature / empty list
danceability.append(track_response["danceability"])
energy.append(track_response["energy"])
key.append(track_response["key"])
loudness.append(track_response["loudness"])
mode.append(track_response["mode"])
speechiness.append(track_response["speechiness"])
acousticness.append(track_response["acousticness"])
instrumentalness.append(track_response["instrumentalness"])
liveness.append(track_response["liveness"])
valence.append(track_response["valence"])
tempo.append(track_response["tempo"])
id_num.append(track_response["id"])
#duration recorded in api in milliseconds, converting to minutes here
duration_mins.append(round((track_response["duration_ms"] / 60000),2))
time_signature.append(track_response["time_signature"])    

#create a dictionary to hold data gathered from the api
track_dict = {
    "track_id": id_num,
    "artist_names": artist_names,
    "track_name": track_name,
    "danceability": danceability,
    "energy": energy,
    "key": key,
    "loudness": loudness,
    "mode": mode,
    "speechiness": speechiness,
    "acousticness": acousticness,
    "instrumentalness": instrumentalness,
    "liveness": liveness,
    "valence": valence,
    "tempo": tempo,
    "duration_mins": duration_mins,
    "time_signature": time_signature,
    
}
#convert dictionary to a dataframe
input_track_df = pd.DataFrame(track_dict)
input_track_df.head()

Unnamed: 0,track_id,artist_names,track_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins,time_signature
0,4w3tQBXhn5345eUXDGBWZG,Dolly Parton,9 to 5,0.554,0.783,6,-5.852,1,0.0457,0.416,2e-06,0.631,0.813,105.39,2.71,4


# Reading in CSVs of Regional and Global Top 200 Charts

In [13]:
#import CSVs created in "pulling_chart_track_features.ipynb" and save as individual dfs
features_csv_list = {"global":"global_df.csv", "vietnam":"vietnam_df.csv","venezuela":"venezuela_df.csv",
"uruguay":"uruguay_df.csv","united_kingdom":"united_kingdom_df.csv","ukraine":"ukraine_df.csv",
"uae":"uae_df.csv","turkey":"turkey_df.csv","thailand":"thailand_df.csv",
"taiwan":"taiwan_df.csv","switzerland":"switzerland_df.csv","sweden":"sweden_df.csv",
"spain":"spain_df.csv","south_korea":"south_korea_df.csv","south_africa":"south_africa_df.csv",
"slovakia":"slovakia_df.csv","singapore":"singapore_df.csv","saudi_arabia":"saudi_arabia_df.csv",
"romania":"romania_df.csv","portugal":"portugal_df.csv","poland":"poland_df.csv",
"philippines":"philippines_df.csv","peru":"peru_df.csv","paraguay":"paraguay_df.csv",
"panama":"panama_df.csv","pakistan":"pakistan_df.csv","norway":"norway_df.csv",
"nigeria":"nigeria_df.csv","nicaragua":"nicaragua_df.csv","new_zealand":"new_zealand_df.csv",
"netherlands":"netherlands_df.csv","mexico":"mexico_df.csv",
"malaysia":"malaysia_df.csv","luxembourg":"luxembourg_df.csv","lithuania":"lithuania_df.csv",
"latvia":"latvia_df.csv","kazakhstan":"kazakhstan_df.csv","japan":"japan_df.csv",
"italy":"italy_df.csv","israel":"israel_df.csv","ireland":"ireland_df.csv",
"indonesia":"indonesia_df.csv",
"hungary":"hungary_df.csv","hong_kong":"hong_kong_df.csv","honduras":"honduras_df.csv",
"guatemala":"guatemala_df.csv","greece":"greece_df.csv","germany":"germany_df.csv",
"france":"france_df.csv","finland":"finland_df.csv","estonia":"estonia_df.csv",
"el_salvador":"el_salvador_df.csv","egypt":"egypt_df.csv","ecuador":"ecuador_df.csv",
"dominican_republic":"dominican_republic_df.csv","denmark":"denmark_df.csv","czech_republic":"czech_republic_df.csv",
"cyprus":"cyprus_df.csv","costa_rica":"costa_rica_df.csv","colombia":"colombia_df.csv",
"chile":"chile_df.csv","canada":"canada_df.csv","bulgaria":"bulgaria_df.csv",
"brazil":"brazil_df.csv","bolivia":"bolivia_df.csv","belgium":"belgium_df.csv",
"belarus":"belarus_df.csv","austria":"austria_df.csv","australia":"australia_df.csv",
"argentina":"argentina_df.csv"}
# currently not working w/ api calls: "morocco":"morocco_df.csv","india":"india_df.csv","iceland":"iceland_df.csv"

d = {}

for key in features_csv_list:
    d[key] = pd.read_csv(f'resources/top_chart_features/{features_csv_list[key]}') 


In [14]:
d['global']

Unnamed: 0,track_id,artist_names,track_name,region,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins,time_signature
0,3nqQXoyQOWXiESFLlDF1hG,"Sam Smith, Kim Petras",Unholy (feat. Kim Petras),Global,0.714,0.472,2,-7.375,1,0.0864,0.01300,0.000005,0.2660,0.238,131.121,2.62,4
1,4uUG5RXrOk84mYEfFvj3cK,"David Guetta, Bebe Rexha",I'm Good (Blue),Global,0.561,0.965,7,-3.673,0,0.0343,0.00383,0.000007,0.3710,0.304,128.040,2.92,4
2,4Dvkj6JhhA12EX05fT7y2e,Harry Styles,As It Was,Global,0.520,0.731,6,-5.338,0,0.0557,0.34200,0.001010,0.3110,0.662,173.930,2.79,4
3,2tTmW7RDtMQtBk7m2rYeSw,"Bizarrap, Quevedo","Quevedo: Bzrp Music Sessions, Vol. 52",Global,0.621,0.782,2,-5.548,1,0.0440,0.01250,0.033000,0.2300,0.550,128.033,3.32,4
4,5ww2BF9slyYgNOk37BlC4u,Manuel Turizo,La Bachata,Global,0.835,0.679,7,-5.329,0,0.0364,0.58300,0.000002,0.2180,0.850,124.980,2.71,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,4woTEX1wYOTGDqNXuavlRC,"Eminem, Dina Rae",Superman,Global,0.802,0.755,4,-3.138,0,0.0564,0.02150,0.000000,0.2950,0.580,130.027,5.84,4
196,003vvx7Niy0yvhvHt4a68B,The Killers,Mr. Brightside,Global,0.352,0.911,1,-5.230,1,0.0747,0.00121,0.000000,0.0995,0.236,148.033,3.72,4
197,2K7xn816oNHJZ0aVqdQsha,The Neighbourhood,Softcore,Global,0.575,0.568,9,-5.509,0,0.0300,0.04840,0.000417,0.2860,0.370,93.986,3.44,4
198,4gzsuuZypVbxs0Af1LSZyB,"Tainy, Bad Bunny, Julieta Venegas",Lo Siento BB:/ (with Bad Bunny & Julieta Venegas),Global,0.639,0.703,4,-6.330,0,0.0794,0.08890,0.000002,0.0865,0.138,169.888,3.46,4


In [15]:
regions_list = d.keys()
regions_list

dict_keys(['global', 'vietnam', 'venezuela', 'uruguay', 'united_kingdom', 'ukraine', 'uae', 'turkey', 'thailand', 'taiwan', 'switzerland', 'sweden', 'spain', 'south_korea', 'south_africa', 'slovakia', 'singapore', 'saudi_arabia', 'romania', 'portugal', 'poland', 'philippines', 'peru', 'paraguay', 'panama', 'pakistan', 'norway', 'nigeria', 'nicaragua', 'new_zealand', 'netherlands', 'mexico', 'malaysia', 'luxembourg', 'lithuania', 'latvia', 'kazakhstan', 'japan', 'italy', 'israel', 'ireland', 'indonesia', 'hungary', 'hong_kong', 'honduras', 'guatemala', 'greece', 'germany', 'france', 'finland', 'estonia', 'el_salvador', 'egypt', 'ecuador', 'dominican_republic', 'denmark', 'czech_republic', 'cyprus', 'costa_rica', 'colombia', 'chile', 'canada', 'bulgaria', 'brazil', 'bolivia', 'belgium', 'belarus', 'austria', 'australia', 'argentina'])

In [16]:
len(regions_list)

70

# Create Function to Recommend 5 Tracks from Selected Top 200 Chart

In [17]:
def recommendSongs(user_track_df, selected_chart_df):
    # create combined df of the track features from the input track and all of the tracks from the selected regional chart
    # row 0 will be the input song
    combined_df = pd.concat([user_track_df,selected_chart_df.loc[:]]).reset_index(drop=True)    
    
    # scale data (only features columns), create df of scaled data
    scaler = StandardScaler()
    chart_scaled = scaler.fit_transform(combined_df.iloc[:, 3:16])
    chart_scaled_df = pd.DataFrame(chart_scaled, columns=combined_df.iloc[:, 3:16].columns)

    # create array for input track data
    array1 = chart_scaled_df.iloc[0,:].to_numpy().reshape(1, -1)
    # create array for selected chart
    array2 = chart_scaled_df.iloc[1:,:].to_numpy()

    # run cosine similarity
    cosine_sim = cosine_similarity(array1, array2)
    
    # create list that is ranked by score, descending order
    sim_scores = list(enumerate(cosine_sim[-1,:]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # create empty list to store recommendations
    ranked_tracks = []

    for i in range(0, 5):
        indx = sim_scores[i][0]
        ranked_tracks.append([combined_df['track_id'].iloc[indx], combined_df['artist_names'].iloc[indx], combined_df['track_name'].iloc[indx], np.round(sim_scores[i][1],decimals=2)])
    
    return ranked_tracks

In [18]:
recommendSongs(input_track_df,d['global'])

[['3fjN3y5x4hN53rykAN2LHQ', 'Rels B', 'cómo dormiste?', 0.74],
 ['6Uj1ctrBOjOas8xZXGqKk4', 'Doja Cat', 'Woman', 0.68],
 ['1797zYiX4cKosMH836X9Gt', 'Bad Bunny, Tony Dize', 'La Corriente', 0.68],
 ['4tYFy8ALRjIZvnvSLw5lxN', 'Bad Bunny, Rauw Alejandro', 'Party', 0.67],
 ['5QM0SsyzmBM0pbFY52Xj1X', 'Sam Smith', "I'm Not The Only One", 0.63]]