In [31]:
import pickle
import requests
import json
from pprint import pprint
import pandas as pd
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
#import spotify keys
from config import client_id
from config import client_secret

In [32]:
#example country to give to global feature function
url = "https://raw.githubusercontent.com/Flores-Kevin/rage-against-machine-learning/molly/resources/regional-usa-weekly-2022-10-06.csv"

In [33]:
def token_maker(client_id,client_secret):
    #generate access token for spotify api
    #from: https://stmorse.github.io/journal/spotify-api.html
    AUTH_URL = 'https://accounts.spotify.com/api/token'
    # POST
    auth_response = requests.post(AUTH_URL, {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
    })
    # convert the response to JSON
    auth_response_data = auth_response.json()
    # save the access token
    access_token = auth_response_data['access_token']
    return access_token

In [42]:
def global_features(country,access_token):#might need to add token as argument and or find a way to import key to function
    #Read in the selected countries CSV
    df = pd.read_csv(country)
    #extracting track id (removing "spotify:track:" from track id in csv)
    track_id_series = df.uri.str[14:]
    #convert to df and rename column
    track_id_df = track_id_series.to_frame().rename(columns={"uri":"track_id"})
    #merge dfs
    df = df.merge(track_id_df, how="inner", left_index=True, right_index=True)
    df = df[["track_id", "artist_names", "track_name"]]
    df = df.astype({'track_id':'string','artist_names':'string','track_name':'string'})
    #empty lists to store song data
    global_danceability = []
    global_energy = []
    global_key = []
    global_loudness = []
    global_mode = []
    global_speechiness = []
    global_acousticness = []
    global_instrumentalness = []
    global_liveness = []
    global_valence = []
    global_tempo = []
    global_id_num = []
    global_duration_mins = []
    global_time_signature = []
    #base URL for Spotify API
    base_url = 'https://api.spotify.com/v1/'
    #In order to access, we send a properly formed GET request to the API server, with our access_token in the header. Let’s save this header info now, in the following very specific format:
    headers = {
        'Authorization': 'Bearer {token}'.format(token=access_token)
    }
    #loop through dataframe, use track id to look up songs in spotify api and grab each one's audio features
    for t in df["track_id"]:
        global_response = requests.get(base_url + 'audio-features/' + t, headers=headers).json()
        #fill in track info for each audio feature / empty list
        global_danceability.append(global_response["danceability"])
        global_energy.append(global_response["energy"])
        global_key.append(global_response["key"])
        global_loudness.append(global_response["loudness"])
        global_mode.append(global_response["mode"])
        global_speechiness.append(global_response["speechiness"])
        global_acousticness.append(global_response["acousticness"])
        global_instrumentalness.append(global_response["instrumentalness"])
        global_liveness.append(global_response["liveness"])
        global_valence.append(global_response["valence"])
        global_tempo.append(global_response["tempo"])
        global_id_num.append(global_response["id"])
        #duration recorded in api in milliseconds, converting to minutes here
        global_duration_mins.append(round((global_response["duration_ms"] / 60000),2))
        global_time_signature.append(global_response["time_signature"])   
    #create a dictionary to hold data gathered from the api
    global_songs_dict = {
        "track_id": global_id_num,
        "danceability": global_danceability,
        "energy": global_energy,
        "key": global_key,
        "loudness": global_loudness,
        "mode": global_mode,
        "speechiness": global_speechiness,
        "acousticness": global_acousticness,
        "instrumentalness": global_instrumentalness,
        "liveness": global_liveness,
        "valence": global_valence,
        "tempo": global_tempo,
        "duration_mins": global_duration_mins,
        "time_signature": global_time_signature,
    }
    #convert dictionary to a dataframe
    global_info_df = pd.DataFrame(global_songs_dict)
    #merge features df with original df
    global_df = df.merge(global_info_df, how="inner")
    # return track_id_df
    return global_df

In [35]:
#Example song and artist because inputs will be given through flask/index.html
song = "Yesterday"
artist = "The Beatles"

In [36]:
def single_feature(song,artist,access_token):
    #base URL for Spotify API search
    search_url = 'https://api.spotify.com/v1/search'
    #In order to access, we send a properly formed GET request to the API server, with our access_token in the header. Let’s save this header info now, in the following very specific format:
    headers = {
        'Authorization': 'Bearer {token}'.format(token=access_token)
    }
    response = requests.get(search_url + '?q=track%3A' + song + '%20artist%3A' + artist + '&type=track%2Cartist&limit=1', headers=headers).json()
    #get id,name,and artist from response
    track_id = response['tracks']['items'][0]["id"]
    artist_names = response['tracks']['items'][0]["artists"][0]['name']
    track_name = response['tracks']['items'][0]["name"]
    #base URL for Spotify API
    base_url = 'https://api.spotify.com/v1/'
    track_response = requests.get(base_url + 'audio-features/' + track_id, headers=headers).json()
    #empty lists to store song data
    danceability = []
    energy = []
    key = []
    loudness = []
    mode = []
    speechiness = []
    acousticness = []
    instrumentalness = []
    liveness = []
    valence = []
    tempo = []
    id_num = []
    duration_mins = []
    time_signature = []
    #fill in track info for each audio feature / empty list
    danceability.append(track_response["danceability"])
    energy.append(track_response["energy"])
    key.append(track_response["key"])
    loudness.append(track_response["loudness"])
    mode.append(track_response["mode"])
    speechiness.append(track_response["speechiness"])
    acousticness.append(track_response["acousticness"])
    instrumentalness.append(track_response["instrumentalness"])
    liveness.append(track_response["liveness"])
    valence.append(track_response["valence"])
    tempo.append(track_response["tempo"])
    id_num.append(track_response["id"])
    #duration recorded in api in milliseconds, converting to minutes here
    duration_mins.append(round((track_response["duration_ms"] / 60000),2))
    time_signature.append(track_response["time_signature"])
    #create a dictionary to hold data gathered from the api
    track_dict = {
        "track_id": id_num,
        "artist_names": artist_names,
        "track_name": track_name,
        "danceability": danceability,
        "energy": energy,
        "key": key,
        "loudness": loudness,
        "mode": mode,
        "speechiness": speechiness,
        "acousticness": acousticness,
        "instrumentalness": instrumentalness,
        "liveness": liveness,
        "valence": valence,
        "tempo": tempo,
        "duration_mins": duration_mins,
        "time_signature": time_signature,
    }
    #convert dictionary to a dataframe
    input_track_df = pd.DataFrame(track_dict)
    return input_track_df

In [37]:
single_feature(song,artist,token_maker(client_id,client_secret))

Unnamed: 0,track_id,artist_names,track_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins,time_signature
0,3BQHpFgAp4l80e1XslIjNI,The Beatles,Yesterday - Remastered 2009,0.332,0.179,5,-11.83,1,0.0326,0.879,0,0.0886,0.315,96.529,2.09,4


In [40]:
input1 = (single_feature(song,artist,token_maker(client_id,client_secret)))

In [22]:
global_features(url,token_maker(client_id,client_secret))

TypeError: global_features() missing 1 required positional argument: 'access_token'

In [41]:
input2 = global_features(url,token_maker(client_id,client_secret))

In [38]:
def model(input_track_df,global_df):
    #creating df with input track features as row 0, and rest of chart tracks below
    chart_with_input_track = pd.concat([input_track_df,global_df.loc[:]]).reset_index(drop=True)
    #import scaler for data
    scaler = StandardScaler()
    chart_scaled = scaler.fit_transform(chart_with_input_track.iloc[:, 3:16])
    chart_scaled_df = pd.DataFrame(chart_scaled, columns=chart_with_input_track.iloc[:, 3:16].columns)
    array1 = chart_scaled_df.iloc[0,:].to_numpy().reshape(1, -1)
    array2 = chart_scaled_df.iloc[1:,:].to_numpy()
    cosine_sim = cosine_similarity(array1, array2)
    sim_scores = list(enumerate(cosine_sim[-1,:]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    ranked_tracks = []
    for i in range(0, 5):
        indx = sim_scores[i][0]
        ranked_tracks.append([chart_with_input_track['track_id'].iloc[indx], chart_with_input_track['artist_names'].iloc[indx], chart_with_input_track['track_name'].iloc[indx]])
    return ranked_tracks

In [43]:
model(input1,input2)

[['0gucTLf7trAf37Ua1uAyAu', 'Nirvana', 'Smells Like Teen Spirit'],
 ['7rbECVPkY5UODxoOUVKZnA', 'Kanye West', 'I Wonder'],
 ['53tfEupEzQRtVFOeZvk7xq', 'ROSALÍA', 'DESPECHÁ'],
 ['21jGcNKet2qwijlDFuPiPb', 'Post Malone', 'Circles'],
 ['59CfNbkERJ3NoTXDvoURjj', 'Dove Cameron', 'Boyfriend']]