In [14]:
import os 
import requests
import joblib
import base64 as b64
import pandas as pd
import json

In [15]:
#Create spotify class which contains all functions and attributes related to spotify API
class SpotifyAPI:
    
    authorize_url = 'https://accounts.spotify.com/api/token'
    
    def __init__(self, client_id, client_secret):
        self.client_id = client_id
        self.client_secret = client_secret
    
    def get_authorization_code(self):
        
        client_id = self.client_id
        client_secret = self.client_secret
        authorize_url = self.authorize_url
        client_creds = f'{client_id}:{client_secret}'
        
        client_creds_b64 = b64.b64encode(client_creds.encode())
        
        
        authorize_params = {
            'grant_type': 'client_credentials'
        }
        
        token_header = {
            'Authorization': f'Basic {client_creds_b64.decode()}' 
        }
        
        r = requests.post(authorize_url, data=authorize_params, headers=token_header)
        r_json = r.json()
        
        access_token = r_json['access_token']
        
        return access_token

In [16]:
#Create spotify client object with spotify app client id and secret key
spotify_client_id = os.environ['SPOTIFY_API_CLIENT_ID']
spotify_secret_key = os.environ['SPOTIFY_API_SECRET_KEY']

spotify_client = SpotifyAPI(spotify_client_id, spotify_secret_key)

In [17]:
#Get access token
access_token = spotify_client.get_authorization_code()

In [18]:
#Get pollen playlist spotify track ids
pollen_playlist_id = '37i9dQZF1DWWBHeXOYZf74'

pollen_playlist_tracks = []

playlist_header = {
    'Authorization': f'Bearer {access_token}'
}

for i in range(5):
    
    playlist_response = requests.get(f'https://api.spotify.com/v1/playlists/{pollen_playlist_id}/tracks?offset={i*100}', headers=playlist_header)
    playlist_response_json = playlist_response.json()
    
    for item in playlist_response_json['items']:
        
        track_id = item['track']['id']
        pollen_playlist_tracks.append(track_id)

In [19]:
#Load liked tracks ids
liked_track_ids = joblib.load('../Joblib_Objects/liked_track_ids')

#Filter playlist for songs that are already liked
filtered_pollen_playlist_tracks = []

for track in pollen_playlist_tracks:
    if track not in liked_track_ids:
        filtered_pollen_playlist_tracks.append(track)

In [20]:
#Get track information for all filtered pollen playlist tracks
pollen_playlist_df = pd.DataFrame(columns=['genre','danceability','energy','key','loudness','mode','speechiness','acousticness',
                                         'instrumentalness','liveness','valence','tempo'])

track_header = {
    'Authorization': f'Bearer {access_token}'
}

for track in filtered_pollen_playlist_tracks:
    try:
        audio_features_response = requests.get(f'https://api.spotify.com/v1/audio-features/{track}', headers=track_header)
        audio_features_response_json = audio_features_response.json()

        track_response = requests.get(f'https://api.spotify.com/v1/tracks/{track}',headers=track_header)
        track_response_json = track_response.json()
        
        song_name = track_response_json['name']
        artist_id = track_response_json['artists'][0]['id']
        
        artist_response = requests.get(f'https://api.spotify.com/v1/artists/{artist_id}',headers=track_header)
        artist_response_json = artist_response.json()
        genres = artist_response_json['genres']
        
        track_features = {
            'song_name':song_name,
            'track_id':track,
            'genre':genres,
            'danceability':audio_features_response_json['danceability'],
            'energy':audio_features_response_json['energy'],
            'key':audio_features_response_json['key'],
            'loudness':audio_features_response_json['loudness'],
            'mode':audio_features_response_json['mode'],
            'speechiness':audio_features_response_json['speechiness'],
            'acousticness':audio_features_response_json['acousticness'],
            'instrumentalness':audio_features_response_json['instrumentalness'],
            'liveness':audio_features_response_json['liveness'],
            'valence':audio_features_response_json['valence'],
            'tempo':audio_features_response_json['tempo']
        }

        pollen_playlist_df = pollen_playlist_df.append(track_features,ignore_index=True)
        
    except:
        
        print(track)

3hARuUtzTdUuWH1KiLJlSf
2kUQyPvQpScTvEi31bXG9h


In [21]:
#Format genres to only have 1 listed
def replace_empty_list(track):
    if len(track) == 0:
        track = ['Not Available']
    else:
        pass
    
    return track

pollen_playlist_df['genre'] = pollen_playlist_df['genre'].map(replace_empty_list)
pollen_playlist_df['genre'] = pollen_playlist_df['genre'].map(lambda x: x[0])

In [22]:
#load label encoder
genres_label_encoder = joblib.load('../Joblib_Objects/genres_label_encoder')

def label_encode(track):
    
    try:
        encoded_genre = genres_label_encoder.transform([track])[0]
        
        return int(encoded_genre)
    
    except:
        pass
    
pollen_playlist_df['genre'] = pollen_playlist_df['genre'].map(label_encode)

In [23]:
#Drop rows with nans
pollen_playlist_df = pollen_playlist_df.dropna()

#Make separate dataframe without track ids listed
prediction_dataframe = pollen_playlist_df.drop(columns=['track_id','song_name'])

In [24]:
#Load scaler
scaler = joblib.load('../Joblib_Objects/scaler')
X = scaler.transform(prediction_dataframe)



In [39]:
#Load model
xgb_model = joblib.load('../spotify_song_classifier.dat')

#Predict favourites and add to playlist dataframe
predictions = xgb_model.predict(X)
predictions_probabilities = xgb_model.predict_proba(X)
predictions_probabilities_0_class = [x[0] for x in predictions_probabilities]
pollen_playlist_df['favourite'] = predictions
pollen_playlist_df['prediction_probability'] = predictions_probabilities_0_class

def change_probability_1(probability):
    if probability < 0.5:
        new_probability = 1-probability
    else:
        new_probability = probability
    
    return new_probability

pollen_playlist_df['prediction_probability'] = pollen_playlist_df['prediction_probability'].map(change_probability_1)

In [47]:
#Filter for favourite songs
favourites_df = pollen_playlist_df.loc[pollen_playlist_df['favourite'] == 1]

#Filter for 80% prbaobility of being favourite
higher_probability = favourites_df.loc[favourites_df['prediction_probability'] > .8]

In [44]:
#Fill XGB Model Recommendations playlist with newly acquired tracks
authorize_url = 'https://accounts.spotify.com/authorize'

def get_authorization_code(client_id, client_secret):

    authorize_params = {
        'client_id':client_id,
        'response_type':'token',
        'redirect_uri':'https://developer.spotify.com/documentation/web-api/reference-beta/#endpoint-get-users-saved-tracks',
        'scope':'playlist-modify-public playlist-modify-private'
    }

    r = requests.get(authorize_url, params=authorize_params)

    print(r.url)

get_authorization_code(os.environ['SPOTIFY_API_CLIENT_ID'], os.environ['SPOTIFY_API_SECRET_KEY'])

https://accounts.spotify.com/login?continue=https%3A%2F%2Faccounts.spotify.com%2Fauthorize%3Fscope%3Dplaylist-modify-public%2Bplaylist-modify-private%26response_type%3Dtoken%26redirect_uri%3Dhttps%253A%252F%252Fdeveloper.spotify.com%252Fdocumentation%252Fweb-api%252Freference-beta%252F%2523endpoint-get-users-saved-tracks%26client_id%3D853fe527a1704e18845533fb15278ead


In [46]:
# temp_token taken from above url

playlist_header = {
    'Authorization': f'Bearer {temp_token}',
    'Content-Type': 'application/json'
}

uris = []

for track in higher_probability['track_id'].to_list():
    uri = f'spotify:track:{track}'
    uris.append(uri)

request_body = json.dumps({
          "uris" : uris
        })

r = requests.post(f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks', headers=playlist_header, data=request_body)
r.status_code

201