**Libraries**

In [None]:
import json
import os
import random
import time
from datetime import date
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns 
import spotipy
import spotipy.oauth2 as oauth2
import spotipy.util as util
from sklearn.ensemble import RandomForestRegressor
from spotipy.oauth2 import SpotifyClientCredentials

%config InlineBackend.figure_format ='retina'

**Spotify API Call**

In [None]:
# Credentials obtained from the Spotify developer dashboard are read from the
# environment so that no secret is stored in the notebook. Set
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel;
# when a variable is not set the corresponding name below is None.
# NOTE(review): a client secret was previously committed in this cell and
# should be treated as leaked -- rotate it in the developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')

# Not secret: defaults are kept and can be overridden from the environment.
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8910/callback')
username = os.environ.get('SPOTIFY_USERNAME', '224qqnsvbhlau4fhtpl4wpogi')

# Permission requested for the user token
scope = 'user-top-read'


In [None]:
# Build a client from the app credentials first; `sp` stays bound to this
# client if no user token is obtained below.
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Request a token for `username` with the configured scope.
token = util.prompt_for_user_token(username, scope, client_id, client_secret, redirect_uri)

if not token:
    print("Can't get token for", username)
else:
    # Rebind `sp` to a client authenticated with the user token.
    sp = spotipy.Spotify(auth=token)

**Extract Top Tracks**

In [None]:
results = sp.current_user_top_tracks(limit=50, offset=0, time_range='long_term')

# Pull one list per field out of the returned track items.
top_items = results['items']

track_name = [item['name'] for item in top_items]
track_id = [item['id'] for item in top_items]
artist = [item["artists"][0]["name"] for item in top_items]  # first listed artist only
new = []  # not used in this cell
album = [item["album"]["name"] for item in top_items]
duration = [item["duration_ms"] for item in top_items]
popularity = [item["popularity"] for item in top_items]

# Assemble the lists into the final dataframe, one row per track.
df_top_tracks = pd.DataFrame({
    "track_name": track_name,
    "album": album,
    "track_id": track_id,
    "artist": artist,
    "duration": duration,
    "popularity": popularity,
})

df_top_tracks

**Feature Extraction**

In [None]:
def get_features(sp, df, batch_size=100):
    """Attach audio features to every track listed in ``df``.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(list_of_track_ids)`` that returns one
        feature dict (or ``None``) per requested id.
    df : pandas.DataFrame
        Must contain the columns ``track_id`` and ``track_name``.
    batch_size : int, optional
        Number of ids sent per ``audio_features`` call. Defaults to 100, the
        per-request maximum of Spotify's audio-features endpoint.

    Returns
    -------
    pandas.DataFrame
        ``track_id`` and ``track_name`` followed by one column per audio
        feature, with the same rows and index as ``df``. Tracks for which no
        features were returned hold NaN in the feature columns.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                       'duration_ms']

    playlist = df[['track_id', 'track_name']]
    track_ids = playlist['track_id'].tolist()

    # Look up every track (not only the first 50), one batch per request.
    features = []
    for start in range(0, len(track_ids), batch_size):
        batch_ids = track_ids[start:start + batch_size]
        batch = sp.audio_features(batch_ids)
        # An empty/None response is padded so rows stay lined up with the tracks.
        features += batch if batch else [None] * len(batch_ids)

    # A track without features gets a NaN row instead of raising on None.
    missing_row = [float('nan')] * len(feature_columns)
    feature_list = [
        [feature[column] for column in feature_columns] if feature else missing_row
        for feature in features
    ]

    # Reuse the caller's index so the column-wise concat pairs each track with
    # its own features even when ``df`` does not have a default 0..n-1 index.
    df_audio_feature = pd.DataFrame(feature_list, columns=feature_columns, index=playlist.index)
    df_playlist_audio_features = pd.concat([playlist, df_audio_feature], axis=1)
    return df_playlist_audio_features

In [None]:
# `mode` is a binary flag, so it is left out of the feature table.
df_final_features = get_features(sp, df_top_tracks).drop(columns=['mode'])
df_final_features

We can take a look at the frequency of the values present in each feature.

In [None]:
# One histogram per numeric column of the feature table.
df_final_features.hist(figsize=(35, 25))
# Tighten the spacing of the figure that hist() just created, then render it.
plt.gcf().tight_layout()
plt.show()

I will now take all of my built-up playlists and create one compilation of the best songs using the features provided.

**User Playlist Extraction**

In [None]:
def user_playlist(sp, username):
    """List the playlists in a user's library.

    Parameters
    ----------
    sp : object
        Client exposing ``user_playlists(username)`` and ``next(page)``.
    username : str
        Account whose playlists are listed.

    Returns
    -------
    pandas.DataFrame
        Columns ``playlist_id``, ``playlist_name`` and ``#tracks``, one row
        per playlist across all result pages.
    """
    records = []

    page = sp.user_playlists(username)
    while page:
        for item in page['items']:
            if item is None:
                # Defensive: ignore entries that come back empty.
                continue
            records.append({'playlist_id': item['id'],
                            'playlist_name': item['name'],
                            '#tracks': item['tracks']['total']})
        # Follow the paging link so libraries larger than one page are complete.
        page = sp.next(page) if page.get('next') else None

    df_your_lib = pd.DataFrame(records, columns=['playlist_id', 'playlist_name', '#tracks'])
    return df_your_lib

In [None]:
# Playlists whose name contains any of these words are filtered out.
searchfor = ['Custom', 'Discover']
name_pattern = '|'.join(searchfor)  # alternation pattern passed to str.contains

df_user_playlists = user_playlist(sp, username)
is_excluded = df_user_playlists['playlist_name'].str.contains(name_pattern)
df_user_playlists = df_user_playlists[~is_excluded]
df_user_playlists

I don't have many custom playlists, so I will manually add playlists that I enjoy and remove any playlist that has fewer than 10 songs, to ensure we have sufficient data.

In [None]:
# Hand-picked playlists added on top of the ones found in the library:
# (playlist id, playlist name, number of tracks).
extra_playlists = pd.DataFrame(
    [['1QnKkNOpY8jPDIUT4oulDH', 'Nida recomendations', 60],
     ['2XH7v4B8Tf9D4UYJPxtJlX', 'throwback', 279],
     ['4JbURQJ0cA9Go65G0JmBq8', 'train', 20]],
    columns=df_user_playlists.columns,
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to add rows. Dropping repeated playlist ids keeps this cell safe to re-run
# (otherwise each run would add the same three playlists again).
df_user_playlists = (
    pd.concat([df_user_playlists, extra_playlists], ignore_index=True)
    .drop_duplicates(subset='playlist_id')
)

# Keep only playlists with at least 10 tracks (new frame, no in-place drop).
df_user_playlists = df_user_playlists[df_user_playlists['#tracks'] >= 10]
df_user_playlists

Now that my list of playlists has been made, we can extract the tracks from each playlist and analyze them to single out the best ones. It would be interesting to test this out on an account with many more playlists than mine has, but we will go on with what we have.

**Fetching tracks for each playlist**

In [None]:
def get_playlist_tracks(sp, playlist_id):
    """Collect the id and name of every track in a playlist.

    Parameters
    ----------
    sp : object
        Client exposing ``playlist_tracks(playlist_id, fields, limit, offset,
        market)`` that returns a page dict with ``items`` and ``next``.
    playlist_id : str
        Playlist to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``track_id`` and ``track_name``, one row per playlist entry
        that carries track data. Entries without track data are skipped.
    """
    page_size = 100
    tracks = []
    offset = 0

    # Request page after page until the response reports no further page.
    while True:
        track_list = sp.playlist_tracks(playlist_id, fields=None, limit=page_size,
                                        offset=offset, market=None)
        tracks += track_list['items']
        if track_list['next'] is None:
            break
        offset += page_size

    track_id = []
    track_name = []
    for item in tracks:
        track = item.get('track') if item else None
        if not track:
            # Skip only this entry. Previously a single entry with no track
            # data aborted the loop and silently dropped every later track.
            continue
        # .get keeps both lists the same length even if a key is absent.
        track_id.append(track.get('id'))
        track_name.append(track.get('name'))

    df_playlist_tracks = pd.DataFrame({'track_id': track_id, 'track_name': track_name})
    return df_playlist_tracks

In [None]:
# Smoke test: fetch the tracks of one known playlist and display them.
sample_tracks = get_playlist_tracks(sp, '1S455AG3UORA5W0HPZZmlI')
sample_tracks

**Get audio features within a playlist**

In [None]:
def get_audio_features(sp, playlist_id, batch_size=100):
    """Return the tracks of a playlist together with their audio features.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(list_of_track_ids)`` (and whatever
        ``get_playlist_tracks`` needs to read the playlist).
    playlist_id : str
        Playlist to analyse.
    batch_size : int, optional
        Number of ids sent per ``audio_features`` call. Defaults to 100, the
        per-request maximum of Spotify's audio-features endpoint.

    Returns
    -------
    pandas.DataFrame
        ``track_id`` and ``track_name`` followed by one column per audio
        feature. Rows with any missing value (for example tracks without an
        id or without features) are dropped.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                       'duration_ms']

    playlist = get_playlist_tracks(sp, playlist_id)
    track_ids = playlist['track_id'].tolist()

    # Each distinct id is looked up once; entries without a string id
    # cannot be looked up at all.
    lookup_ids = [tid for tid in dict.fromkeys(track_ids) if isinstance(tid, str)]

    features_by_id = {}
    for start in range(0, len(lookup_ids), batch_size):
        batch_ids = lookup_ids[start:start + batch_size]
        try:
            batch = sp.audio_features(batch_ids) or []
        except Exception as err:
            # Best effort, as before, but the failure is reported instead of
            # being swallowed silently.
            print("Could not fetch audio features for", len(batch_ids), "tracks:", err)
            continue
        for tid, feature in zip(batch_ids, batch):
            if feature:
                features_by_id[tid] = feature

    # Build exactly one feature row per playlist row, keyed by track id. This
    # keeps features attached to the right track: previously a skipped track
    # shifted every later feature row up by one before the concat.
    nan = float('nan')
    feature_list = []
    for tid in track_ids:
        feature = features_by_id.get(tid, {})
        feature_list.append([feature.get(column, nan) for column in feature_columns])

    df_audio_feature = pd.DataFrame(feature_list, columns=feature_columns, index=playlist.index)
    df_playlist_audio_features = pd.concat([playlist, df_audio_feature], axis=1)
    df_playlist_audio_features = df_playlist_audio_features.dropna()
    return df_playlist_audio_features

In [None]:
def get_audio_features_mean(sp, playlist_id):
    """Average every audio feature over the tracks of one playlist.

    Parameters
    ----------
    sp : object
        Client passed through to ``get_audio_features``.
    playlist_id : str
        Playlist to summarise; also used as the name of the result column.

    Returns
    -------
    pandas.DataFrame
        A single column named ``playlist_id`` indexed by feature name, holding
        the mean of each numeric feature column.
    """
    playlist = get_audio_features(sp, playlist_id)
    # numeric_only=True leaves out the text columns (track_id, track_name);
    # since pandas 2.0 a plain mean() raises on them instead of skipping them.
    feature_means = playlist.mean(numeric_only=True)
    df_mean_playlist = feature_means.to_frame(name=playlist_id)
    return df_mean_playlist

In [None]:
# One single-column frame of feature means per playlist, in table order.
dataframes = [
    get_audio_features_mean(sp, playlist_id)
    for playlist_id in df_user_playlists['playlist_id']
]

dataframes