# Using the Spotify API to Extract Data from My Personal Playlist

In [248]:
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials

# Authentication - without user (Client Credentials flow).
# Secrets are not hard-coded in the notebook: when no client_id/client_secret
# is passed, spotipy reads them from the SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET environment variables. Export both before starting
# the kernel.
client_credentials_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [249]:
# Public playlist whose tracks feed the recommender.
playlist_link = "https://open.spotify.com/playlist/5JFforWrze2uuzHMYMygIN"

# Collect the Spotify URI of every track item returned for the playlist.
track_uris = [
    item["track"]["uri"]
    for item in sp.playlist_tracks(playlist_link)["items"]
]

# Extracting audio features for the tracks using the track URIs obtained above

In [250]:
# Fetch the playlist items and pull out each track's display name.
# NOTE(review): only the items of this single response are used; presumably
# the API pages its results, so a longer playlist may need pagination - confirm.
data = sp.playlist_tracks(playlist_link)
trackname = [x["track"]["name"] for x in data["items"]]

# One-column frame of song titles. The column is named at construction time
# because DataFrame.set_axis(..., inplace=True) was deprecated in pandas 1.5
# and removed in pandas 2.0.
df1 = pd.DataFrame(trackname, columns=["song_title"])

# Audio features for every track URI, one row per track.
features = sp.audio_features(track_uris)
df = pd.DataFrame(features)

In [251]:
# Put the audio features and the song titles side by side, keeping only the
# row labels that are present in both frames.
Data = pd.concat(objs=[df, df1], join='inner', axis=1)
Data.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,song_title
0,0.371,0.268,1,-10.506,1,0.0281,0.748,0.0517,0.104,0.165,102.617,audio_features,7D0RhFcb3CrfPuTJ0obrod,spotify:track:7D0RhFcb3CrfPuTJ0obrod,https://api.spotify.com/v1/tracks/7D0RhFcb3Crf...,https://api.spotify.com/v1/audio-analysis/7D0R...,227093,3,Sparks
1,0.585,0.595,8,-10.444,1,0.0328,0.0696,0.266,0.0837,0.039,140.037,audio_features,2LKOHdMsL0K9KwcPRlJK2v,spotify:track:2LKOHdMsL0K9KwcPRlJK2v,https://api.spotify.com/v1/tracks/2LKOHdMsL0K9...,https://api.spotify.com/v1/audio-analysis/2LKO...,259147,4,After Dark
2,0.458,0.525,2,-11.8,1,0.0273,0.0661,0.231,0.0906,0.317,130.105,audio_features,1h1xwHkWC8a0EPzTmLBXoG,spotify:track:1h1xwHkWC8a0EPzTmLBXoG,https://api.spotify.com/v1/tracks/1h1xwHkWC8a0...,https://api.spotify.com/v1/audio-analysis/1h1x...,335880,4,She - String Mix; 2007 Remaster
3,0.62,0.93,1,-3.685,1,0.0374,0.00043,0.0,0.0686,0.609,106.22,audio_features,2g2a5kDeZexbUTD8abcvm6,spotify:track:2g2a5kDeZexbUTD8abcvm6,https://api.spotify.com/v1/tracks/2g2a5kDeZexb...,https://api.spotify.com/v1/audio-analysis/2g2a...,190173,4,Lifestyles of the Rich & Famous
4,0.634,0.741,1,-5.47,1,0.0751,0.0067,0.0,0.154,0.499,102.988,audio_features,5TclgAYTVdTRLnHvTUhyIP,spotify:track:5TclgAYTVdTRLnHvTUhyIP,https://api.spotify.com/v1/tracks/5TclgAYTVdTR...,https://api.spotify.com/v1/audio-analysis/5Tcl...,182893,4,Entertain Me


# Features 

In [252]:
# Summarise the audio-features frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      77 non-null     float64
 1   energy            77 non-null     float64
 2   key               77 non-null     int64  
 3   loudness          77 non-null     float64
 4   mode              77 non-null     int64  
 5   speechiness       77 non-null     float64
 6   acousticness      77 non-null     float64
 7   instrumentalness  77 non-null     float64
 8   liveness          77 non-null     float64
 9   valence           77 non-null     float64
 10  tempo             77 non-null     float64
 11  type              77 non-null     object 
 12  id                77 non-null     object 
 13  uri               77 non-null     object 
 14  track_href        77 non-null     object 
 15  analysis_url      77 non-null     object 
 16  duration_ms       77 non-null     int64  
 17 

# Feature Scaling

In [253]:
# Numeric audio-feature columns that are fed to the scaler and, from there,
# to the similarity computations.
feature_cols = [
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_ms',
    'time_signature',
]

In [264]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the selected feature columns so they share a common range
# before any similarity is computed.
scaler = MinMaxScaler()
scaler.fit(df[feature_cols])
normalized_df = scaler.transform(df[feature_cols])

# Peek at the first two scaled rows.
print(normalized_df[:2])

[[0.25405405 0.22299573 0.09090909 0.46320319 1.         0.02368265
  0.760157   0.05470899 0.09104345 0.1875184  0.32895491 0.33187752
  0.        ]
 [0.63963964 0.59016393 0.72727273 0.46706998 1.         0.05150977
  0.07070999 0.28148148 0.0610405  0.00206064 0.66934406 0.45833974
  0.5       ]]


## Building the Recommendation System using Cosine Similarity and Sigmoid Kernel 


In [299]:
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity

# Map each song title to its row position in the feature matrix.
indices = pd.Series(df.index, index=Data['song_title'])
# Keep only the first occurrence of each title. Series.drop_duplicates() would
# de-duplicate the *values* (row positions, which are already unique) and leave
# repeated titles in place, so indices[title] could return several rows.
indices = indices[~indices.index.duplicated(keep='first')]

# Pairwise song-to-song similarity matrices over the scaled features.
cosine = cosine_similarity(normalized_df)
sig_kernel = sigmoid_kernel(normalized_df)

def recommendation_generation(song_title, model_type=cosine, top_n=10):
    """Return the titles of the songs most similar to ``song_title``.

    Parameters
    ----------
    song_title : str
        Title to look up in the module-level ``indices`` series.
    model_type : 2-D array-like, optional
        Square song-by-song similarity matrix; row ``i`` holds the scores of
        song ``i`` against every song. Defaults to the ``cosine`` matrix.
    top_n : int, optional
        Number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        ``Data['song_title']`` entries for the ``top_n`` highest-scoring songs,
        best match first, never including the query song itself.

    Raises
    ------
    KeyError
        If ``song_title`` is not present in ``indices``.
    """
    index = indices[song_title]
    # A title that appears more than once yields a Series of positions;
    # fall back to its first occurrence so the row lookup stays scalar.
    if isinstance(index, pd.Series):
        index = index.iloc[0]

    # Pair every other song with its score. The query row is dropped
    # explicitly: it is not guaranteed to rank first (the sigmoid kernel does
    # not give a song its maximum score against itself, and ties can reorder
    # results), so slicing off position 0 could drop a real match instead.
    score = [
        (position, value)
        for position, value in enumerate(model_type[index])
        if position != index
    ]

    # Sort the most similar songs first and keep the best ``top_n``.
    similarity_score = sorted(score, key=lambda x: x[1], reverse=True)[:top_n]
    top_songs_index = [i[0] for i in similarity_score]

    return Data['song_title'].iloc[top_songs_index]

In [300]:
# Show the songs ranked closest to 'Circles' by the cosine-similarity matrix.
print("Recommended Songs (Using Cosine Similarity):\n")
print(recommendation_generation(song_title='Circles', model_type=cosine).values)

Recommended Songs (Using Cosine Similarity):

['Lifestyles of the Rich & Famous' 'Entertain Me' 'Fuel' 'Perfect'
 'Save Your Tears' 'STAY' 'Lips Of An Angel' 'Not Meant to Be'
 'Without You' 'December']


In [301]:
# Show the songs ranked closest to 'Drown' by the sigmoid-kernel matrix.
print("Recommended Songs (Using Sigmoid Kernal):\n")
print(recommendation_generation(song_title='Drown', model_type=sig_kernel).values)

Recommended Songs (Using Sigmoid Kernal):

["Savin' Me" 'Save Your Tears' 'Lost in You' 'Lifeline' 'Run'
 'The Unforgiven II' 'Breathing' 'Unwell' "Freakin' Out On the Interstate"
 'Helena']
