# Authorization Code Flow

According Spotify API docs - we need to use Spotify Authorization Code Flow to extract user data. The following file will perform extraction of the user's liked and disliked songs. Once the songs are extracted, it will extract features of the song which will be further used to train the model and will be used in EDA.

The extracted data will be stored in `songs_data.csv`

In [20]:
import requests
import numpy as np
import os
from urllib.parse import urlencode
import base64
import webbrowser
from dotenv import load_dotenv, find_dotenv
import pandas as pd

In [2]:
# Extaction of environment variables from .env
load_dotenv(find_dotenv())                          # used to find and load .env file

CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
REDIRECT_URI = os.getenv("REDIRECT_URI")

In [3]:
# Creating authentication header
auth_headers = {
    "client_id" : CLIENT_ID,
    "response_type" : "code",
    "redirect_uri" : REDIRECT_URI,
    "scope" : "playlist-read-private user-top-read user-library-read"
}

In [4]:
webbrowser.open("https://accounts.spotify.com/authorize?" + urlencode(auth_headers))

True

In [5]:
# Extracted the auth code from the url
AUTH_CODE = "AQBVXOk5pQOIrIL7qpHatcI-NvEj34e5ivygq2SCG8W7JOTeA644mpxjTa7g-V6ZVChb6OUVT2uhx1n8vr7d-UOyvVZe1S9WpEYFdjP5mSlzXdbnJ9lntHA0wwpkHw_kaW95u6iNz9JGjNLmnPLSJhAbF6HcRm-TLN0bm9A_Z_-w6m-13A2Wp7US6hQ0QiBa5WTU8ajYrJx0FrPfUcTrDedLqdPQR1_08HGHF72g3l-VfhH5SEtMaiD0Fg"

In [6]:
# Fetching the token
#
# Encoding the credentials and then passing the headers and data
# to a post request

encodedCredentials = base64.b64encode(CLIENT_ID.encode() + b':' + CLIENT_SECRET.encode()).decode("utf-8")

tokenHeaders = {
    "Authorization" : f"Basic {encodedCredentials}",
    "Content-Type" : "application/x-www-form-urlencoded"
}

tokenData = {
    "grant_type" : "authorization_code",
    "code" : AUTH_CODE,
    "redirect_uri" : REDIRECT_URI
}

url = "https://accounts.spotify.com/api/token"

accessResponse = requests.post(
    url,
    data= tokenData,
    headers=tokenHeaders
)

data = accessResponse.json()

TOKEN = data["access_token"]
REFRESH_TOKEN = data["refresh_token"]

In [7]:
# Creating functions to fetch details from Spotify 

# Getting users top 50 songs
def getTopSongs(token):
    url = "https://api.spotify.com/v1/me/top/tracks?limit=100"
    topSongsHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    topSongsResponse = requests.get(
        url,
        headers=topSongsHeaders
    )
    songsJSON = topSongsResponse.json()
    return songsJSON

# Getting users saved (liked) songs
def getSavedSongs(token):
    url = "https://api.spotify.com/v1/me/tracks?limit=50"
    savedSongsHeader = {
        "Authorization" : f"Bearer {token}"
    }
    savedSongsResponse = requests.get(
        url,
        headers=savedSongsHeader
    )
    savedSongs = savedSongsResponse.json()
    return savedSongs

# Getting users playlists in order fetch disliked playlist
def getPlaylist(token):
    url = "https://api.spotify.com/v1/me/playlists?limit=10"
    playlistHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    playlistResponse = requests.get(
        url,
        headers=playlistHeaders
    )
    playlistJSON = playlistResponse.json()
    return playlistJSON

# Getting songs from the users desrired playlist
def getSongsPlaylist(token, playlist_id):
    url = f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks"
    playlistSongsHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    playlistSongsResponse = requests.get(
        url,
        headers=playlistSongsHeaders
    )
    playlistSongsJSON = playlistSongsResponse.json()
    return playlistSongsJSON

In [8]:
# Getting the top 100 songs and fetching their IDs into a list

topSongsReponse = getTopSongs(TOKEN)
topSongsRaw = pd.json_normalize(topSongsReponse['items'])
topSongsID = list(topSongsRaw['id'])

In [9]:
# Getting users saved tracks and getching their IDs into a list

savedSongsResponse = getSavedSongs(TOKEN)
savedSongsRaw = pd.json_normalize(savedSongsResponse['items'])
savedSongsID = list(savedSongsRaw['track.id'])

In [28]:
# Fetching items from playlist and getting the data into a list

playlists = getPlaylist(TOKEN)
playlistsDF = pd.json_normalize(playlists['items'])

# Extracting songs from a playlist with loved songs
zupSongsPlaylistID = playlistsDF.query('name == "ZuppZZup"')['id'].iloc[0]
dilSongsPlaylistID = playlistsDF.query('name == "Dilllll"')['id'].iloc[0]

# Passing extracted songs to create a list
zupPlaylistSongs = getSongsPlaylist(TOKEN, zupSongsPlaylistID)
zupPlaylistSongsDF = pd.json_normalize(zupPlaylistSongs['items'])
zupPlaylistSongsID = list(zupPlaylistSongsDF['track.id'])

dilPlaylistSongs = getSongsPlaylist(TOKEN, dilSongsPlaylistID)
dilPlaylistSongsDF = pd.json_normalize(dilPlaylistSongs['items'])
dilPlaylistSongsID = list(dilPlaylistSongsDF['track.id'])

In [29]:
# Merging top 50 songs and saved songs into one list
# Both will be considered as liked songs

likedSongsID = topSongsID + savedSongsID + zupPlaylistSongsID + dilPlaylistSongsID
likedSongsID = [*set(likedSongsID)]         # Removing any duplicate items from the list

In [37]:
# Getting ID of the Dislike songs playlist
ID_DL1 = playlistsDF.query('name == "Dislike songs"')['id'].iloc[0]
ID_DL2 = playlistsDF.query('name == "Dislike songs 2"')['id'].iloc[0]
ID_DL3 = playlistsDF.query('name == "Dislike songs 3"')['id'].iloc[0]

# Passing the extracted ID into another function to fetch all the songs in the playlist

dislikePLaylistSongs1 = getSongsPlaylist(TOKEN, ID_DL1)
dislikePLaylistSongsDF1 = pd.json_normalize(dislikePLaylistSongs1['items'])
dislikedSongsID1 = list(dislikePLaylistSongsDF1['track.id'])

dislikePLaylistSongs2 = getSongsPlaylist(TOKEN, ID_DL2)
dislikePLaylistSongsDF2 = pd.json_normalize(dislikePLaylistSongs2['items'])
dislikedSongsID2 = list(dislikePLaylistSongsDF2['track.id'])

dislikePLaylistSongs3 = getSongsPlaylist(TOKEN, ID_DL3)
dislikePLaylistSongsDF3 = pd.json_normalize(dislikePLaylistSongs3['items'])
dislikedSongsID3 = list(dislikePLaylistSongsDF3['track.id'])

dislikedSongsID = np.concatenate((dislikedSongsID1, dislikedSongsID2, dislikedSongsID3))
dislikedSongsID = [*set(dislikedSongsID)]

In [38]:
# Splitting the lists into length of 100 elements 
# to allow API to extract the data

print(f'Liked songs total : {len(likedSongsID)}')
print(f'Disliked songs total : {len(dislikedSongsID)}')

Liked songs total : 285
Disliked songs total : 286


In [39]:
# Splitting
likedSongsID_1 = likedSongsID[0: 99]
likedSongsID_2 = likedSongsID[99: 198]
likedSongsID_3 = likedSongsID[198: 285]

dislikedSongsID_1 = dislikedSongsID[0: 99]
dislikedSongsID_2 = dislikedSongsID[99: 198]
dislikedSongsID_3 = dislikedSongsID[198: 286]

In [40]:
# Creating function that results in features of all songs

def getSongFeatures(token, listID):
    url = f'https://api.spotify.com/v1/audio-features?ids="{listID}"'
    songFeaturesHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    featureResponse = requests.get(
        url,
        headers=songFeaturesHeaders
    )
    data = featureResponse.json()
    return data

In [52]:
# Extracting song features
#
# Spotify API requires to pass all the IDs as a string seperating all the IDs using a 
# ',' to access features of multiple songs in one go. For that we need to convert the
# list into a string.

likedSongsID_1String = ''.join([str(element + ",") for element in likedSongsID_1])
likedSongsID_2String = ''.join([str(element + ",") for element in likedSongsID_2])
likedSongsID_3String = ''.join([str(element + ",") for element in likedSongsID_3])

dislikedSongsIDString1 = ''.join([str(element + ",") for element in dislikedSongsID1])
dislikedSongsIDString3 = ''.join([str(element + ",") for element in dislikedSongsID3])

# Calling the function
likedFeatues1 = getSongFeatures(TOKEN, likedSongsID_1String)
likedFeatues2 = getSongFeatures(TOKEN, likedSongsID_2String)
likedFeatues3 = getSongFeatures(TOKEN, likedSongsID_3String)

dislikedFeatures1 = getSongFeatures(TOKEN, dislikedSongsIDString1)
dislikedFeatures3 = getSongFeatures(TOKEN, dislikedSongsIDString3)

likedFeatuesDF1 = pd.json_normalize(likedFeatues1['audio_features'])
likedFeatuesDF2 = pd.json_normalize(likedFeatues2['audio_features'])
likedFeatuesDF3 = pd.json_normalize(likedFeatues3['audio_features'])

dislikedFeaturesDF1 = pd.json_normalize(dislikedFeatures1['audio_features'])
dislikedFeaturesDF3 = pd.json_normalize(dislikedFeatures3['audio_features'])

In [53]:
# Cleaning the features data of liked songs

likedFeatuesDF1 = likedFeatuesDF1.drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms'])
likedFeatuesDF2 = likedFeatuesDF2.drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms'])
likedFeatuesDF3 = likedFeatuesDF3.drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms'])
likedFeatuesDF1['liked/disliked'] = 1
likedFeatuesDF2['liked/disliked'] = 1
likedFeatuesDF3['liked/disliked'] = 1
likedFeatuesDF1.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,liked/disliked
0,,,,,,,,,,,,,1
1,0.526,0.52,6.0,-7.985,0.0,0.0569,0.435,0.0,0.197,0.13,149.119,4.0,1
2,0.568,0.622,4.0,-6.548,1.0,0.109,0.063,0.0018,0.56,0.109,76.984,4.0,1
3,0.537,0.419,11.0,-11.585,1.0,0.0414,0.76,1.1e-05,0.107,0.292,132.261,4.0,1
4,0.76,0.555,7.0,-10.165,1.0,0.0465,0.409,0.213,0.0822,0.52,98.066,4.0,1


In [54]:
# Cleaning the features data of disliked songs

dislikedFeaturesDF1 = dislikedFeaturesDF1.drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms'])
dislikedFeaturesDF3 = dislikedFeaturesDF3.drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms'])

dislikedFeaturesDF1['liked/disliked'] = 0
dislikedFeaturesDF3['liked/disliked'] = 0

dislikedFeaturesDF1.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,liked/disliked
0,,,,,,,,,,,,,0
1,0.649,0.328,9.0,-13.381,1.0,0.0407,0.664,0.00102,0.0793,0.291,89.97,4.0,0
2,0.707,0.948,11.0,-3.313,0.0,0.0503,0.0489,1e-06,0.145,0.79,145.0,4.0,0
3,0.524,0.947,7.0,-2.6,1.0,0.258,0.163,0.0,0.451,0.653,190.013,4.0,0
4,0.591,0.472,6.0,-7.957,0.0,0.046,0.469,0.0,0.134,0.434,170.009,4.0,0


In [55]:
# Merging the 2 dataframes into one dataframe and saving them into a CSV for further use

dfTemp = [likedFeatuesDF1, likedFeatuesDF2, likedFeatuesDF3, dislikedFeaturesDF1, dislikedFeaturesDF3]
songsFeatureData = pd.concat(dfTemp)
songsFeatureData = songsFeatureData.dropna()
songsFeatureData.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,liked/disliked
1,0.526,0.52,6.0,-7.985,0.0,0.0569,0.435,0.0,0.197,0.13,149.119,4.0,1
2,0.568,0.622,4.0,-6.548,1.0,0.109,0.063,0.0018,0.56,0.109,76.984,4.0,1
3,0.537,0.419,11.0,-11.585,1.0,0.0414,0.76,1.1e-05,0.107,0.292,132.261,4.0,1
4,0.76,0.555,7.0,-10.165,1.0,0.0465,0.409,0.213,0.0822,0.52,98.066,4.0,1
5,0.404,0.806,2.0,-4.75,1.0,0.0496,0.198,0.0,0.114,0.112,148.036,4.0,1


In [56]:
# Saving into a csv
songsFeatureData.to_csv('songs_data.csv')