# Authorization Code Flow

According Spotify API docs - we need to use Spotify Authorization Code Flow to extract user data. The following file will perform extraction of the user's liked and disliked songs. Once the songs are extracted, it will extract features of the song which will be further used to train the model and will be used in EDA.

The extracted data will be stored in `songs_data.csv`

In [1]:
import requests
import os
from urllib.parse import urlencode
import base64
import webbrowser
from dotenv import load_dotenv, find_dotenv
import pandas as pd

In [2]:
# Extaction of environment variables from .env
load_dotenv(find_dotenv())                          # used to find and load .env file

CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
REDIRECT_URI = os.getenv("REDIRECT_URI")

In [7]:
# Creating authentication header
auth_headers = {
    "client_id" : CLIENT_ID,
    "response_type" : "code",
    "redirect_uri" : REDIRECT_URI,
    "scope" : "playlist-read-private user-top-read user-library-read"
}

In [8]:
webbrowser.open("https://accounts.spotify.com/authorize?" + urlencode(auth_headers))

True

In [9]:
# Extracted the auth code from the url
AUTH_CODE = "AQDk4SU2Q5DxYP108NzG7dPd7tdsiIYS4J8dnT0AINAHIIOpAUFsvqT03fVMI8PCg8bLtxnPlsgXUUIcYHndQ8YK93CtcyEaSTyZQh3fE1Tsg5IT8IsJd3THhrQgiC6ZxKrOksnjeN61hirSbege74Vgz7olYUuECx_kHvE1Hvul9-TXKr7Jyx1RLbdDzCDz-Rc_3P_B-kBzGGVyKiuCXRpoDNpBWrUAaK_p50AlZBYw8A6a8lM9d7IYeQ"

In [10]:
# Fetching the token
#
# Encoding the credentials and then passing the headers and data
# to a post request

encodedCredentials = base64.b64encode(CLIENT_ID.encode() + b':' + CLIENT_SECRET.encode()).decode("utf-8")

tokenHeaders = {
    "Authorization" : f"Basic {encodedCredentials}",
    "Content-Type" : "application/x-www-form-urlencoded"
}

tokenData = {
    "grant_type" : "authorization_code",
    "code" : AUTH_CODE,
    "redirect_uri" : REDIRECT_URI
}

url = "https://accounts.spotify.com/api/token"

accessResponse = requests.post(
    url,
    data= tokenData,
    headers=tokenHeaders
)

data = accessResponse.json()

TOKEN = data["access_token"]
REFRESH_TOKEN = data["refresh_token"]

In [11]:
# Creating functions to fetch details from Spotify 

# Getting users top 50 songs
def getTopSongs(token):
    url = "https://api.spotify.com/v1/me/top/tracks?limit=100"
    topSongsHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    topSongsResponse = requests.get(
        url,
        headers=topSongsHeaders
    )
    songsJSON = topSongsResponse.json()
    return songsJSON

# Getting users saved (liked) songs
def getSavedSongs(token):
    url = "https://api.spotify.com/v1/me/tracks?limit=50"
    savedSongsHeader = {
        "Authorization" : f"Bearer {token}"
    }
    savedSongsResponse = requests.get(
        url,
        headers=savedSongsHeader
    )
    savedSongs = savedSongsResponse.json()
    return savedSongs

# Getting users playlists in order fetch disliked playlist
def getPlaylist(token):
    url = "https://api.spotify.com/v1/me/playlists?limit=10"
    playlistHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    playlistResponse = requests.get(
        url,
        headers=playlistHeaders
    )
    playlistJSON = playlistResponse.json()
    return playlistJSON

# Getting songs from the users desrired playlist
def getSongsPlaylist(token, playlist_id):
    url = f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks"
    playlistSongsHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    playlistSongsResponse = requests.get(
        url,
        headers=playlistSongsHeaders
    )
    playlistSongsJSON = playlistSongsResponse.json()
    return playlistSongsJSON

In [16]:
# Getting the top 50 songs and fetching their IDs into a list

topSongsReponse = getTopSongs(TOKEN)
topSongsRaw = pd.json_normalize(topSongsReponse['items'])
topSongsID = list(topSongsRaw['id'])

In [17]:
# Getting users saved tracks and getching their IDs into a list

savedSongsResponse = getSavedSongs(TOKEN)
savedSongsRaw = pd.json_normalize(savedSongsResponse['items'])
savedSongsID = list(savedSongsRaw['track.id'])

In [20]:
# Merging top 50 songs and saved songs into one list
# Both will be considered as liked songs

likedSongsID = topSongsID + savedSongsID
likedSongsID = [*set(likedSongsID)]         # Removing any duplicate items from the list

In [21]:
# Fetching items from playlist and getting the data into a list

playlists = getPlaylist(TOKEN)
playlistsDF = pd.json_normalize(playlists['items'])

# Getting ID of the Dislike songs playlist
ID = playlistsDF.query('name == "Dislike Songs"')['id'].iloc[0]

# Passing the extracted ID into another function to fetch all the songs in the playlist

dislikePLaylistSongs = getSongsPlaylist(TOKEN, ID)
dislikePLaylistSongsDF = pd.json_normalize(dislikePLaylistSongs['items'])
dislikedSongsID = list(dislikePLaylistSongsDF['track.id'])

In [22]:
# Creating function that results in features of all songs

def getSongFeatures(token, listID):
    url = f'https://api.spotify.com/v1/audio-features?ids="{listID}"'
    songFeaturesHeaders = {
        "Authorization" : f"Bearer {token}"
    }
    featureResponse = requests.get(
        url,
        headers=songFeaturesHeaders
    )
    data = featureResponse.json()
    return data

In [23]:
# Extracting song features
#
# Spotify API requires to pass all the IDs as a string seperating all the IDs using a 
# ',' to access features of multiple songs in one go. For that we need to convert the
# list into a string.

likedSongsIDString = ''.join([str(element + ",") for element in likedSongsID])
dislikedSongsIDString = ''.join([str(element + ",") for element in dislikedSongsID])

# Calling the function
likedFeatues = getSongFeatures(TOKEN, likedSongsIDString)
dislikedFeatures = getSongFeatures(TOKEN, dislikedSongsIDString)

likedFeatuesDF = pd.json_normalize(likedFeatues['audio_features'])
dislikedFeaturesDF = pd.json_normalize(dislikedFeatures['audio_features'])

In [24]:
# Cleaning the features data of liked songs

likedFeatuesDF = likedFeatuesDF.drop(index=0)
likedFeatuesDF = likedFeatuesDF.iloc[:, :11]
likedFeatuesDF['liked/disliked'] = 1
likedFeatuesDF.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,liked/disliked
1,0.445,0.83,2.0,-7.328,1.0,0.0454,0.00273,0.001,0.131,0.693,125.432,1
2,0.581,0.424,8.0,-6.616,1.0,0.0269,0.723,0.0,0.101,0.352,98.936,1
3,0.637,0.825,11.0,-6.612,1.0,0.0815,0.352,0.0,0.389,0.835,92.029,1
4,0.759,0.54,9.0,-6.039,0.0,0.0287,0.037,0.0,0.0945,0.75,116.947,1
5,0.5,0.337,10.0,-15.412,1.0,0.0381,0.0725,0.772,0.0962,0.0393,144.116,1


In [25]:
# Cleaning the features data of disliked songs

dislikedFeaturesDF = dislikedFeaturesDF.drop(index=0)
dislikedFeaturesDF = dislikedFeaturesDF.iloc[:, :11]
dislikedFeaturesDF['liked/disliked'] = 0
dislikedFeaturesDF.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,liked/disliked
1,0.272,0.473,6.0,-11.03,0.0,0.0944,0.788,0.119,0.949,0.323,200.01,0
2,0.585,0.104,5.0,-17.34,1.0,0.0921,0.951,0.0,0.115,0.664,75.914,0
3,0.743,0.441,2.0,-10.273,1.0,0.0462,0.611,0.00267,0.123,0.697,134.168,0
4,0.573,0.507,5.0,-13.59,1.0,0.0326,0.372,3.5e-05,0.124,0.358,143.875,0
5,0.224,0.5,7.0,-13.006,1.0,0.0553,0.86,0.219,0.377,0.278,180.554,0


In [26]:
# Merging the 2 dataframes into one dataframe and saving them into a CSV for further use

dfTemp = [likedFeatuesDF, dislikedFeaturesDF]
songsFeatureData = pd.concat(dfTemp)
songsFeatureData.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,liked/disliked
1,0.445,0.83,2.0,-7.328,1.0,0.0454,0.00273,0.001,0.131,0.693,125.432,1
2,0.581,0.424,8.0,-6.616,1.0,0.0269,0.723,0.0,0.101,0.352,98.936,1
3,0.637,0.825,11.0,-6.612,1.0,0.0815,0.352,0.0,0.389,0.835,92.029,1
4,0.759,0.54,9.0,-6.039,0.0,0.0287,0.037,0.0,0.0945,0.75,116.947,1
5,0.5,0.337,10.0,-15.412,1.0,0.0381,0.0725,0.772,0.0962,0.0393,144.116,1


In [27]:
# Saving into a csv
songsFeatureData.to_csv('songs_data.csv')