# Extracting data using Spotify API

In [2]:
# Installing and importing packages

!pip3 install requests
import requests
import json
import csv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


Replace the values of the following variables with your own Spotify client ID and client secret in order to access the Spotify API.

In [1]:
# Credentials
#
# The Spotify client ID and client secret are read from the environment
# variables SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET so that real
# credentials never have to be typed into (and saved with) the notebook.
# When a variable is not set, the original placeholder string is kept and
# can still be replaced by hand.

import os

CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', 'clientID')
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', 'clientSecret')

In [23]:
### ACCESSING THE API

# Endpoint that issues access tokens
AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST the client credentials as form data to request a token.
# The timeout keeps the cell from hanging forever on a stalled connection.
auth_response = requests.post(
    AUTH_URL,
    data={
        'grant_type': 'client_credentials',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
    },
    timeout=30,
)

# Fail right here with the HTTP error (e.g. wrong client ID/secret) instead of
# with an unrelated KeyError on 'access_token' a few lines below.
auth_response.raise_for_status()

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

# Authorization header sent with every API request made in this notebook
headers = {
    'Authorization': 'Bearer {token}'.format(token=access_token)
}

# base URL of all Spotify API endpoints
BASE_URL = 'https://api.spotify.com/v1/'

We want to obtain data from the Top 50 playlists for Global, the USA, the UK, Australia, India and South Africa. The playlist IDs were obtained directly from Spotify (https://open.spotify.com) and stored in a dictionary.

In [24]:
# Dictionary mapping each chart (Global and the desired countries) to the ID
# of its TOP 50 playlist.
#
# Only the bare playlist ID is stored. The "?si=..." suffix that comes with a
# copied share link is not part of the ID; keeping it would turn it into a
# query string as soon as the ID is concatenated into a URL and would break
# any URL that appends a sub-path after the ID.

playlists_ids = {'Global': '37i9dQZEVXbNG2KDcFcKOF',
                 'USA': '37i9dQZEVXbLp5XoPON0wI',
                 'UK': '37i9dQZEVXbMwmF30ppw50',
                 'Australia': '37i9dQZEVXbJPcfkRz0wJ0',
                 'India': '37i9dQZEVXbLZ52XmnySJg',
                 'South Africa': '37i9dQZEVXbJV3H3OfCN1z',
                }


Using GET requests from the `requests` library, we iterate through the dictionary to obtain the data for all the songs in each playlist.

In [25]:
### GETTING THE DATA about each playlist with a GET request

d = {}   # playlist label (dictionary key) -> decoded JSON response for that playlist

for key, playlist_id in playlists_ids.items():
    r = requests.get(BASE_URL + 'playlists/' + playlist_id, headers=headers, timeout=30)

    # Stop on an HTTP error instead of storing the error payload, which would
    # only fail later when d[key]['tracks'] is read.
    r.raise_for_status()

    d[key] = r.json()


In [33]:
# Listing every track of each playlist as: name, first album artist, popularity

for country, playlist in d.items():
    print (f"{country} \n")

    playlist_tracks = playlist['tracks']
    for item in playlist_tracks['items']:
        track = item['track']
        name = track['name']
        album_artist = track['album']['artists'][0]['name']
        popularity = track['popularity']
        print (name, '-------', album_artist, '-------', popularity)

    print ("\n")
   

Global 

Ella Baila Sola ------- Eslabon Armado ------- 90
un x100to ------- Grupo Frontera ------- 100
La Bebe - Remix ------- Yng Lvcas ------- 99
Cupid - Twin Ver. ------- FIFTY FIFTY ------- 98
Flowers ------- Miley Cyrus ------- 91
Daylight ------- David Kushner ------- 97
Kill Bill ------- SZA ------- 94
As It Was ------- Harry Styles ------- 92
TQG ------- KAROL G ------- 96
Classy 101 ------- Feid ------- 95
Tattoo ------- Loreen ------- 91
PRC ------- Peso Pluma ------- 96
BESO ------- ROSALÍA ------- 96
El Azul ------- Junior H ------- 95
Creepin' (with The Weeknd & 21 Savage) ------- Metro Boomin ------- 96
See You Again (feat. Kali Uchis) ------- Tyler, The Creator ------- 94
Calm Down (with Selena Gomez) ------- Rema ------- 79
Yandel 150 ------- Yandel ------- 90
Die For You (with Ariana Grande) - Remix ------- The Weeknd ------- 88
Cha Cha Cha ------- Käärijä ------- 88
Boy's a Liar Pt. 2 ------- PinkPantheress ------- 96
Anti-Hero ------- Taylor Swift ------- 94
Last Ni

Now, using some of the data from the playlist information, we want to extract the audio features of each track, as well as the genres of its artist and the date on which its album was released.

To do this, we need to make additional calls to the Spotify API.

### THE FOLLOWING CODE WILL TAKE SOME TIME:

The data used for the analysis was extracted and stored using this code. Move on to analysis.ipynb to continue with the analysis.

In [31]:
def _get_json(url):
    """GET `url` with the notebook's `headers` and return the decoded JSON body.

    Raises requests.HTTPError on an error status so that an error payload is
    never mistaken for track/artist data further down.
    """
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()


data = [] #store final data

tracks_id = []  # not filled by this cell; kept in case other cells reference it

# The same track or artist can appear in several playlists. Remember what was
# already downloaded so that each one is requested only once.
audio_features_by_track = {}   # track id  -> audio-features JSON
genres_by_artist = {}          # artist id -> list of genres


for country in d:

    print (f"{country} \n")  # Keeping track of the iterations in the loop

    country_playlist = d[country]['tracks']

    for song in country_playlist['items']:

        track = song['track']

        # Skip entries that carry no track object or no track id: every
        # request below is built from these ids.
        if track is None or track.get('id') is None:
            continue

        track_id = track['id']
        track_name = track['name']
        track_popularity = track['popularity']

        first_artist = track['artists'][0]
        track_artist = first_artist['name']

        # Genres are looked up for the SAME artist whose name is stored in
        # 'artist' (the track's first artist). The album's first artist can be
        # someone else, which would attach another artist's genres to the row.
        artist_id = first_artist['id']

        # The playlist item already embeds the track's album object, so no
        # extra 'tracks/' or 'albums/' request is made; the release date is
        # read from the embedded album (field documented in the Spotify Web
        # API reference for playlist items).
        album_release = track['album']['release_date']

        # Getting info about the artist's genres (one request per distinct artist)
        if artist_id not in genres_by_artist:
            artist = _get_json(BASE_URL + 'artists/' + artist_id)
            genres_by_artist[artist_id] = artist['genres']
        artist_genre = genres_by_artist[artist_id]

        # Getting audio features (key, liveness, danceability, ...),
        # one request per distinct track
        if track_id not in audio_features_by_track:
            audio_features_by_track[track_id] = _get_json(BASE_URL + 'audio-features/' + track_id)

        # Combine with info obtained previously. Work on a copy so the cached
        # audio features are not overwritten with one playlist's country.
        f = dict(audio_features_by_track[track_id])
        f.update({
            'country_playlist': country,
            'track_name': track_name,
            'popularity': track_popularity,
            'artist': track_artist,
            'artist_genres': artist_genre,
            'album_release': album_release
            })

        data.append(f)
            
       
        

Global 

USA 

UK 

Australia 

India 

South Africa 



Data is saved into a json file for reproducibility and validation purposes

In [32]:
### SAVING THE DATA

# Serialise the collected records and write them to playlists.json
with open("playlists.json", "w") as out_file:
    out_file.write(json.dumps(data))