In [1]:
import base64
import datetime
import os
import time
from urllib.parse import urlencode

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

In [2]:
# Spotify application credentials are read from the environment so that no
# secret is stored in the notebook. Export SPOTIFY_CLIENT_ID and
# SPOTIFY_CLIENT_SECRET before starting the kernel. If either is unset the value
# is None, and SpotifyAPI.get_client_credentials() raises
# "You must set client_id and client_secret" on first use.
# NOTE(review): credentials that were previously committed in this cell should
# be rotated in the Spotify developer dashboard.
client_id = os.environ.get("SPOTIFY_CLIENT_ID")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET")

In [3]:

class SpotifyAPI(object):
    """Small Spotify Web API client using the client-credentials grant.

    The client caches the bearer token returned by the token endpoint and
    requests a new one when the cached token is missing or past its expiry.
    Lookup and search helpers return the decoded JSON body on a 2xx response
    and an empty dict otherwise.
    """

    # Cached bearer token; None until perform_auth() succeeds.
    access_token = None
    # Expiry of the cached token. Initialised when the class is defined, so a
    # new instance is treated as having no valid token.
    access_token_expires = datetime.datetime.now()
    # Set by perform_auth(); True only if the issued token was already expired.
    access_token_did_expire = True
    client_id = None
    client_secret = None
    token_url = "https://accounts.spotify.com/api/token"

    def __init__(self, client_id, client_secret, *args, **kwargs):
        """Store the application credentials used to request tokens."""
        super().__init__(*args, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret

    @staticmethod
    def _is_success(status_code):
        """Return True for any HTTP 2xx status code (200 through 299 inclusive)."""
        return 200 <= status_code <= 299

    def get_client_credentials(self):
        """
        Returns a base64 encoded string of "client_id:client_secret".

        Raises:
            Exception: if either credential is None.
        """
        client_id = self.client_id
        client_secret = self.client_secret
        if client_secret is None or client_id is None:
            raise Exception("You must set client_id and client_secret")
        client_creds = f"{client_id}:{client_secret}"
        client_creds_b64 = base64.b64encode(client_creds.encode())
        return client_creds_b64.decode()

    def get_token_headers(self):
        """Return the Basic-auth header sent to the token endpoint."""
        client_creds_b64 = self.get_client_credentials()
        return {
            "Authorization": f"Basic {client_creds_b64}"
        }

    def get_token_data(self):
        """Return the form body sent to the token endpoint."""
        return {
            "grant_type": "client_credentials"
        }

    def perform_auth(self):
        """Request a new access token and cache it with its expiry time.

        Returns:
            True once the token has been stored.

        Raises:
            Exception: if the token endpoint does not answer with a 2xx status.
        """
        token_url = self.token_url
        token_data = self.get_token_data()
        token_headers = self.get_token_headers()
        r = requests.post(token_url, data=token_data, headers=token_headers)
        if not self._is_success(r.status_code):
            raise Exception("Could not authenticate client.")
        data = r.json()
        now = datetime.datetime.now()
        access_token = data['access_token']
        expires_in = data['expires_in']  # seconds
        expires = now + datetime.timedelta(seconds=expires_in)
        self.access_token = access_token
        self.access_token_expires = expires
        self.access_token_did_expire = expires < now
        return True

    def get_access_token(self):
        """Return the cached token, authenticating first if it is missing or expired.

        Authentication is attempted at most once per call, so a token that
        comes back already expired cannot trigger unbounded re-authentication.
        """
        now = datetime.datetime.now()
        if self.access_token is None or self.access_token_expires < now:
            self.perform_auth()
        return self.access_token

    def get_resource_header(self):
        """Return the Bearer-auth header used for API requests."""
        access_token = self.get_access_token()
        headers = {
            "Authorization": f"Bearer {access_token}"
        }
        return headers

    def get_resource(self, lookup_id, resource_type='albums', version='v1'):
        """GET a single resource by id.

        Args:
            lookup_id: identifier appended to the endpoint path.
            resource_type: path segment naming the resource collection.
            version: API version path segment.

        Returns:
            The decoded JSON body, or {} when the response status is not 2xx.
        """
        endpoint = f"https://api.spotify.com/{version}/{resource_type}/{lookup_id}"
        headers = self.get_resource_header()
        r = requests.get(endpoint, headers=headers)
        if not self._is_success(r.status_code):
            return {}
        return r.json()

    def get_album(self, _id):
        """Look up an album by id."""
        return self.get_resource(_id, resource_type='albums')

    def get_artist(self, _id):
        """Look up an artist by id."""
        return self.get_resource(_id, resource_type='artists')

    def base_search(self, query_params):  # type
        """Run a search with an already URL-encoded query string.

        Returns:
            The decoded JSON body, or {} when the response status is not 2xx.
        """
        headers = self.get_resource_header()
        endpoint = "https://api.spotify.com/v1/search"
        lookup_url = f"{endpoint}?{query_params}"
        r = requests.get(lookup_url, headers=headers)
        if not self._is_success(r.status_code):
            return {}
        return r.json()

    def search(self, query=None, operator=None, operator_query=None, search_type='artist'):
        """Build a search query and pass it to base_search().

        Args:
            query: a query string, or a dict whose items are joined as
                "key:value" pairs separated by spaces.
            operator: optional "or" / "not" (case-insensitive); any other
                value is ignored.
            operator_query: string appended after the operator; ignored
                unless it is a str.
            search_type: result type, lower-cased before sending.

        Raises:
            Exception: if query is None.
        """
        if query is None:
            raise Exception("A query is required")
        if isinstance(query, dict):
            query = " ".join([f"{k}:{v}" for k, v in query.items()])
        if operator is not None and operator_query is not None:
            if operator.lower() in ("or", "not"):
                operator = operator.upper()
                if isinstance(operator_query, str):
                    query = f"{query} {operator} {operator_query}"
        query_params = urlencode({"q": query, "type": search_type.lower()})
        return self.base_search(query_params)


In [4]:
# Shared API client used by the collection cells below.
spotify = SpotifyAPI(client_id=client_id, client_secret=client_secret)

In [38]:
# Load the track table that the audio-feature columns will be added to.
tracks_csv_path = "../data/tracks_spotify_all.csv"
audio_features = pd.read_csv(tracks_csv_path)

In [39]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
audio_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29681 entries, 0 to 29680
Data columns (total 21 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Unnamed: 0          29681 non-null  int64  
 1   Unnamed: 0.1        29681 non-null  int64  
 2   original_index      29681 non-null  int64  
 3   song                29681 non-null  object 
 4   artist              29681 non-null  object 
 5   primary_artist      29681 non-null  object 
 6   featured_artist_1   4367 non-null   object 
 7   featured_artist_2   592 non-null    object 
 8   is_collected        29681 non-null  bool   
 9   match_song          18155 non-null  object 
 10  duration_ms         18155 non-null  float64
 11  spotify_popularity  18155 non-null  float64
 12  song_id             18155 non-null  object 
 13  song_uri            18155 non-null  object 
 14  match_artist        18155 non-null  object 
 15  artist_id           18155 non-null  object 
 16  rele

In [40]:
# Progress flag: False for every row until get_audio_features() marks it True.
audio_features.loc[:, "audio_collected"] = False

In [41]:
# Fields copied from the audio-features response, in the order their DataFrame
# columns are created (later cells slice these columns by position).
AUDIO_FEATURE_KEYS = (
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
)


def get_audio_features(i):
    """Fetch audio features for row ``i`` and store them in ``audio_features``.

    Reads ``song_id`` from the global ``audio_features`` frame, looks it up via
    ``spotify.get_resource(..., resource_type="audio-features")`` and copies
    each field in ``AUDIO_FEATURE_KEYS`` into the row. ``audio_collected`` is
    set to True only when every field was present.

    Args:
        i: row label in ``audio_features``.

    Returns:
        True if the row was filled in. False if the row has no ``song_id`` or
        the lookup returned an empty or incomplete payload; in that case the
        row is left untouched and no request is made for a missing id.
    """
    song_id = audio_features.loc[i, "song_id"]
    if pd.isna(song_id):
        # Unmatched track: there is nothing to look up.
        return False

    spot_song = spotify.get_resource(song_id, resource_type="audio-features")
    # get_resource returns {} on a non-2xx response; check all keys up front so
    # a partial payload never leaves a half-written row.
    if not spot_song or any(key not in spot_song for key in AUDIO_FEATURE_KEYS):
        return False

    for key in AUDIO_FEATURE_KEYS:
        audio_features.loc[i, key] = spot_song[key]

    audio_features.loc[i, "audio_collected"] = True
    return True


In [55]:
# Collect audio features for the first 100 rows that are not yet collected.
# No explicit perform_auth() call is made per row: get_resource() obtains the
# token through get_access_token(), which authenticates only when the cached
# token is missing or expired.
for i in tqdm(range(100)):
    try:
        if audio_features.loc[i, "audio_collected"]:
            continue
        if pd.isna(audio_features.loc[i, "song_id"]):
            # Unmatched track: nothing to request.
            continue
        get_audio_features(i)
    except Exception:
        # Best-effort collection: a failed row stays uncollected and can be
        # retried by re-running the cell. KeyboardInterrupt is not caught, so
        # the loop can still be stopped from the notebook.
        continue

 46%|████▌     | 46/100 [02:24<10:52, 12.09s/it]

In [51]:
# Re-inspect the frame's columns, non-null counts and dtypes after collection.
audio_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29681 entries, 0 to 29680
Data columns (total 33 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Unnamed: 0          29681 non-null  int64  
 1   Unnamed: 0.1        29681 non-null  int64  
 2   original_index      29681 non-null  int64  
 3   song                29681 non-null  object 
 4   artist              29681 non-null  object 
 5   primary_artist      29681 non-null  object 
 6   featured_artist_1   4367 non-null   object 
 7   featured_artist_2   592 non-null    object 
 8   is_collected        29681 non-null  bool   
 9   match_song          18155 non-null  object 
 10  duration_ms         18155 non-null  float64
 11  spotify_popularity  18155 non-null  float64
 12  song_id             18155 non-null  object 
 13  song_uri            18155 non-null  object 
 14  match_artist        18155 non-null  object 
 15  artist_id           18155 non-null  object 
 16  rele

In [54]:
# Positional preview: the first 13 rows and the columns at positions 21-31.
# The slice is by position, so it depends on the current column order.
audio_features.iloc[:13, 21:32]


Unnamed: 0,audio_collected,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence
0,True,0.604,0.366,5.0,-7.519,1.0,0.0282,0.578,0.0,0.133,0.13
1,True,0.591,0.764,1.0,-5.484,1.0,0.0483,0.0383,0.0,0.103,0.478
2,True,0.736,0.704,3.0,-7.409,0.0,0.0615,0.0203,0.0,0.0501,0.894
3,True,0.649,0.76,1.0,-6.487,1.0,0.0574,0.114,0.0,0.317,0.843
4,True,0.807,0.893,11.0,-3.745,0.0,0.0347,0.0451,2.8e-05,0.366,0.537
5,True,0.803,0.597,11.0,-6.035,0.0,0.141,0.000619,5e-06,0.323,0.331
6,True,0.788,0.859,2.0,-2.724,1.0,0.0856,0.281,0.0,0.0424,0.822
7,True,0.563,0.664,9.0,-5.044,1.0,0.154,0.335,0.0,0.0849,0.688
8,True,0.664,0.609,1.0,-6.509,1.0,0.0707,0.304,0.0,0.0926,0.194
9,True,0.702,0.825,6.0,-3.787,0.0,0.0601,0.00883,0.0,0.0674,0.915
