In [19]:
import requests
import datetime
import base64
from urllib.parse import urlencode
import pandas as pd
import numpy as np
import time
from tqdm import tqdm

In [20]:
import sys
sys.path.append('../client_secret')

from client_secret import *

In [21]:

class SpotifyAPI(object):
    access_token = None
    access_token_expires = datetime.datetime.now()
    access_token_did_expire = True
    client_id = None
    client_secret = None
    token_url = "https://accounts.spotify.com/api/token"
    
    def __init__(self, client_id, client_secret, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret

    def get_client_credentials(self):
        """
        Returns a base64 encoded string
        """
        client_id = self.client_id
        client_secret = self.client_secret
        if client_secret == None or client_id == None:
            raise Exception("You must set client_id and client_secret")
        client_creds = f"{client_id}:{client_secret}"
        client_creds_b64 = base64.b64encode(client_creds.encode())
        return client_creds_b64.decode()
    
    def get_token_headers(self):
        client_creds_b64 = self.get_client_credentials()
        return {
            "Authorization": f"Basic {client_creds_b64}"
        }
    
    def get_token_data(self):
        return {
            "grant_type": "client_credentials"
        } 
    
    def perform_auth(self):
        token_url = self.token_url
        token_data = self.get_token_data()
        token_headers = self.get_token_headers()
        r = requests.post(token_url, data=token_data, headers=token_headers)
        if r.status_code not in range(200, 299):
            raise Exception("Could not authenticate client.")
            # return False
        data = r.json()
        now = datetime.datetime.now()
        access_token = data['access_token']
        expires_in = data['expires_in'] # seconds
        expires = now + datetime.timedelta(seconds=expires_in)
        self.access_token = access_token
        self.access_token_expires = expires
        self.access_token_did_expire = expires < now
        return True
    
    def get_access_token(self):
        token = self.access_token
        expires = self.access_token_expires
        now = datetime.datetime.now()
        if expires < now:
            self.perform_auth()
            return self.get_access_token()
        elif token == None:
            self.perform_auth()
            return self.get_access_token() 
        return token
    
    def get_resource_header(self):
        access_token = self.get_access_token()
        headers = {
            "Authorization": f"Bearer {access_token}"
        }
        return headers
        
        
    def get_resource(self, lookup_id, resource_type='albums', version='v1'):
        endpoint = f"https://api.spotify.com/{version}/{resource_type}/{lookup_id}"
        headers = self.get_resource_header()
        r = requests.get(endpoint, headers=headers)
        if r.status_code not in range(200, 299):
            return {}
        return r.json()
    
    def get_album(self, _id):
        return self.get_resource(_id, resource_type='albums')
    
    def get_artist(self, _id):
        return self.get_resource(_id, resource_type='artists')
    
    def base_search(self, query_params): # type
        headers = self.get_resource_header()
        endpoint = "https://api.spotify.com/v1/search"
        lookup_url = f"{endpoint}?{query_params}"
        r = requests.get(lookup_url, headers=headers)
        if r.status_code not in range(200, 299):  
            return {}
        return r.json()
    
    def search(self, query=None, operator=None, operator_query=None, search_type='artist' ):
        if query == None:
            raise Exception("A query is required")
        if isinstance(query, dict):
            query = " ".join([f"{k}:{v}" for k,v in query.items()])
        if operator != None and operator_query != None:
            if operator.lower() == "or" or operator.lower() == "not":
                operator = operator.upper()
                if isinstance(operator_query, str):
                    query = f"{query} {operator} {operator_query}"
        query_params = urlencode({"q": query, "type": search_type.lower()})
        #print(query_params)
        return self.base_search(query_params)


In [22]:
spotify = SpotifyAPI(client_id, client_secret)

In [23]:
audio_features = pd.read_csv("../data/tracks_spotify_all.csv")

In [24]:
audio_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29681 entries, 0 to 29680
Data columns (total 21 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Unnamed: 0          29681 non-null  int64  
 1   Unnamed: 0.1        29681 non-null  int64  
 2   original_index      29681 non-null  int64  
 3   song                29681 non-null  object 
 4   artist              29681 non-null  object 
 5   primary_artist      29681 non-null  object 
 6   featured_artist_1   4367 non-null   object 
 7   featured_artist_2   592 non-null    object 
 8   is_collected        29681 non-null  bool   
 9   match_song          18155 non-null  object 
 10  duration_ms         18155 non-null  float64
 11  spotify_popularity  18155 non-null  float64
 12  song_id             18155 non-null  object 
 13  song_uri            18155 non-null  object 
 14  match_artist        18155 non-null  object 
 15  artist_id           18155 non-null  object 
 16  rele

In [25]:
audio_features["audio_collected"] = False

In [26]:
def get_audio_features(i):
    id = audio_features.loc[i,"song_id"]
    spot_song = spotify.get_resource(id,resource_type="audio-features")
    
    audio_features.loc[i,"danceability"] = spot_song['danceability']
    audio_features.loc[i,"energy"] = spot_song['energy']
    audio_features.loc[i,"key"] = spot_song['key']
    audio_features.loc[i,"loudness"] = spot_song['loudness']
    audio_features.loc[i,"mode"] = spot_song['mode']
    audio_features.loc[i,"speechiness"] = spot_song['speechiness']
    audio_features.loc[i,"acousticness"] = spot_song['acousticness']
    audio_features.loc[i,"instrumentalness"] = spot_song['instrumentalness']
    audio_features.loc[i,"liveness"] = spot_song['liveness']
    audio_features.loc[i,"valence"] = spot_song['valence']
    audio_features.loc[i,"tempo"] = spot_song['tempo']


    audio_features.loc[i,"audio_collected"] = True


In [27]:
for i in tqdm(range(10)):
    try:
        if audio_features.loc[i,"audio_collected"] == False:
            if spotify.perform_auth() == True:
                get_audio_features(i)
    except:
        continue

100%|██████████| 10/10 [00:04<00:00,  2.46it/s]


In [28]:
audio_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29681 entries, 0 to 29680
Data columns (total 33 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Unnamed: 0          29681 non-null  int64  
 1   Unnamed: 0.1        29681 non-null  int64  
 2   original_index      29681 non-null  int64  
 3   song                29681 non-null  object 
 4   artist              29681 non-null  object 
 5   primary_artist      29681 non-null  object 
 6   featured_artist_1   4367 non-null   object 
 7   featured_artist_2   592 non-null    object 
 8   is_collected        29681 non-null  bool   
 9   match_song          18155 non-null  object 
 10  duration_ms         18155 non-null  float64
 11  spotify_popularity  18155 non-null  float64
 12  song_id             18155 non-null  object 
 13  song_uri            18155 non-null  object 
 14  match_artist        18155 non-null  object 
 15  artist_id           18155 non-null  object 
 16  rele

In [29]:
audio_features

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,original_index,song,artist,primary_artist,featured_artist_1,featured_artist_2,is_collected,match_song,...,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0,0,0,Easy On Me,Adele,Adele,,,True,Easy On Me,...,,,,,,,,,,
1,1,1,1,Stay,The Kid LAROI & Justin Bieber,The Kid LAROI,Justin Bieber,,True,STAY (with Justin Bieber),...,,,,,,,,,,
2,2,2,2,Industry Baby,Lil Nas X & Jack Harlow,Lil Nas X,Jack Harlow,,True,INDUSTRY BABY (feat. Jack Harlow),...,,,,,,,,,,
3,3,3,3,Fancy Like,Walker Hayes,Walker Hayes,,,True,Fancy Like,...,,,,,,,,,,
4,4,4,4,Bad Habits,Ed Sheeran,Ed Sheeran,,,True,Bad Habits,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29676,29676,29676,330076,Stay,The Ames Brothers,The Ames Brothers,,,False,,...,,,,,,,,,,
29677,29677,29677,330082,Over And Over,Thurston Harris,Thurston Harris,,,False,,...,,,,,,,,,,
29678,29678,29678,330084,Little Serenade,The Ames Brothers,The Ames Brothers,,,False,,...,,,,,,,,,,
29679,29679,29679,330085,I'll Get By (As Long As I Have You),Billy Williams,Billy Williams,,,False,,...,,,,,,,,,,


In [15]:
# audio_features.to_csv("../data/tracks_spotify_all+audio_features")

In [2]:
aud =pd.read_csv("../data/tracks_spotify_all+audio_features")

In [4]:
aud.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29681 entries, 0 to 29680
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Unnamed: 0          29681 non-null  int64  
 1   Unnamed: 0.1        29681 non-null  int64  
 2   Unnamed: 0.1.1      29681 non-null  int64  
 3   original_index      29681 non-null  int64  
 4   song                29681 non-null  object 
 5   artist              29681 non-null  object 
 6   primary_artist      29681 non-null  object 
 7   featured_artist_1   4367 non-null   object 
 8   featured_artist_2   592 non-null    object 
 9   is_collected        29681 non-null  bool   
 10  match_song          18155 non-null  object 
 11  duration_ms         18155 non-null  float64
 12  spotify_popularity  18155 non-null  float64
 13  song_id             18155 non-null  object 
 14  song_uri            18155 non-null  object 
 15  match_artist        18155 non-null  object 
 16  arti