# Collecting songs information using Spotify API

- For this step I will use the Spotify API to collect song information from Spotify.
- I will need to create a Spotify developer account or log in using my current account.
- Get the client ID and client secret from Spotify.
- Write the script for the API; I followed along with this video: [link here]()

In [3]:
# importing libraries
import base64
import datetime
import os
import time
from urllib.parse import urlencode

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

In [1]:
# Setting the client ID and secret.
# The values are read from the SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET environment
# variables so the secrets never have to be typed into (or saved with) the notebook.
# They fall back to empty strings when the variables are not set.
client_id = os.environ.get("SPOTIFY_CLIENT_ID", "")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET", "")

- ## Writing the Spotify API script

In [148]:
# A class bundling all the functions needed to use the Spotify API
class SpotifyAPI(object):
    """Small client for the Spotify Web API using the client-credentials grant.

    The client lazily requests an access token from ``token_url`` the first time
    a resource is needed and requests a new one once the stored token has expired.
    """
    access_token = None
    access_token_expires = datetime.datetime.now()
    access_token_did_expire = True
    client_id = None
    client_secret = None
    token_url = "https://accounts.spotify.com/api/token"
    # Seconds to wait on any HTTP call before giving up; without a timeout a
    # single stalled connection would block the caller indefinitely.
    request_timeout = 30

    def __init__(self, client_id, client_secret, *args, **kwargs):
        """Store the application credentials used to request access tokens."""
        super().__init__(*args, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret

    @staticmethod
    def _is_success(status_code):
        """Return True for any 2xx HTTP status code (200 through 299 inclusive)."""
        return 200 <= status_code < 300

    def get_client_credentials(self):
        """
        Returns a base64 encoded string of "<client_id>:<client_secret>".

        Raises:
            Exception: if either credential is None.
        """
        client_id = self.client_id
        client_secret = self.client_secret
        if client_secret is None or client_id is None:
            raise Exception("You must set client_id and client_secret")
        client_creds = f"{client_id}:{client_secret}"
        client_creds_b64 = base64.b64encode(client_creds.encode())
        return client_creds_b64.decode()

    def get_token_headers(self):
        """Return the HTTP Basic authorization header sent to the token endpoint."""
        client_creds_b64 = self.get_client_credentials()
        return {
            "Authorization": f"Basic {client_creds_b64}"
        }

    def get_token_data(self):
        """Return the form body sent to the token endpoint."""
        return {
            "grant_type": "client_credentials"
        }

    def perform_auth(self):
        """Request a new access token and store it with its expiry time.

        Returns:
            True once the token has been stored.

        Raises:
            Exception: if the token endpoint answers with a non-2xx status.
        """
        r = requests.post(
            self.token_url,
            data=self.get_token_data(),
            headers=self.get_token_headers(),
            timeout=self.request_timeout,
        )
        if not self._is_success(r.status_code):
            raise Exception("Could not authenticate client.")
        data = r.json()
        now = datetime.datetime.now()
        expires_in = data['expires_in']  # seconds
        expires = now + datetime.timedelta(seconds=expires_in)
        self.access_token = data['access_token']
        self.access_token_expires = expires
        self.access_token_did_expire = expires < now
        return True

    def get_access_token(self):
        """Return the stored access token, authenticating first if it is missing or expired.

        Authentication is attempted at most once per call, so a token that is
        reported as already expired cannot trigger unbounded recursion.
        """
        if self.access_token is None or self.access_token_expires < datetime.datetime.now():
            self.perform_auth()
        return self.access_token

    def get_resource_header(self):
        """Return the Bearer authorization header used for API requests."""
        access_token = self.get_access_token()
        headers = {
            "Authorization": f"Bearer {access_token}"
        }
        return headers

    def get_resource(self, lookup_id, resource_type='albums', version='v1'):
        """Fetch one resource by id.

        Args:
            lookup_id: id placed at the end of the endpoint URL.
            resource_type: URL path segment for the resource collection.
            version: API version path segment.

        Returns:
            The decoded JSON body, or an empty dict on a non-2xx response.
        """
        endpoint = f"https://api.spotify.com/{version}/{resource_type}/{lookup_id}"
        headers = self.get_resource_header()
        r = requests.get(endpoint, headers=headers, timeout=self.request_timeout)
        if not self._is_success(r.status_code):
            return {}
        return r.json()

    def get_album(self, _id):
        """Fetch an album by id (see ``get_resource``)."""
        return self.get_resource(_id, resource_type='albums')

    def get_artist(self, _id):
        """Fetch an artist by id (see ``get_resource``)."""
        return self.get_resource(_id, resource_type='artists')

    def base_search(self, query_params):  # type
        """Call the search endpoint with an already URL-encoded query string.

        Returns:
            The decoded JSON body, or an empty dict on a non-2xx response.
        """
        headers = self.get_resource_header()
        endpoint = "https://api.spotify.com/v1/search"
        lookup_url = f"{endpoint}?{query_params}"
        r = requests.get(lookup_url, headers=headers, timeout=self.request_timeout)
        if not self._is_success(r.status_code):
            return {}
        return r.json()

    def search(self, query=None, operator=None, operator_query=None, search_type='artist'):
        """Build a search query and run it through ``base_search``.

        Args:
            query: a query string, or a dict whose items are joined as
                "key:value" pairs separated by spaces.
            operator: optional "or" / "not" (case-insensitive); any other value
                is ignored.
            operator_query: string appended after the operator; ignored unless
                it is a str.
            search_type: item type to search for; lower-cased before sending.

        Raises:
            Exception: if ``query`` is None.
        """
        if query is None:
            raise Exception("A query is required")
        if isinstance(query, dict):
            query = " ".join(f"{k}:{v}" for k, v in query.items())
        if operator is not None and operator_query is not None:
            if operator.lower() in ("or", "not") and isinstance(operator_query, str):
                query = f"{query} {operator.upper()} {operator_query}"
        query_params = urlencode({"q": query, "type": search_type.lower()})
        return self.base_search(query_params)

In [149]:
# Build the SpotifyAPI client object from the client ID and secret
spotify = SpotifyAPI(client_id=client_id, client_secret=client_secret)

- ## Loading the data I cleaned in the previous step

In [4]:
# Load the data I prepared from the original dataset
tracks = pd.read_csv('../data/unique_songs.csv')

In [5]:
# Preview the first rows of the loaded data
tracks.head()

Unnamed: 0.1,Unnamed: 0,original_index,song,artist,primary_artist,featured_artist_1,featured_artist_2
0,0,0,Easy On Me,Adele,Adele,,
1,1,1,Stay,The Kid LAROI & Justin Bieber,The Kid LAROI,Justin Bieber,
2,2,2,Industry Baby,Lil Nas X & Jack Harlow,Lil Nas X,Jack Harlow,
3,3,3,Fancy Like,Walker Hayes,Walker Hayes,,
4,4,4,Bad Habits,Ed Sheeran,Ed Sheeran,,


In [6]:
# Show the column dtypes and non-null counts
tracks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29681 entries, 0 to 29680
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Unnamed: 0         29681 non-null  int64 
 1   original_index     29681 non-null  int64 
 2   song               29681 non-null  object
 3   artist             29681 non-null  object
 4   primary_artist     29681 non-null  object
 5   featured_artist_1  4367 non-null   object
 6   featured_artist_2  592 non-null    object
dtypes: int64(2), object(5)
memory usage: 1.6+ MB


In [7]:
# Copy the data into a new object that will hold the newly pulled Spotify data.
# .copy() creates an independent DataFrame; a plain assignment would only add a
# second name for `tracks`, so columns added later would also appear on `tracks`.
tracks_spotify = tracks.copy()

In [8]:
# Preview the first rows of the working DataFrame
tracks_spotify.head()

Unnamed: 0.1,Unnamed: 0,original_index,song,artist,primary_artist,featured_artist_1,featured_artist_2
0,0,0,Easy On Me,Adele,Adele,,
1,1,1,Stay,The Kid LAROI & Justin Bieber,The Kid LAROI,Justin Bieber,
2,2,2,Industry Baby,Lil Nas X & Jack Harlow,Lil Nas X,Jack Harlow,
3,3,3,Fancy Like,Walker Hayes,Walker Hayes,,
4,4,4,Bad Habits,Ed Sheeran,Ed Sheeran,,


In [9]:
# Create a column called (is_collected) to keep track of songs that have been pulled from Spotify
tracks_spotify["is_collected"] = False

In [10]:
# Final shape of the data before running it through the Spotify API
tracks_spotify

Unnamed: 0.1,Unnamed: 0,original_index,song,artist,primary_artist,featured_artist_1,featured_artist_2,is_collected
0,0,0,Easy On Me,Adele,Adele,,,False
1,1,1,Stay,The Kid LAROI & Justin Bieber,The Kid LAROI,Justin Bieber,,False
2,2,2,Industry Baby,Lil Nas X & Jack Harlow,Lil Nas X,Jack Harlow,,False
3,3,3,Fancy Like,Walker Hayes,Walker Hayes,,,False
4,4,4,Bad Habits,Ed Sheeran,Ed Sheeran,,,False
...,...,...,...,...,...,...,...,...
29676,29676,330076,Stay,The Ames Brothers,The Ames Brothers,,,False
29677,29677,330082,Over And Over,Thurston Harris,Thurston Harris,,,False
29678,29678,330084,Little Serenade,The Ames Brothers,The Ames Brothers,,,False
29679,29679,330085,I'll Get By (As Long As I Have You),Billy Williams,Billy Williams,,,False


In [160]:
# A function that takes a row's song name and artist name, searches Spotify for them,
# and assigns the returned information to the appropriate columns of that row.
def get_info(i):
    """Look up row ``i`` of ``tracks_spotify`` on Spotify and store the first match.

    The row's "song" and "primary_artist" values are sent as a track search
    through the module-level ``spotify`` client. The first returned track
    supplies the song, artist and album columns, and "is_collected" is then
    set to True.

    Args:
        i: index label of the row in ``tracks_spotify``.

    Returns:
        True if a match was found and stored, False if the search returned no
        tracks (the row is left untouched in that case).
    """
    sp_song = tracks_spotify.loc[i, "song"]
    sp_artist = tracks_spotify.loc[i, "primary_artist"]
    spot_song = spotify.search({"track": sp_song, "artist": sp_artist}, search_type='Track')

    # The search yields an empty dict on a failed request and may yield an empty
    # item list when nothing matches; treat both as "no match" instead of raising.
    items = (spot_song.get('tracks') or {}).get('items') or []
    if not items:
        return False

    track = items[0]
    artist = track['artists'][0]
    album = track['album']

    # Gather every value before writing so that a missing key cannot leave the
    # row half-filled.
    collected = {
        "match_song": track["name"],
        "duration_ms": track["duration_ms"],
        "spotify_popularity": track["popularity"],
        "song_id": track["id"],
        "song_uri": track["uri"],
        # artist info
        "match_artist": artist['name'],
        "artist_id": artist['id'],
        "artist_uri": artist['uri'],
        # album info
        "release_date": album['release_date'],
        "Album_id": album['id'],
        "album_type": album['album_type'],
        "album_name": album['name'],
        "album_uri": album['uri'],
    }
    for column, value in collected.items():
        tracks_spotify.loc[i, column] = value

    tracks_spotify.loc[i, "is_collected"] = True
    return True

In [161]:
# Run through every song whose information has not been collected yet and
# perform the function to get the information.
# - No explicit perform_auth() call per song: the client requests a token itself
#   whenever the stored one is missing or expired, so authenticating on every
#   iteration only added an extra HTTP request per song.
# - Only Exception is caught, so interrupting the kernel (KeyboardInterrupt)
#   still stops the loop, and each failure is recorded instead of being
#   silently dropped.
pending_rows = tracks_spotify.index[~tracks_spotify["is_collected"].astype(bool)]
failed_rows = []  # (row label, error description) for every song that raised
for i in tqdm(pending_rows):
    try:
        get_info(i)
    except Exception as err:
        failed_rows.append((i, repr(err)))

100%|██████████| 29681/29681 [6:28:39<00:00,  1.27it/s]   


In [113]:
#tracks_spotify[(tracks_spotify["match_artist"] != tracks_spotify["primary_artist"]) & (tracks_spotify["is_collected"] == True)]

In [114]:
#tracks_spotify.to_csv("../data/tracks_spotify_1.csv")

In [142]:
#tracks_spotify.to_csv("../data/tracks_spotify_2.csv")

In [162]:
#tracks_spotify.to_csv("../data/tracks_spotify_all.csv")

In [3]:
# Reload the collected data that was saved to disk
load = pd.read_csv("../data/tracks_spotify_all.csv")

In [10]:
# Keep only the songs whose Spotify information was collected
load[load["is_collected"] == True]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,original_index,song,artist,primary_artist,featured_artist_1,featured_artist_2,is_collected,match_song,...,spotify_popularity,song_id,song_uri,match_artist,artist_id,release_date,Album_id,album_type,album_name,album_uri
0,0,0,0,Easy On Me,Adele,Adele,,,True,Easy On Me,...,95.0,0gplL1WMoJ6iYaPgMCL0gX,spotify:track:0gplL1WMoJ6iYaPgMCL0gX,Adele,spotify:artist:4dpARuHxo51G3z768sgnrY,2021-10-14,224jZ4sUX7OhAuMwaxp86S,single,Easy On Me,spotify:album:224jZ4sUX7OhAuMwaxp86S
1,1,1,1,Stay,The Kid LAROI & Justin Bieber,The Kid LAROI,Justin Bieber,,True,STAY (with Justin Bieber),...,96.0,5HCyWlXZPP0y6Gqq8TgA20,spotify:track:5HCyWlXZPP0y6Gqq8TgA20,The Kid LAROI,spotify:artist:2tIP7SsRs7vjIcLrU85W8J,2021-07-09,4QLAtpLNUsHEYrcHXmMIZZ,single,STAY (with Justin Bieber),spotify:album:4QLAtpLNUsHEYrcHXmMIZZ
2,2,2,2,Industry Baby,Lil Nas X & Jack Harlow,Lil Nas X,Jack Harlow,,True,INDUSTRY BABY (feat. Jack Harlow),...,95.0,27NovPIUIRrOZoCHxABJwK,spotify:track:27NovPIUIRrOZoCHxABJwK,Lil Nas X,spotify:artist:7jVv8c5Fj3E9VhNjxT4snq,2021-07-23,622NFw5Yk0OReMJ2XWcXUh,single,INDUSTRY BABY (feat. Jack Harlow),spotify:album:622NFw5Yk0OReMJ2XWcXUh
3,3,3,3,Fancy Like,Walker Hayes,Walker Hayes,,,True,Fancy Like,...,81.0,3ZozFqJJlDBNIm4xqxn2ZD,spotify:track:3ZozFqJJlDBNIm4xqxn2ZD,Walker Hayes,spotify:artist:7sKxqpSqbIzphAKAhrqvlf,2021-06-04,1Lcuv3HMgEG36SkibQIspv,single,Country Stuff,spotify:album:1Lcuv3HMgEG36SkibQIspv
4,4,4,4,Bad Habits,Ed Sheeran,Ed Sheeran,,,True,Bad Habits,...,95.0,3rmo8F54jFF8OgYsqTxm5d,spotify:track:3rmo8F54jFF8OgYsqTxm5d,Ed Sheeran,spotify:artist:6eUKZXaKkcviH0Ku9w2n3V,2021-10-29,32iAEBstCjauDhyKpGjTuq,album,=,spotify:album:32iAEBstCjauDhyKpGjTuq
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24503,24503,24503,289126,Backstage,Gene Pitney,Gene Pitney,,,True,Backstage - Original Gusto Recordings,...,27.0,4rWtrEbRMZgpnDTeYQgcLA,spotify:track:4rWtrEbRMZgpnDTeYQgcLA,Gene Pitney,spotify:artist:3ap1NzHNV9QA1x1V6z3gSe,1984,71VLfdu9MouPY8pJNeaW0Y,album,Backstage (I'm Lonely) [Original Gusto Recordi...,spotify:album:71VLfdu9MouPY8pJNeaW0Y
26885,26885,26885,306773,Jellybread,Booker T. & The MG's,Booker T.,The MG's,,True,Jellybread,...,6.0,4mnTsJuNT4esYmnHqGMxJd,spotify:track:4mnTsJuNT4esYmnHqGMxJd,Booker T. & the M.G.'s,spotify:artist:2vDV0T8sxx2ENnKXds75e5,1998-01-01,62e5NVqS2RLLIWkoNHuVK9,compilation,Time Is Tight,spotify:album:62e5NVqS2RLLIWkoNHuVK9
26888,26888,26888,306825,Dear Lonely Hearts,Nat King Cole,Nat King Cole,,,True,Dear Lonely Hearts,...,14.0,1xLzzA9PPPaJhsNiM00olS,spotify:track:1xLzzA9PPPaJhsNiM00olS,Nat King Cole,spotify:artist:7v4imS0moSyGdXyLgVTIV7,1962-01-01,6knZwJ3Qp0uBe7NEiDIrTw,album,Dear Lonely Hearts,spotify:album:6knZwJ3Qp0uBe7NEiDIrTw
26889,26889,26889,306830,The Love Of A Boy,Timi Yuro,Timi Yuro,,,True,The Love Of A Boy,...,17.0,5y2FnGvK01cIOqGWhCN5PR,spotify:track:5y2FnGvK01cIOqGWhCN5PR,Timi Yuro,spotify:artist:2JyxVsEiD9HVRM7CtFaLCK,1992-01-01,18UaMS0uVYtzsIoDIS5v3P,compilation,The Best Of Timi Yuro,spotify:album:18UaMS0uVYtzsIoDIS5v3P
