In [1]:
# Import Libraries
import requests
import pandas as pd
from tqdm import tqdm
import os
import re

# ---- Genius API credentials, read from the process environment ----
# Each value is None when the corresponding variable is not set.
_env = os.environ
client_id = _env.get('GENIUS_CLIENT_ID')
client_secret = _env.get('GENIUS_CLIENT_SECRET')
auth_token = _env.get('GENIUS_TOKEN')
# -------------------------------------------------------------------

In [2]:
# Request headers sent with every API call: bearer-token authorization.
headers = dict(Authorization='Bearer {}'.format(auth_token))

In [3]:
# Load the song table; its Artist/Song columns are used to build the search queries.
data = pd.read_csv(filepath_or_buffer='data/song_data.csv')

In [4]:
# Manual clean-up for data points that break the API lookup. Patching the
# offending rows here lets the JSON populate without wrapping the request
# loop in a try/except.
data.at[27, 'Song'] = 'Lift Me Up'

In [5]:
# Create the column that will hold each song's lyrics-page URL.
# Start with a real missing-value marker: the previous `= str` stored the
# built-in `str` type object in every row, which would be written out as
# "<class 'str'>" for any row that never gets a URL assigned.
data['lyricurl'] = pd.NA

In [7]:
# For every row: search Genius for "<artist> <song>", take the first hit's
# api_path, fetch that song resource, and store its public lyrics-page URL.
url = 'https://api.genius.com'    # API base URL; HTTPS so the bearer token is not sent in plaintext
search = '/search'                # search endpoint; the query string is supplied via `params`
special_chars = re.compile(r'[^A-Za-z0-9 ]+')   # everything except ASCII letters, digits and spaces
request_timeout = 30              # seconds; avoids hanging forever on a stalled connection

# One session reuses the underlying connection across all requests.
with requests.Session() as session:
    session.headers.update(headers)

    for i in tqdm(data.index):
        artist = special_chars.sub('', data.at[i, 'Artist'])   # strip special characters
        song = special_chars.sub('', data.at[i, 'Song'])
        query = f"{artist} {song}"    # requests URL-encodes this when passed as a query parameter

        # First call: search for the song.
        resp = session.get(url + search, params={'q': query}, timeout=request_timeout)
        resp.raise_for_status()       # surface auth / rate-limit failures as HTTP errors
        hits = resp.json()['response']['hits']
        if not hits:
            # Same exception type as the bare `hits[0]` lookup, but it names the row
            # so the offending data point can be patched in the clean-up cell.
            raise IndexError(f"No Genius search hits for row {i}: {query!r}")
        api_path = hits[0]['result']['api_path']

        # Second call: fetch the song resource the first hit points to.
        resp = session.get(url + api_path, timeout=request_timeout)
        resp.raise_for_status()
        song_json = resp.json()       # named to avoid shadowing the stdlib `json` module

        data.at[i, 'lyricurl'] = song_json['response']['song']['url']   # assign lyric url to dataframe

100%|███████████████████████████████████████████| 75/75 [02:08<00:00,  1.71s/it]


In [8]:
# Preview the first five rows, including the newly filled lyricurl column.
data.head(n=5)

Unnamed: 0,Artist,Song,SongID,Popularity,Lyrics,href,lyricurl
0,Miley Cyrus,Flowers,0yLdNVWF3Srea0uzk55zFn,87,,https://api.spotify.com/v1/tracks/0yLdNVWF3Sre...,https://genius.com/Miley-cyrus-flowers-lyrics
1,Metro Boomin,Creepin' (with The Weeknd & 21 Savage),2dHHgzDwk4BJdRwy9uXhTO,97,,https://api.spotify.com/v1/tracks/2dHHgzDwk4BJ...,https://genius.com/Spotify-new-music-friday-12...
2,SZA,Kill Bill,1Qrg8KqiBpW07V7PNxwwwL,93,,https://api.spotify.com/v1/tracks/1Qrg8KqiBpW0...,https://genius.com/Sza-kill-bill-lyrics
3,Central Cee,LET GO,3zkyus0njMCL6phZmNNEeN,94,,https://api.spotify.com/v1/tracks/3zkyus0njMCL...,https://genius.com/Central-cee-let-go-lyrics
4,Tiësto,10:35,6BePGk3eCan4FqaW2X8Qy3,90,,https://api.spotify.com/v1/tracks/6BePGk3eCan4...,https://genius.com/Tiesto-and-tate-mcrae-10-35...


In [12]:
# Manual override for row 1 (Metro Boomin): replace the URL picked up from the
# first search hit with the song's actual Genius lyrics page.
data.at[1, 'lyricurl'] = 'https://genius.com/Metro-boomin-the-weeknd-and-21-savage-creepin-lyrics'

In [13]:
# Preview the first five rows again to confirm the manual URL override.
data.head(n=5)

Unnamed: 0,Artist,Song,SongID,Popularity,Lyrics,href,lyricurl
0,Miley Cyrus,Flowers,0yLdNVWF3Srea0uzk55zFn,87,,https://api.spotify.com/v1/tracks/0yLdNVWF3Sre...,https://genius.com/Miley-cyrus-flowers-lyrics
1,Metro Boomin,Creepin' (with The Weeknd & 21 Savage),2dHHgzDwk4BJdRwy9uXhTO,97,,https://api.spotify.com/v1/tracks/2dHHgzDwk4BJ...,https://genius.com/Metro-boomin-the-weeknd-and...
2,SZA,Kill Bill,1Qrg8KqiBpW07V7PNxwwwL,93,,https://api.spotify.com/v1/tracks/1Qrg8KqiBpW0...,https://genius.com/Sza-kill-bill-lyrics
3,Central Cee,LET GO,3zkyus0njMCL6phZmNNEeN,94,,https://api.spotify.com/v1/tracks/3zkyus0njMCL...,https://genius.com/Central-cee-let-go-lyrics
4,Tiësto,10:35,6BePGk3eCan4FqaW2X8Qy3,90,,https://api.spotify.com/v1/tracks/6BePGk3eCan4...,https://genius.com/Tiesto-and-tate-mcrae-10-35...


In [14]:
# Overwrite the CSV with the new lyricurl info.
# index=False: this file is read back with a plain pd.read_csv, so writing the
# row index would add another "Unnamed: 0"-style column on every run.
data.to_csv("data/song_data.csv", index=False)