In [1]:
from secret_keys import SP_DC, SP_KEY, CLIENT_SECRET
from syrics.api import Spotify
import requests
import pandas as pd
import time

In [2]:
import spotify_token as st

# Start a session from the stored cookie/key pair. The result is kept whole in
# `data` (the API helpers below rebind it on re-authentication); element 0 is
# sent as the bearer token and element 1 is stored as the expiration date.
data = st.start_session(SP_DC, SP_KEY)
access_token, expiration_date = data[0], data[1]

# Lyrics client built from the same SP_DC cookie value.
sp = Spotify(SP_DC)

In [3]:
# Manual smoke tests for the `sp` client (uncomment to run):
# print(sp.get_lyrics("1v4m9GLt7lpFM5iOvwQZrU"))
# print(sp.get_current_song())

In [4]:
def cleanLyrics(lyrics):
    """Flatten a lyrics payload into a plain list of line strings.

    Parameters
    ----------
    lyrics : dict or None
        Payload shaped like ``{"lyrics": {"lines": [{"words": ...}, ...]}}``,
        or ``None`` when there is nothing to clean.

    Returns
    -------
    list or None
        The ``"words"`` value of every entry in ``lyrics["lyrics"]["lines"]``,
        in order, or ``None`` when ``lyrics`` is ``None``.

    Raises
    ------
    KeyError
        If the payload lacks the ``"lyrics"``, ``"lines"`` or ``"words"`` key.
    """
    # Identity check rather than ``== None``: equality can be overridden by
    # the object being compared, identity cannot.
    if lyrics is None:
        return None
    return [line['words'] for line in lyrics['lyrics']['lines']]

In [5]:
def searchArtists(name, retry_count=8):
    """Return the Spotify artist ID of the first search hit for ``name``.

    Calls ``GET https://api.spotify.com/v1/search`` with ``type=artist``,
    authenticated with the module-level ``access_token``.

    Status handling per attempt:

    * 200 - return the last path segment of the first hit's
      ``external_urls.spotify`` link, or ``None`` if there are no hits.
    * 401 - start a new session (rebinding the module-level ``data``,
      ``access_token``, ``expiration_date`` and ``sp``) and try again.
    * 403 - give up immediately and return ``None``.
    * 429 / 503 - wait 5 seconds and try again.
    * anything else - raise, which is retried like any other exception.

    Parameters
    ----------
    name : str
        Free-text artist query.
    retry_count : int, optional
        Maximum number of attempts (default 8).

    Returns
    -------
    str or None
        The artist ID, or ``None`` when nothing was found, the request was
        forbidden, or every attempt was rate-limited / unavailable.

    Raises
    ------
    Exception
        Whatever the final attempt raised (network error, timeout, malformed
        payload, or an unexpected status code with the response text).
    """
    global data, access_token, expiration_date, sp
    endpoint = "https://api.spotify.com/v1/search"
    params = {"q": name, "type": "artist"}
    request_timeout = 10  # seconds; without it a stalled connection hangs forever
    retry_delay = 5       # seconds to wait between attempts

    for attempt in range(retry_count):
        is_last_attempt = attempt == retry_count - 1
        try:
            # Rebuilt on every attempt because a 401 may have replaced the token.
            headers = {"Authorization": f"Bearer {access_token}"}
            # A timeout raises and is retried by the handler below.
            response = requests.get(
                endpoint, headers=headers, params=params, timeout=request_timeout
            )
            status = response.status_code
            if status == 200:
                items = response.json()['artists']['items']
                if not items:
                    # No match is a definitive answer, not a transient error:
                    # return instead of indexing [0] and retrying the IndexError.
                    return None
                spotify_href = items[0]['external_urls']['spotify']
                return spotify_href.split("/")[-1]
            elif status == 401:
                data = st.start_session(SP_DC, SP_KEY)
                access_token = data[0]
                expiration_date = data[1]
                sp = Spotify(SP_DC)
                continue  # Retry the request with the new access token
            elif status == 403:
                break  # Break out of the loop and return None
            elif status in (429, 503):
                # Sleeping after the last attempt would only delay the return.
                if not is_last_attempt:
                    time.sleep(retry_delay)
            else:
                raise Exception(status, response.text)
        except Exception:
            if is_last_attempt:
                raise  # bare raise keeps the original traceback
            time.sleep(retry_delay)
    return None

In [6]:
def getArtistsAlbums(id, limit=50, retry_count=8):
    """Fetch one page of an artist's albums from the Spotify Web API.

    Calls ``GET https://api.spotify.com/v1/artists/{id}/albums`` with
    ``market=US`` and the given ``limit``, authenticated with the
    module-level ``access_token``. Only the first page is requested.

    Status handling per attempt:

    * 200 - return the summarised albums.
    * 401 - start a new session (rebinding the module-level ``data``,
      ``access_token``, ``expiration_date`` and ``sp``) and try again.
    * 403 - give up immediately and return ``[]``.
    * 429 / 503 - wait 5 seconds and try again.
    * anything else - raise, which is retried like any other exception.

    Parameters
    ----------
    id : str or None
        Spotify artist ID. ``None`` yields ``[]`` without any request.
    limit : int, optional
        Value sent as the ``limit`` query parameter (default 50).
    retry_count : int, optional
        Maximum number of attempts (default 8).

    Returns
    -------
    list of dict
        One dict per album with the keys ``"name"``, ``"id"``,
        ``"release_date"`` and ``"album_type"``; empty when ``id`` is
        ``None``, the request was forbidden, or every attempt was
        rate-limited / unavailable.

    Raises
    ------
    Exception
        Whatever the final attempt raised (network error, timeout, malformed
        payload, or an unexpected status code with the response text).
    """
    global data, access_token, expiration_date, sp
    if id is None:
        # Without this guard the URL would read ".../artists/None/albums" and
        # the lookup would burn every retry before failing.
        return []

    endpoint = f"https://api.spotify.com/v1/artists/{id}/albums"
    params = {"market": "US", "limit": limit}
    request_timeout = 10  # seconds; without it a stalled connection hangs forever
    retry_delay = 5       # seconds to wait between attempts

    for attempt in range(retry_count):
        is_last_attempt = attempt == retry_count - 1
        try:
            # Rebuilt on every attempt because a 401 may have replaced the token.
            headers = {"Authorization": f"Bearer {access_token}"}
            # A timeout raises and is retried by the handler below.
            response = requests.get(
                endpoint, headers=headers, params=params, timeout=request_timeout
            )
            status = response.status_code
            if status == 200:
                return [
                    {
                        "name": album['name'],
                        "id": album['id'],
                        "release_date": album['release_date'],
                        "album_type": album['album_type']
                    }
                    for album in response.json()['items']
                ]
            elif status == 401:
                data = st.start_session(SP_DC, SP_KEY)
                access_token = data[0]
                expiration_date = data[1]
                sp = Spotify(SP_DC)
                continue  # Retry the request with the new access token
            elif status == 403:
                break  # Break out of the loop and return an empty list
            elif status in (429, 503):
                # Sleeping after the last attempt would only delay the return.
                if not is_last_attempt:
                    time.sleep(retry_delay)
            else:
                # Carry the status and body so the failure can be diagnosed.
                raise Exception("Error getting albums", status, response.text)
        except Exception:
            if is_last_attempt:
                raise  # bare raise keeps the original traceback
            time.sleep(retry_delay)
    return []

In [7]:
def getAlbumTracks(album_id, retry_count=8):
    """Fetch the first page (up to 50) of an album's tracks from Spotify.

    Calls ``GET https://api.spotify.com/v1/albums/{album_id}/tracks`` with
    ``limit=50``, authenticated with the module-level ``access_token``.

    Status handling per attempt:

    * 200 - return the summarised tracks.
    * 401 - start a new session (rebinding the module-level ``data``,
      ``access_token``, ``expiration_date`` and ``sp``) and try again.
    * 403 - give up immediately and return ``[]``.
    * 429 / 503 - wait 5 seconds and try again.
    * anything else - raise, which is retried like any other exception.

    Parameters
    ----------
    album_id : str
        Spotify album ID.
    retry_count : int, optional
        Maximum number of attempts (default 8).

    Returns
    -------
    list of dict
        One dict per track with the keys ``"name"``, ``"id"`` and
        ``"duration_ms"``; empty when the request was forbidden or every
        attempt was rate-limited / unavailable.

    Raises
    ------
    Exception
        Whatever the final attempt raised (network error, timeout, malformed
        payload, or an unexpected status code with the response text).
    """
    global data, access_token, expiration_date, sp
    endpoint = f"https://api.spotify.com/v1/albums/{album_id}/tracks"
    params = {"limit": 50}
    request_timeout = 10  # seconds; without it a stalled connection hangs forever
    retry_delay = 5       # seconds to wait between attempts

    for attempt in range(retry_count):
        is_last_attempt = attempt == retry_count - 1
        try:
            # Rebuilt on every attempt because a 401 may have replaced the token.
            headers = {"Authorization": f"Bearer {access_token}"}
            # A timeout raises and is retried by the handler below.
            response = requests.get(
                endpoint, headers=headers, params=params, timeout=request_timeout
            )
            status = response.status_code
            if status == 200:
                return [
                    {
                        "name": track['name'],
                        "id": track['id'],
                        "duration_ms": track['duration_ms']
                    }
                    for track in response.json()['items']
                ]
            elif status == 401:
                data = st.start_session(SP_DC, SP_KEY)
                access_token = data[0]
                expiration_date = data[1]
                sp = Spotify(SP_DC)
                continue  # Retry the request with the new access token
            elif status == 403:
                break  # Break out of the loop and return an empty list
            elif status in (429, 503):
                # Sleeping after the last attempt would only delay the return.
                if not is_last_attempt:
                    time.sleep(retry_delay)
            else:
                # Carry the status and body so the failure can be diagnosed.
                raise Exception("Error getting tracks", status, response.text)
        except Exception:
            if is_last_attempt:
                raise  # bare raise keeps the original traceback
            time.sleep(retry_delay)
    return []

In [8]:
def generateLyrics(name, limit=50):
    """Collect lyrics for every track on an artist's albums.

    Resolves ``name`` with ``searchArtists``, lists albums with
    ``getArtistsAlbums``, lists each album's tracks with ``getAlbumTracks``
    and fetches lyrics through the module-level ``sp`` client, cleaning them
    with ``cleanLyrics``.

    Collection is best-effort: an album whose track listing fails, or a track
    whose lyrics lookup fails, is reported with ``print`` and skipped so the
    rest of the run continues. Tracks without an ID are skipped silently.

    Parameters
    ----------
    name : str
        Artist name to search for; also stored in each row's ``"artist"``.
    limit : int, optional
        Forwarded to ``getArtistsAlbums`` (default 50).

    Returns
    -------
    list of dict
        One dict per track with the keys ``"artist"``, ``"album"``,
        ``"track"`` and ``"lyrics"`` (``"lyrics"`` is whatever ``cleanLyrics``
        returns, which may be ``None``). Empty when no artist was found.

    Raises
    ------
    Exception
        Anything raised by ``searchArtists`` or ``getArtistsAlbums``.
    """
    artist_id = searchArtists(name)
    if artist_id is None:
        # searchArtists found nothing, so there is nothing to look up.
        return []

    albums = getArtistsAlbums(artist_id, limit)
    to_return = []
    for album in albums:
        album_name = album['name']
        album_id = album['id']
        # ``except Exception`` (not a bare ``except``) so that interrupting
        # the kernel still stops this long-running loop.
        try:
            album_tracks = getAlbumTracks(album_id)
        except Exception as exc:
            print(f"Skipping album {album_name!r} by {name!r}: {exc!r}")
            continue

        for track in album_tracks:
            track_id = track['id']
            if track_id is None:
                continue  # no ID means no lyrics lookup is possible
            # Guard each track separately so one failure does not discard
            # the remaining tracks of the album.
            try:
                lyrics = cleanLyrics(sp.get_lyrics(track_id))
            except Exception as exc:
                print(f"Skipping track {track['name']!r} on {album_name!r}: {exc!r}")
                continue
            to_return.append({
                "artist": name,
                "album": album_name,
                "track": track['name'],
                "lyrics": lyrics
            })

    return to_return

In [9]:
# Artists to scrape. Each name is used as the search query and is also written
# to the "artist" column of the output, so the spelling matters.
sample_artists = [
    "Taylor Swift",
    "CoCoMelon",
    "Keshi",
    "Conan Gray",
    "Slayer",
    "Black Sabbath",
    "Khalid",
    "Lana Del Rey",  # was misspelled "Lana Del Ray"
    "IU",
    "YOASOBI",
    "LilyPichu",
    "League of Legends",
]

In [10]:
# Gather the lyric rows of every sample artist into a single list.
final = []
for artists in sample_artists:
    # Pause before each artist to space out the bursts of API calls.
    time.sleep(2)
    final.extend(generateLyrics(artists))

# Build the table once from the collected records, then write it to disk
# without the row index.
df = pd.DataFrame(final)
df.to_csv("lyrics.csv", index=False)

In [11]:
# Show the collected rows; the rendered output below is a truncated preview
# (first and last rows) of the frame.
df

Unnamed: 0,artist,album,track,lyrics
0,Taylor Swift,1989 (Taylor's Version) [Deluxe],Welcome To New York (Taylor's Version),"[Walkin' through a crowd, the village is aglow..."
1,Taylor Swift,1989 (Taylor's Version) [Deluxe],Blank Space (Taylor's Version),"[Nice to meet you, where you been?, I could sh..."
2,Taylor Swift,1989 (Taylor's Version) [Deluxe],Style (Taylor's Version),"[Midnight, You come and pick me up, no headlig..."
3,Taylor Swift,1989 (Taylor's Version) [Deluxe],Out Of The Woods (Taylor's Version),"[Looking at it now, It all seems so simple, We..."
4,Taylor Swift,1989 (Taylor's Version) [Deluxe],All You Had To Do Was Stay (Taylor's Version),"[(Hey, hey, hey), (Hey, hey, hey), (Hey, hey, ..."
...,...,...,...,...
5824,League of Legends,Faerie Court (Skin Theme),Faerie Court - Skin Theme,
5825,League of Legends,"Milio, the Gentle Flame (Champion Theme)","Milio, the Gentle Flame - Champion Theme",
5826,League of Legends,"Ahri, the Nine-Tailed Fox (Champion Theme)","Ahri, the Nine-Tailed Fox - Champion Theme",
5827,League of Legends,Lunar Revel 2023 (Event Theme),Lunar Revel 2023 - Event Theme,
