In [1]:
from secret_keys import SP_DC, SP_KEY, CLIENT_SECRET
from syrics.api import Spotify
import requests
import pandas as pd
import time

In [2]:
import spotify_token as st

# Start a Spotify session from the cookie/key pair kept in secret_keys.
# start_session's result is indexed positionally: element 0 is used as the
# bearer token and element 1 is stored as its expiry (presumably a timestamp —
# confirm against spotify_token). These module-level names are rebound by the
# request helpers whenever the API answers 401.
data = st.start_session(SP_DC, SP_KEY)
access_token, expiration_date = data[0], data[1]

# syrics client, used later to fetch lyrics per track.
sp = Spotify(SP_DC)

In [3]:
# print(sp.get_lyrics("1v4m9GLt7lpFM5iOvwQZrU"))
# # print(sp.get_current_song())

In [4]:
def cleanLyrics(lyrics):
    """Flatten a lyrics payload into a plain list of line strings.

    Parameters
    ----------
    lyrics : dict or None
        Payload shaped like ``{"lyrics": {"lines": [{"words": ...}, ...]}}``
        (presumably what ``sp.get_lyrics`` returns — confirm against syrics),
        or ``None`` when no lyrics are available.

    Returns
    -------
    list or None
        The ``"words"`` value of every line, in order, or ``None`` when
        ``lyrics`` is ``None``.

    Raises
    ------
    KeyError
        If the payload lacks the ``"lyrics"``, ``"lines"`` or ``"words"`` keys.
    """
    # Identity check: `== None` would defer to any custom __eq__ on the payload.
    if lyrics is None:
        return None
    return [line['words'] for line in lyrics['lyrics']['lines']]

In [5]:
def searchArtists(name, retry_count=8):
    """Return the Spotify ID of the first artist found when searching for ``name``.

    Sends ``GET /v1/search`` with the current bearer token and makes up to
    ``retry_count`` attempts:

    * 200 -> return the ID of the first artist, or ``None`` if the search
      matched no artist at all.
    * 401 -> start a new session (rebinding the module-level ``data``,
      ``access_token``, ``expiration_date`` and ``sp``) and try again.
    * 403 -> give up immediately and return ``None``.
    * 429 / 503 -> wait 5 seconds, then try again.
    * anything else -> raise; the error is retried like any other failure.

    Parameters
    ----------
    name : str
        Free-text artist name used as the search query.
    retry_count : int, optional
        Maximum number of attempts (default 8).

    Returns
    -------
    str or None
        Last path segment of the artist's ``external_urls['spotify']`` link,
        or ``None`` when nothing was found, access was forbidden, or every
        attempt ended in 429/503.

    Raises
    ------
    Exception
        Whatever error occurred on the final attempt (network failure,
        unexpected status code, malformed payload, ...).
    """
    global data, access_token, expiration_date, sp
    endpoint = "https://api.spotify.com/v1/search"
    retry_delay_s = 5
    # Without a timeout, requests can block forever on a stalled connection.
    request_timeout_s = 30
    for attempt in range(retry_count):
        try:
            # Built inside the loop so a token refreshed after a 401 is picked up.
            headers = {
                "Authorization": f"Bearer {access_token}"
            }
            params = {
                "q": name,
                "type": ["artist"]
            }
            response = requests.get(
                endpoint,
                headers=headers,
                params=params,
                timeout=request_timeout_s,
            )
            status = response.status_code
            if status == 200:
                items = response.json()['artists']['items']
                if not items:
                    # No match: retrying cannot help, and indexing [0] would
                    # raise IndexError and burn every remaining attempt.
                    return None
                spotify_href = items[0]['external_urls']['spotify']
                return spotify_href.split("/")[-1]
            elif status == 401:
                data = st.start_session(SP_DC, SP_KEY)
                access_token = data[0]
                expiration_date = data[1]
                sp = Spotify(SP_DC)
                continue  # Retry the request with the new access token
            elif status == 403:
                break  # Break out of the loop and return None
            elif status == 429 or status == 503:
                time.sleep(retry_delay_s)  # Wait for a few seconds before retrying
            else:
                raise Exception(status, response.text)
        except Exception:
            if attempt == retry_count - 1:
                raise
            time.sleep(retry_delay_s)  # Wait for a few seconds before retrying
    return None

In [6]:
def getArtistsAlbums(id, limit=50, retry_count=8):
    """Fetch one page of an artist's albums from the Spotify Web API.

    Sends ``GET /v1/artists/{id}/albums`` for the US market and makes up to
    ``retry_count`` attempts:

    * 200 -> return the albums.
    * 401 -> start a new session (rebinding the module-level ``data``,
      ``access_token``, ``expiration_date`` and ``sp``) and try again.
    * 403 -> give up immediately and return ``[]``.
    * 429 / 503 -> wait 5 seconds, then try again.
    * anything else -> raise; the error is retried like any other failure.

    Only the first page of results is read; no pagination is performed.

    Parameters
    ----------
    id : str
        Spotify artist ID.
    limit : int, optional
        Value sent as the ``limit`` query parameter (default 50).
    retry_count : int, optional
        Maximum number of attempts (default 8).

    Returns
    -------
    list[dict]
        One dict per album with keys ``name``, ``id``, ``release_date`` and
        ``album_type``; ``[]`` when access was forbidden or every attempt
        ended in 429/503.

    Raises
    ------
    Exception
        Whatever error occurred on the final attempt.
    """
    global data, access_token, expiration_date, sp
    endpoint = f"https://api.spotify.com/v1/artists/{id}/albums"
    retry_delay_s = 5
    # Without a timeout, requests can block forever on a stalled connection.
    request_timeout_s = 30
    for attempt in range(retry_count):
        try:
            # Built inside the loop so a token refreshed after a 401 is picked up.
            headers = {
                "Authorization": f"Bearer {access_token}"
            }
            params = {
                "market": "US",
                "limit": limit
            }
            response = requests.get(
                endpoint,
                headers=headers,
                params=params,
                timeout=request_timeout_s,
            )
            status = response.status_code
            if status == 200:
                return [
                    {
                        "name": album['name'],
                        "id": album['id'],
                        "release_date": album['release_date'],
                        "album_type": album['album_type']
                    }
                    for album in response.json()['items']
                ]
            elif status == 401:
                data = st.start_session(SP_DC, SP_KEY)
                access_token = data[0]
                expiration_date = data[1]
                sp = Spotify(SP_DC)
                continue  # Retry the request with the new access token
            elif status == 403:
                break  # Break out of the loop and return an empty list
            elif status == 429 or status == 503:
                time.sleep(retry_delay_s)  # Wait for a few seconds before retrying
            else:
                # Keep the original message first; add status and body so the
                # failure can be diagnosed, as searchArtists already does.
                raise Exception("Error getting albums", status, response.text)
        except Exception:
            if attempt == retry_count - 1:
                raise
            time.sleep(retry_delay_s)  # Wait for a few seconds before retrying
    return []

In [7]:
def getAlbumTracks(album_id, retry_count=8):
    """Fetch the first 50 tracks of an album from the Spotify Web API.

    Sends ``GET /v1/albums/{album_id}/tracks`` with ``limit=50`` and makes up
    to ``retry_count`` attempts:

    * 200 -> return the tracks.
    * 401 -> start a new session (rebinding the module-level ``data``,
      ``access_token``, ``expiration_date`` and ``sp``) and try again.
    * 403 -> give up immediately and return ``[]``.
    * 429 / 503 -> wait 5 seconds, then try again.
    * anything else -> raise; the error is retried like any other failure.

    Only the first page of results is read; no pagination is performed.

    Parameters
    ----------
    album_id : str
        Spotify album ID.
    retry_count : int, optional
        Maximum number of attempts (default 8).

    Returns
    -------
    list[dict]
        One dict per track with keys ``name``, ``id`` and ``duration_ms``;
        ``[]`` when access was forbidden or every attempt ended in 429/503.

    Raises
    ------
    Exception
        Whatever error occurred on the final attempt.
    """
    global data, access_token, expiration_date, sp
    endpoint = f"https://api.spotify.com/v1/albums/{album_id}/tracks"
    retry_delay_s = 5
    # Without a timeout, requests can block forever on a stalled connection.
    request_timeout_s = 30
    for attempt in range(retry_count):
        try:
            # Built inside the loop so a token refreshed after a 401 is picked up.
            headers = {
                "Authorization": f"Bearer {access_token}"
            }
            params = {
                "limit": 50
            }
            response = requests.get(
                endpoint,
                headers=headers,
                params=params,
                timeout=request_timeout_s,
            )
            status = response.status_code
            if status == 200:
                return [
                    {
                        "name": track['name'],
                        "id": track['id'],
                        "duration_ms": track['duration_ms']
                    }
                    for track in response.json()['items']
                ]
            elif status == 401:
                data = st.start_session(SP_DC, SP_KEY)
                access_token = data[0]
                expiration_date = data[1]
                sp = Spotify(SP_DC)
                continue  # Retry the request with the new access token
            elif status == 403:
                break  # Break out of the loop and return an empty list
            elif status == 429 or status == 503:
                time.sleep(retry_delay_s)  # Wait for a few seconds before retrying
            else:
                # Keep the original message first; add status and body so the
                # failure can be diagnosed, as searchArtists already does.
                raise Exception("Error getting tracks", status, response.text)
        except Exception:
            if attempt == retry_count - 1:
                raise
            time.sleep(retry_delay_s)  # Wait for a few seconds before retrying
    return []

In [8]:
def generateLyrics(name, limit = 50):
    """Collect lyrics for the tracks on an artist's albums.

    Resolves ``name`` to an artist ID, lists up to ``limit`` albums, and for
    every track that has an ID fetches its lyrics through the module-level
    ``sp`` client and flattens them with ``cleanLyrics``.

    Parameters
    ----------
    name : str
        Artist name to search for; also stored verbatim in each record.
    limit : int, optional
        Passed through to ``getArtistsAlbums`` (default 50).

    Returns
    -------
    list[dict]
        One record per track with keys ``artist``, ``album``, ``track`` and
        ``lyrics`` (a list of lines, or ``None`` when ``sp.get_lyrics``
        returned ``None``). Empty when the artist could not be resolved.

    Notes
    -----
    Collection is best effort: if fetching tracks or lyrics fails partway
    through an album, the remaining tracks of that album are skipped and a
    message is printed; records gathered so far are kept.
    """
    artist_id = searchArtists(name)
    if artist_id is None:
        # searchArtists returns None when the lookup yields no usable ID;
        # asking for the albums of artist "None" could only fail.
        print(f"Skipping {name!r}: could not resolve a Spotify artist id")
        return []

    to_return = []
    for album in getArtistsAlbums(artist_id, limit):
        album_name = album['name']
        album_id = album['id']
        try:
            for track in getAlbumTracks(album_id):
                track_id = track['id']
                if track_id is None:
                    # Without an ID there is nothing to look up; skip the track.
                    continue
                lyrics = cleanLyrics(sp.get_lyrics(track_id))
                to_return.append({
                    "artist": name,
                    "album": album_name,
                    "track": track['name'],
                    "lyrics": lyrics
                })
        except Exception as err:
            # Still best effort, but no longer silent, and KeyboardInterrupt /
            # SystemExit now propagate instead of being swallowed.
            print(f"Skipping rest of album {album_name!r} by {name!r}: {err!r}")

    return to_return

In [9]:
# Small, varied set of artist names for quick trial runs.
sample_artists = ["Taylor Swift", "CoCoMelon", "Keshi", "Conan Gray", "Slayer", "Black Sabbath", "Khalid", "Lana Del Rey", "IU", "YOASOBI", "LilyPichu", "League of Legends"]
# Full list of artist names fed to generateLyrics, one search query per entry.
top_x_artists = [
    "The Weeknd", "Taylor Swift", "Rihanna", "Ariana Grande", "Drake",
    "Kanye West", "Justin Bieber", "Dua Lipa", "Coldplay", "Bruno Mars",
    "Beyoncé", "SZA", "David Guetta", "Ed Sheeran", "Eminem", "Bad Bunny",
    "Miley Cyrus", "Marshmello", "Travis Scott", "Calvin Harris", "Billie Eilish",
    "Doja Cat", "21 Savage", "Maroon 5", "Shakira", "Imagine Dragons",
    "Tate McRae", "Post Malone", "Lady Gaga", "Katy Perry", "Olivia Rodrigo",
    "Lana Del Rey", "Harry Styles", "Adele", "Peso Pluma", "Playboi Carti",
    "KAROL G", "Nicki Minaj", "Sia", "Tiësto", "Benson Boone", "J Balvin",
    "Kendrick Lamar", "Ty Dolla $ign", "Queen", "Arctic Monkeys", "Future",
    "Metro Boomin", "Elton John", "Khalid", "Sam Smith", "Daddy Yankee",
    "Selena Gomez", "Chris Brown", "Jack Harlow", "OneRepublic", "USHER",
    "Justin Timberlake", "Pitbull", "Feid", "Shawn Mendes", "Madonna",
    "Myke Towers", "One Direction", "Halsey", "Kali Uchis", "Linkin Park",
    "J. Cole", "Maluma", "Rauw Alejandro", "Black Eyed Peas", "The Chainsmokers",
    "Ozuna", "Bebe Rexha", "Michael Jackson", "Camila Cabello", "Bizarrap",
    "James Arthur", "Teddy Swims", "Swae Lee", "Avicii", "Lil Wayne",
    "Ellie Goulding", "Jason Derulo", "Arijit Singh", "JAY-Z", "Mitski",
    "XXXTENTACION", "Britney Spears", "P!nk", "Charlie Puth", "Manuel Turizo",
    "Noah Kahan", "The Neighbourhood", "Pharrell Williams", "Lewis Capaldi",
    "Ava Max", "50 Cent", "Pritam", "CoCoMelon", "Keshi", "Conan Gray", "Slayer",
    "Black Sabbath", "LilyPichu", "League of Legends", "Billy Joel", "AC/DC", "Demi Lovato",
    "Twenty One Pilots", "Panic! At The Disco", "5 Seconds of Summer", "Fall Out Boy",
]


In [10]:
# Accumulate lyric records artist by artist. A plain loop (rather than one big
# expression) leaves the partial results in top_x if a later artist fails.
top_x = []
for artist_name in top_x_artists:
    top_x.extend(generateLyrics(artist_name))

# One row per track; snapshot the raw scrape before any cleaning is applied.
df = pd.DataFrame(top_x)
df.to_csv("top_x.csv", index=True)

In [12]:
# Discard every row whose "lyrics" value is missing; other columns are not inspected.
df = df.dropna(axis=0, how="any", subset=['lyrics'])

In [14]:
# Keep only the first row for each distinct set of lyrics.
# When df comes straight from generateLyrics the "lyrics" cells are Python
# lists, which are unhashable, so drop_duplicates(subset=['lyrics']) raises
# TypeError. Comparing on the string form works for list-valued cells and is
# a no-op conversion when the lyrics are already strings.
df = df[~df['lyrics'].astype(str).duplicated(keep='first')]

In [16]:
# Renumber the rows from 0 after filtering; drop=True discards the old index
# instead of keeping it as a column.
df = df.reset_index(drop=True)

In [19]:
# Show the resulting DataFrame as this cell's output.
df

Unnamed: 0,artist,album,track,lyrics
0,The Weeknd,The Highlights (Deluxe),Die For You,"[I'm findin' ways to articulate, The feeling I..."
1,The Weeknd,The Highlights (Deluxe),Starboy (feat. Daft Punk),"[I'm tryna put you in the worst mood, ah, P1 c..."
2,The Weeknd,The Highlights (Deluxe),Save Your Tears,"[Ooh, Na-na, yeah, I saw you dancing in a crow..."
3,The Weeknd,The Highlights (Deluxe),Blinding Lights,"[Yeah, ♪, I've been tryna call, I've been on m..."
4,The Weeknd,The Highlights (Deluxe),In Your Eyes,"[Oh, yeah, I just pretend, uh, That I'm in the..."
...,...,...,...,...
11232,Fall Out Boy,So Sick,So Sick - (BBC Radio 1 - Live Lounge),"[Gotta change my answering machine, Now that I..."
11233,Fall Out Boy,I'm Like A Lawyer With The Way I'm Always Tryi...,I'm Like A Lawyer With The Way I'm Always Tryi...,"[Last year's wishes are this year's apologies,..."
11234,Fall Out Boy,I'm Like A Lawyer With The Way I'm Always Tryi...,"This Ain't A Scene, It's An Arms Race - Live F...","[I am an arms dealer, Fitting you with weapons..."
11235,Fall Out Boy,I'm Like A Lawyer With The Way I'm Always Tryi...,Golden - Live From Hammersmith Palais,"[How cruel is the golden rule?, When the lives..."


In [18]:
# Write the frame to disk. index=True also writes the row index as the first
# CSV column (NOTE(review): with no index name this presumably reads back as
# an "Unnamed: 0" column — confirm whether the index is wanted in the file).
df.to_csv("top_x_cleaned.csv", index=True)