In [13]:
# Imports:
from lyricsgenius import Genius
import os
from dotenv import load_dotenv # Loads .env file.
import pandas as pd
import time
import requests

In [5]:
# Read credentials from the local env file, then build the Genius API client
# from the value stored under GENIUS_CLIENT_ID.
# NOTE(review): the variable is named *_CLIENT_ID, but lyricsgenius documents
# the constructor's first argument as an API access token -- confirm that
# auth.env actually holds a token under this key.
load_dotenv('auth.env')
GENIUS_CLIENT_ID = os.environ.get('GENIUS_CLIENT_ID')
genius = Genius(GENIUS_CLIENT_ID)

In [7]:
# Load the previously cleaned track table. Per the preview below it holds
# track ids/names, artists, audio features and a mood label per song.
combined_df = pd.read_csv('cleaned_data.csv')
# Show the first rows as the cell output for a quick sanity check.
combined_df.head()

Unnamed: 0,track_ids,track_names,artists,first_artists,danceability,energy,loudness,mode,acousticness,instrumentalness,valence,tempo,mood
0,1k1Bqnv2R0uJXQN4u6LKYt,Ain't No Sunshine,['Bill Withers'],Bill Withers,0.527,0.415,-11.451,0,0.457,1.7e-05,0.515,78.169,Happy
1,3zBhihYUHBmGd2bcQIobrF,(Sittin' On) the Dock of the Bay,['Otis Redding'],Otis Redding,0.768,0.367,-11.226,1,0.683,1.8e-05,0.532,103.621,Happy
2,3SdTKo2uVsxFblQjpScoHy,Stand By Me,['Ben E. King'],Ben E. King,0.65,0.306,-9.443,1,0.57,7e-06,0.605,118.068,Happy
3,3NfxSdJnVdon1axzloJgba,I Say a Little Prayer,['Aretha Franklin'],Aretha Franklin,0.592,0.355,-14.051,1,0.478,0.0,0.499,133.032,Happy
4,4kP69y3GKHi9tXckfgp4bK,For Once In My Life,['Stevie Wonder'],Stevie Wonder,0.524,0.519,-11.903,1,0.195,0.0,0.847,110.121,Happy


In [9]:
# Set the mood column aside as an independent copy before it is dropped.
labels = combined_df['mood'].copy()

# Remove the ids, the full artist list, the audio features and the mood
# label; per the preview, the song title and first artist remain, which
# are the fields used for the lyric lookup.
columns_to_drop = [
    'track_ids', 'artists',
    'danceability', 'energy', 'loudness', 'mode',
    'acousticness', 'instrumentalness', 'valence', 'tempo',
    'mood',
]
cleaned_df = combined_df.drop(columns_to_drop, axis='columns')
cleaned_df.head()

Unnamed: 0,track_names,first_artists
0,Ain't No Sunshine,Bill Withers
1,(Sittin' On) the Dock of the Bay,Otis Redding
2,Stand By Me,Ben E. King
3,I Say a Little Prayer,Aretha Franklin
4,For Once In My Life,Stevie Wonder


In [42]:
# Client-wide settings applied to the shared `genius` object before searching.
genius.verbose = True # Print status messages ("Searching for ..." / "Done.") for every search.
genius.remove_section_headers = True # Strip section headers (e.g. [Chorus]) from the lyrics returned by searches.
genius.timeout = 10 # Give a slow server longer to respond before the request times out (presumably seconds -- confirm in lyricsgenius docs).

def get_lyrics(track_name, artist, retries=3, delay=5):
    """Look up a song's lyrics on Genius, retrying when the request times out.

    Uses the notebook-level ``genius`` client and the notebook's ``requests``
    import.

    Args:
        track_name: Song title passed to ``genius.search_song``.
        artist: Artist name passed to ``genius.search_song``.
        retries: Maximum number of search attempts (default 3).
        delay: Seconds to wait after the first timeout; doubled after every
            further timeout (exponential backoff, default 5).

    Returns:
        The matched song's ``lyrics`` attribute, or ``None`` when the search
        returns nothing or every attempt timed out.

    Raises:
        Any non-timeout exception raised by ``genius.search_song`` is
        propagated unchanged to the caller.
    """
    for attempt in range(1, retries + 1):
        try:
            song = genius.search_song(track_name, artist)
        # lyricsgenius reports timeouts as requests.exceptions.Timeout, which
        # is NOT a subclass of the builtin TimeoutError, so both are caught.
        except (TimeoutError, requests.exceptions.Timeout):
            if attempt == retries:
                break  # No attempts left: do not sleep just to give up.
            print(f"Request timed out. Retrying in {delay} seconds...")
            time.sleep(delay)
            delay *= 2  # Exponential backoff
        else:
            return song.lyrics if song else None

    # Say so explicitly, so a timeout is distinguishable from "not found".
    print(f"Giving up on '{track_name}' by '{artist}': all {retries} attempts timed out.")
    return None

In [43]:
# Fetch lyrics for every track, one Genius search per row.
# With a pause after each request this cell runs for hours on the full table,
# so it is written to be resumable: the column is only created when missing
# and only rows that still lack lyrics are queried. Re-running the cell after
# an interruption therefore keeps everything fetched so far. To force a full
# re-fetch, drop the 'lyrics' column first.
REQUEST_PAUSE_SECONDS = 10  # Pause between consecutive Genius requests.

if 'lyrics' not in cleaned_df.columns:
    cleaned_df['lyrics'] = None

rows_to_fetch = cleaned_df[cleaned_df['lyrics'].isnull()]
for index, row in rows_to_fetch.iterrows():
    track_name = row['track_names']
    artist = row['first_artists']
    try:
        lyrics = get_lyrics(track_name, artist)
        cleaned_df.at[index, 'lyrics'] = lyrics
    except Exception as e:
        # Best effort: report the failure, leave this row's lyrics empty and
        # move on to the next song.
        print(f"An error occurred for track '{track_name}' by '{artist}': {str(e)}")

    # Pause after failures too, so an erroring API is not hit back-to-back.
    time.sleep(REQUEST_PAUSE_SECONDS)

Searching for "Ain't No Sunshine" by Bill Withers...
Done.
Searching for "(Sittin' On) the Dock of the Bay" by Otis Redding...
Done.
Searching for "Stand By Me" by Ben E. King...
Done.
Searching for "I Say a Little Prayer" by Aretha Franklin...
Done.
Searching for "For Once In My Life" by Stevie Wonder...
Done.
Searching for "Hold On, I'm Comin'" by Sam & Dave...
Done.
Searching for "Tired of Being Alone" by Al Green...
Done.
Searching for "Be My Baby" by The Ronettes...
Done.
Searching for "I Can See Clearly Now - Edit" by Johnny Nash...
Done.
Searching for "Midnight Train to Georgia" by Gladys Knight & The Pips...
Done.
Searching for "I've Got a Woman" by Ray Charles...
Done.
Searching for "Under the Boardwalk" by The Drifters...
Done.
Searching for "(You Make Me Feel Like) A Natural Woman" by Aretha Franklin...
Done.
Searching for "Love Train" by The O'Jays...
Done.
Searching for "When a Man Loves a Woman" by Percy Sledge...
Done.
Searching for "Son of a Preacher Man" by Dusty Sprin

In [47]:
# Per-column count of missing values, then a listing of every song whose
# lyrics lookup came back empty.
print(cleaned_df.isnull().sum())
rows_with_none_lyrics = cleaned_df.loc[cleaned_df['lyrics'].isna()]
missing_titles = rows_with_none_lyrics['track_names']
missing_artists = rows_with_none_lyrics['first_artists']
for track_name, artist in zip(missing_titles, missing_artists):
    print(f"Song name: {track_name}, Artist name: {artist}")

track_names       0
first_artists     0
lyrics           53
dtype: int64
Song name: Thank You (Falettinme Be Mice Elf Agin) - Single Version, Artist name: Sly & The Family Stone
Song name: Cissy Strut, Artist name: The Meters
Song name: Blood Milk Moon, Artist name: Hermanos Gutiérrez
Song name: Low Sun, Artist name: Hermanos Gutiérrez
Song name: Sonido Cósmico, Artist name: Hermanos Gutiérrez
Song name: Awake, Artist name: Tycho
Song name: In My Baby's Arms, Artist name: The California Honeydrops
Song name: Floating, Artist name: LANKS
Song name: Answering Machine, Artist name: Ruby Haunt
Song name: I'll Wait for You, Artist name: Tsuwavii
Song name: Leap Of Faith, Artist name: Kurtis Wells
Song name: All The Things I Couldn't Say To You, Artist name: Busty and the Bass
Song name: the way you looked, Artist name: juno roome
Song name: Home - 2019 - Remaster, Artist name: Edward Sharpe & The Magnetic Zeros
Song name: Valerie (feat. Amy Winehouse) - Version Revisited, Artist name: Mark 

In [146]:
# Final check before export: per-column missing-value counts and row count.
print(cleaned_df.isna().sum())
df_size = cleaned_df.shape
# 21 songs were dropped (lyrics in a different language, or lyrics could not
# be found), taking the table from 1828 to 1807 rows.
print("Number of rows:", df_size[0])
# Write the table with lyrics to disk, without the index column.
cleaned_df.to_csv('nlp_original.csv', index=False)

track_names      0
first_artists    0
lyrics           0
dtype: int64
Number of rows: 1807
