### LyricsGenius usage instructions can be found here:
#### https://github.com/johnwmillr/LyricsGenius/blob/master/README.md

In [None]:
# # Install the lyricsgenius package (uncomment the next line to run it)
# !pip install lyricsgenius

In [1]:
# Import modules and load the Genius API token from the local `config` module
import requests  # NOTE(review): not referenced in the cells shown here
import pandas as pd
from pprint import pprint  # NOTE(review): not referenced in the cells shown here
import lyricsgenius
from config import genius_token  # token is imported rather than hardcoded in the notebook
# Genius API client used for the song lookups in this notebook
genius = lyricsgenius.Genius(genius_token)


In [None]:
# Load the two top-love-songs CSV files
top_song_df = pd.read_csv("csv/top_love_songs_1.csv")
top_song2_df = pd.read_csv("csv/top_love_songs_2.csv")

# Columns present in both files; used together as the join key
shared_columns = ["Song Name", "Artist", "Album", "Duration", "Rank"]

# Outer-merge the two frames, drop the irrelevant "Unnamed: 0" columns that the
# merge suffixed with _x/_y (presumably each file's saved index), and append
# empty "Year" and "Lyrics" columns to be populated later
top2000_lovesongs_df = (
    top_song_df
    .merge(top_song2_df, on=shared_columns, how="outer")
    .drop(columns=["Unnamed: 0_x", "Unnamed: 0_y"])
    .assign(Year="", Lyrics="")
)
top2000_lovesongs_df


In [None]:
# #check song object attributes obtained through lyricsgenius
# dir(song)

In [None]:
# Look up the release date and lyrics on Genius for the top 2000 songs.
# Rows whose lookup fails keep the empty-string placeholders in "Year" and "Lyrics".
for index, row in top2000_lovesongs_df.iloc[0:2000].iterrows():
    title = row["Song Name"]
    artist = row["Artist"]
    print(f'Retrieving info for Index {index}: {title} by {artist}')

    try:
        song = genius.search_song(title, artist=artist)
        if song is None:
            # search_song yields None when Genius has no match (per the LyricsGenius docs)
            print(f'----- missing info for {title} by {artist}')
            continue
        # Read both attributes before writing anything so a failure cannot
        # leave a row with a year but no lyrics
        year, lyrics = song.year, song.lyrics
    except Exception as exc:
        # `Exception` instead of a bare `except:` keeps the loop best-effort but
        # still lets KeyboardInterrupt (kernel interrupt) stop a long run
        print(f'----- missing info for {title} by {artist} ({type(exc).__name__}: {exc})')
        continue

    top2000_lovesongs_df.loc[index, "Year"] = year
    top2000_lovesongs_df.loc[index, "Lyrics"] = lyrics

print("FIN")

In [None]:
# Store the enriched data as a CSV so the API calls do not have to be re-run.
# `index=False` is not passed, so the row index is written as an extra unnamed
# column; it shows up as "Unnamed: 0" when the file is read back in.
top2000_lovesongs_df.to_csv("top2000_yearlyrics.csv")


In [2]:
# Reload the cached lookup results from disk and preview the first five rows
song_year_df = pd.read_csv("top2000_yearlyrics.csv")
song_year_df.head()

Unnamed: 0.1,Unnamed: 0,Rank,Song Name,Artist,Duration,Album,Year,Lyrics
0,0,1,Wonderwall,Oasis,259,(What's the Story) Morning Glory?,,
1,1,2,The Scientist,Coldplay,309,A Rush of Blood to the Head,2002-08-26,"[Verse 1]\nCome up to meet you, tell you I'm s..."
2,2,3,Yellow,Coldplay,267,Parachutes,2000-06-26,[Verse 1: Chris Martin]\nLook at the stars\nLo...
3,3,4,Fix You,Coldplay,294,X&Y,2005-06-06,[Verse 1: Chris Martin]\nWhen you try your bes...
4,4,5,Chasing Cars,Snow Patrol,0,Eyes Open,2006-06-06,[Verse 1]\nWe'll do it all\nEverything\nOn our...


In [3]:
# Keep only the year part of the release date ("YYYY-MM-DD" -> "YYYY").
# Taking the text before the first "-" works for any number of date components;
# assigning an expanded split to exactly three columns raises a ValueError
# unless the longest date has exactly three parts. This form also leaves
# `song_year_df` unmodified, so the cell can be re-run safely.
release_year = song_year_df["Year"].str.split("-", n=1).str[0]

# Replace the full date column with the extracted year
songs_df = song_year_df.drop(columns=["Year"]).assign(**{"Split Year": release_year})

# Keep only the rows that actually have a year
song_years_df = songs_df[songs_df["Split Year"].notna()]

song_years_df.head()

Unnamed: 0.1,Unnamed: 0,Rank,Song Name,Artist,Duration,Album,Lyrics,Split Year
1,1,2,The Scientist,Coldplay,309,A Rush of Blood to the Head,"[Verse 1]\nCome up to meet you, tell you I'm s...",2002
2,2,3,Yellow,Coldplay,267,Parachutes,[Verse 1: Chris Martin]\nLook at the stars\nLo...,2000
3,3,4,Fix You,Coldplay,294,X&Y,[Verse 1: Chris Martin]\nWhen you try your bes...,2005
4,4,5,Chasing Cars,Snow Patrol,0,Eyes Open,[Verse 1]\nWe'll do it all\nEverything\nOn our...,2006
5,5,6,Iris,Goo Goo Dolls,289,Dizzy Up the Girl,[Verse 1]\nAnd I'd give up forever to touch yo...,1998


In [4]:
# "Unnamed: 0" is the old row index: to_csv wrote it into the file and read_csv
# loaded it back as a regular column. Drop it, then rename "Split Year" to "Year".
year_df = (
    song_years_df
    .drop(columns=["Unnamed: 0"])
    .rename(columns={"Split Year": "Year"})
)

year_df

Unnamed: 0,Rank,Song Name,Artist,Duration,Album,Lyrics,Year
1,2,The Scientist,Coldplay,309,A Rush of Blood to the Head,"[Verse 1]\nCome up to meet you, tell you I'm s...",2002
2,3,Yellow,Coldplay,267,Parachutes,[Verse 1: Chris Martin]\nLook at the stars\nLo...,2000
3,4,Fix You,Coldplay,294,X&Y,[Verse 1: Chris Martin]\nWhen you try your bes...,2005
4,5,Chasing Cars,Snow Patrol,0,Eyes Open,[Verse 1]\nWe'll do it all\nEverything\nOn our...,2006
5,6,Iris,Goo Goo Dolls,289,Dizzy Up the Girl,[Verse 1]\nAnd I'd give up forever to touch yo...,1998
...,...,...,...,...,...,...,...
1994,995,Breakthrough,Modest Mouse,0,This Is a Long Drive for Someone with Nothing ...,"[Verse 1]\nI got one two three four\nFive, six...",1996
1995,996,Saturday Night,Misfits,250,Famous Monsters,"[Intro]\nHm, hmm\nOh oh oh oh oh\nOh oh oh oh\...",1999
1996,997,You Wanted More,Tonic,249,Sugar,"[Verse 1]\nLove is tragic, love is bold\nYou w...",1999
1997,998,New Romantic,Laura Marling,172,My Manic And I,"[Verse 1]\n\nI know I said I love you, but I'm...",2007


In [5]:
# Convert the "Year" strings to integers; all other columns keep their dtype
year_df = year_df.astype({"Year": int})

year_df.dtypes

Rank          int64
Song Name    object
Artist       object
Duration      int64
Album        object
Lyrics       object
Year          int32
dtype: object

In [6]:
# Last year covered by each decade label. The first bin spans 0-1949 and the
# "2010s" bin runs through 2020.
decade_last_year = {
    "40s": 1949,
    "50s": 1959,
    "60s": 1969,
    "70s": 1979,
    "80s": 1989,
    "90s": 1999,
    "2000s": 2009,
    "2010s": 2020,
}

# Bin edges and their matching labels for pd.cut
bins = [0, *decade_last_year.values()]
labels = list(decade_last_year)

year_df["bins"] = pd.cut(year_df["Year"], bins=bins, labels=labels, include_lowest=True)

# Number of songs per decade, to see the distribution
year_df["bins"].value_counts()

2000s    1034
2010s     315
90s       241
80s        79
60s        74
70s        58
50s         5
40s         1
Name: bins, dtype: int64

In [14]:
# Drop the 40s and 50s entirely and keep only the 2000s songs with Rank < 300.
# NOTE(review): the earlier comment said "top 300", but `< 300` excludes rank
# 300 itself -- confirm which cutoff is intended.
decade = year_df["bins"]

# Every decade except the 40s, 50s and 2000s is kept as-is
final_songs_df = year_df.loc[~decade.isin(["40s", "50s", "2000s"])]

# From the 2000s, keep only the songs ranked below 300
songs_df = year_df.loc[(decade == "2000s") & (year_df["Rank"] < 300)]

# Stack the two disjoint subsets (2000s first) under a fresh 0..n-1 index.
# The subsets never share a row, so the outer merge on every column was acting
# as a concatenation; pd.concat gives a fixed row order and avoids using the
# full lyrics text as a join key.
final_df = pd.concat([songs_df, final_songs_df], ignore_index=True)
final_df


Unnamed: 0,Rank,Song Name,Artist,Duration,Album,Lyrics,Year,bins
0,2,The Scientist,Coldplay,309,A Rush of Blood to the Head,"[Verse 1]\nCome up to meet you, tell you I'm s...",2002,2000s
1,3,Yellow,Coldplay,267,Parachutes,[Verse 1: Chris Martin]\nLook at the stars\nLo...,2000,2000s
2,4,Fix You,Coldplay,294,X&Y,[Verse 1: Chris Martin]\nWhen you try your bes...,2005,2000s
3,5,Chasing Cars,Snow Patrol,0,Eyes Open,[Verse 1]\nWe'll do it all\nEverything\nOn our...,2006,2000s
4,7,I'm Yours,Jason Mraz,242,I'm Yours,"[Verse 1]\nWell, you done done me, and you bet...",2008,2000s
...,...,...,...,...,...,...,...,...
1105,992,Hard to Say I'm Sorry,Chicago,232,The Chicago Story,[Verse 1]\nEverybody needs a little time away\...,1983,80s
1106,994,Haunted,Taylor Swift,245,Speak Now,[Verse 1]\nYou and I walk a fragile line\nI ha...,2010,2010s
1107,995,Breakthrough,Modest Mouse,0,This Is a Long Drive for Someone with Nothing ...,"[Verse 1]\nI got one two three four\nFive, six...",1996,90s
1108,996,Saturday Night,Misfits,250,Famous Monsters,"[Intro]\nHm, hmm\nOh oh oh oh oh\nOh oh oh oh\...",1999,90s


In [15]:
# Save the final dataset. `index=False` is not passed, so the row index is
# written as the first (unnamed) column of the file.
final_df.to_csv("csv/final_data.csv")

In [17]:
# Songs per decade in the final dataset
decade_counts = final_df["bins"].value_counts()
decade_counts


# ##OPTIONAL STEPS FOUND ONLINE

# #Add the song to the artist object:
# artist.add_song(song)

# #Save the artist's songs to a JSON file:
# artist.save_lyrics(f'Lyrics/{artist}_{song}_lyrics')    #the name will be "Lyrics_{ArtistName}.json" (example: Lyrics_Cher.json)

# #Open your json file
# Artist=pd.read_json("Lyrics_Cher.json")

2000s    343
2010s    315
90s      241
80s       79
60s       74
70s       58
50s        0
40s        0
Name: bins, dtype: int64