# Music / Lyric Analyzer

## Webscrapping the songs from Wikipedia (1959 - 2021)
#### Notice: There will be a part 2, where i fetch all the lyrics from a site called https://www.mldb.org

OBS:! I saw that most songs have a link. With some aditional data. Genre, Length, Label, Released.
Also all most of singers actually!

In [82]:
import bs4
import requests

def get_top100_song_year(year=2021):
    """This function will return..."""
    result_dict = dict()
    
    url = "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_" + str(year)
    
    r = requests.get(url)
    r.raise_for_status()
    soup = bs4.BeautifulSoup(r.text, 'html.parser')
    
    table = soup.select('.wikitable > tbody > tr')
    
    for el in table:
        td = el.select('td')
        if(len(td) > 0):
            number = td[0].text.rstrip('\n')
            title = td[1].text.rstrip('\n')
            artist = extract_artists(td[2])
            title_url = extract_title_url(td[1])
            result_dict[number] = [title[1:-1], artist[0], artist[1], title_url]
    
    return result_dict

def extract_artists(element):
    """This function will return..."""
    
    if(element.find('a')):
        result_list = [x.text.rstrip('\n') for x in element.select('a')]
    elif(element.find('li')):
        result_list = [x.text.rstrip('\n') for x in element.select('li')]
    else:
        # A handfull of songs are listed wrongly. So I have to check for the word 'featuring'
        result = element.text.rstrip('\n')
        if('featuring' in result):
            result_list = result.split(' featuring ')
        else:
            result_list =  [result]
    
     
    
    # Need to add a blank spot if there is no featuring artist.
    if(len(result_list) < 2):
        result_list.append("")
    
    return [artist for artist in result_list][:2]

def extract_title_url(element):
    """This function will return..."""
    
    if(element.find('a')):
        return element.select('a')[0].get('href')
    else:
        return ""

In [101]:
def get_all_top100_song_period(start=1959, end=2021):
    """This function will return..."""
    result_dict = dict()
    for i in range(start, end + 1):
        result_dict[i] = get_top100_song_year(i)
    return result_dict

        
billboard_data = get_all_top100_song_period(1959, 2021)
print("Size of Data:",len(billboard_data))
print("Example (2021):")
billboard_data.get(2021)


Size of Data: 63
Example (2021):


{'1': ['Levitating', 'Dua Lipa', '', '/wiki/Levitating_(song)'],
 '2': ['Save Your Tears',
  'The Weeknd',
  'Ariana Grande',
  '/wiki/Save_Your_Tears'],
 '3': ['Blinding Lights', 'The Weeknd', '', '/wiki/Blinding_Lights'],
 '4': ['Mood', '24kGoldn', 'Iann Dior', '/wiki/Mood_(song)'],
 '5': ['Good 4 U', 'Olivia Rodrigo', '', '/wiki/Good_4_U'],
 '6': ['Kiss Me More', 'Doja Cat', 'SZA', '/wiki/Kiss_Me_More'],
 '7': ['Leave the Door Open',
  'Silk Sonic',
  'Bruno Mars',
  '/wiki/Leave_the_Door_Open'],
 '8': ['Drivers License',
  'Olivia Rodrigo',
  '',
  '/wiki/Drivers_License_(song)'],
 '9': ['Montero (Call Me by Your Name)',
  'Lil Nas X',
  '',
  '/wiki/Montero_(Call_Me_by_Your_Name)'],
 '10': ['Peaches',
  'Justin Bieber',
  'Daniel Caesar',
  '/wiki/Peaches_(Justin_Bieber_song)'],
 '11': ['Butter', 'BTS', '', '/wiki/Butter_(song)'],
 '12': ['Stay',
  'The Kid Laroi',
  '',
  '/wiki/Stay_(The_Kid_Laroi_and_Justin_Bieber_song)'],
 '13': ['Deja Vu',
  'Olivia Rodrigo',
  '',
  '/wiki/D

## Turn it into a dataframe (IMPORTANT)

In [109]:
import pandas as pd

def make_data_into_dataframe(billboard_data):
    data_list = []
    for year, value in billboard_data.items():
        for place, song in value.items():
            list_song = [year, place, song[0], song[1], song[2], song[3]]
            data_list.append(list_song)
    return pd.DataFrame(data_list, columns=['Year', 'Place', 'Title', 'Artist', 'Featuring','Title_url'])
   
make_data_into_dataframe(billboard_data)

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
0,1959,1,The Battle of New Orleans,Johnny Horton,,/wiki/The_Battle_of_New_Orleans
1,1959,2,Mack the Knife,Bobby Darin,,/wiki/Mack_the_Knife
2,1959,3,Personality,Lloyd Price,,/wiki/Personality_(Lloyd_Price_song)
3,1959,4,Venus,Frankie Avalon,,/wiki/Venus_(Frankie_Avalon_song)
4,1959,5,Lonely Boy,Paul Anka,,/wiki/Lonely_Boy_(Paul_Anka_song)
...,...,...,...,...,...,...
6296,2021,96,Things a Man Oughta Know,Lainey Wilson,,/wiki/Things_a_Man_Oughta_Know
6297,2021,97,Throat Baby (Go Baby),BRS Kash,,/wiki/Throat_Baby_(Go_Baby)
6298,2021,98,Tombstone,Rod Wave,,/wiki/Tombstone_(song)
6299,2021,99,Drinkin' Beer. Talkin' God. Amen.,Chase Rice,Florida Georgia Line,/wiki/Drinkin%27_Beer._Talkin%27_God._Amen.


## Save as CSV. So i dont have to scrape it everyday..

In [107]:
#df_to_csv = pd.DataFrame.from_dict(billboard_data) 
make_data_into_dataframe(billboard_data).to_csv ('data/raw_top100_1959_2021.csv', index = False, header=True)

## Load Data

In [108]:
import pandas as pd
df = pd.read_csv('data/raw_top100_1959_2021.csv' )
df

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
0,1959,1,The Battle of New Orleans,Johnny Horton,,/wiki/The_Battle_of_New_Orleans
1,1959,2,Mack the Knife,Bobby Darin,,/wiki/Mack_the_Knife
2,1959,3,Personality,Lloyd Price,,/wiki/Personality_(Lloyd_Price_song)
3,1959,4,Venus,Frankie Avalon,,/wiki/Venus_(Frankie_Avalon_song)
4,1959,5,Lonely Boy,Paul Anka,,/wiki/Lonely_Boy_(Paul_Anka_song)
...,...,...,...,...,...,...
6296,2021,96,Things a Man Oughta Know,Lainey Wilson,,/wiki/Things_a_Man_Oughta_Know
6297,2021,97,Throat Baby (Go Baby),BRS Kash,,/wiki/Throat_Baby_(Go_Baby)
6298,2021,98,Tombstone,Rod Wave,,/wiki/Tombstone_(song)
6299,2021,99,Drinkin' Beer. Talkin' God. Amen.,Chase Rice,Florida Georgia Line,/wiki/Drinkin%27_Beer._Talkin%27_God._Amen.


## Testing the data:

In [5]:
df[df['Title'] == '"Venus"']

Unnamed: 0,Year,Place,Title,Artist,Title_url
3,1959,4,"""Venus""",Frankie Avalon,/wiki/Venus_(Frankie_Avalon_song)
1133,1970,33,"""Venus""",Shocking Blue,/wiki/Venus_(Shocking_Blue_song)
2738,1986,38,"""Venus""",Bananarama,/wiki/Venus_(Shocking_Blue_song)


In [96]:
df[df['Artist'] == 'The Beatles']

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
500,1964,1,I Want to Hold Your Hand,The Beatles,,/wiki/I_Want_to_Hold_Your_Hand
501,1964,2,She Loves You,The Beatles,,/wiki/She_Loves_You
512,1964,13,A Hard Day's Night,The Beatles,,/wiki/A_Hard_Day%27s_Night_(song)
513,1964,14,Love Me Do,The Beatles,,/wiki/Love_Me_Do
515,1964,16,Please Please Me,The Beatles,,/wiki/Please_Please_Me_(song)
539,1964,40,Twist and Shout,The Beatles,,/wiki/Twist_and_Shout
551,1964,52,Can't Buy Me Love,The Beatles,,/wiki/Can%27t_Buy_Me_Love
554,1964,55,Do You Want to Know a Secret,The Beatles,,/wiki/Do_You_Want_to_Know_a_Secret
594,1964,95,I Saw Her Standing There,The Beatles,,/wiki/I_Saw_Her_Standing_There
606,1965,7,Help!,The Beatles,,/wiki/Help!_(song)


#### Issue with songs with featuring artists. Need to refactor the code:

In [21]:
# How many songs have a featurin artist for the song:
print("Featuring artists: ",df['Artist'].str.contains('featuring').sum())
print("AND in the name: ",df['Artist'].str.contains('and').sum()) # OBS.: This could also be Andrew, Andy etc.. not valid!


Featuring artists:  718
AND in the name:  586


#### Testing featuring after refactoring the web scrapper:

In [88]:
# How many songs have a featurin artist for the song:
print("Featuring artists: ",df['Artist'].str.contains('featuring').sum())
print("AND in the name: ",df['Artist'].str.contains('and').sum()) # OBS.: This could also be Andrew, Andy etc.. not valid!

Featuring artists:  4
AND in the name:  273


In [86]:
df[df['Artist'].str.contains('featuring')]

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
6140,2020,40,High Fashion,Roddy Ricch featuring Mustard,,/wiki/High_Fashion_(song)
6190,2020,90,Rags2Riches,Rod Wave featuring ATR Son Son,,/wiki/Rags2Riches_(song)
6247,2021,47,Wants and Needs,Drake featuring Lil Baby,,/wiki/Wants_and_Needs
6277,2021,77,Cry Baby,Megan Thee Stallion featuring DaBaby,,/wiki/Cry_Baby_(Megan_Thee_Stallion_song)


In [87]:
# It's usually used if there is a third featuring person/band. DRAKE featuring 21 SAVAGE and PROJECT PAT
# Might have to change the way i crawl the data!
df[df['Artist'].str.contains('and')]

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
24,1959,25,A Teenager in Love,Dion and the Belmonts,,/wiki/A_Teenager_in_Love
30,1959,31,Red River Rock,Johnny and the Hurricanes,,/wiki/Red_River_Valley_(song)
35,1959,36,Teen Beat,Sandy Nelson,,/wiki/Teen_Beat_(song)
59,1959,60,Tell Him No,Travis and Bob,,/wiki/Tell_Him_No
74,1959,75,Petite Fleur,Chris Barber's Jazz Band,,/wiki/Petite_Fleur
...,...,...,...,...,...,...
6191,2020,91,Bluebird,Miranda Lambert,,/wiki/Bluebird_(Miranda_Lambert_song)
6214,2021,14,Positions,Ariana Grande,,/wiki/Positions_(song)
6243,2021,43,You Right,Doja Cat and the Weeknd,,/wiki/You_Right
6287,2021,87,POV,Ariana Grande,,/wiki/POV_(song)


In [90]:
df[df['Title'] == 'Leave the Door Open'] # This title's artist got '()' in the name....FIXED

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
6207,2021,7,Leave the Door Open,Silk Sonic,Bruno Mars,/wiki/Leave_the_Door_Open


In [92]:
df[df['Title'] == 'Peaches'] # This title's artist(s) ot featuring + and in the name. FIXED

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
6210,2021,10,Peaches,Justin Bieber,Daniel Caesar,/wiki/Peaches_(Justin_Bieber_song)


In [93]:
print("Number of songs: ",len(df))
print("Years: ", len(df["Year"].drop_duplicates()))

Number of songs:  6301
Years:  63


In [94]:
# Find all the no.1 songs

df[df['Place'] == "1"]

Unnamed: 0,Year,Place,Title,Artist,Featuring,Title_url
0,1959,1,The Battle of New Orleans,Johnny Horton,,/wiki/The_Battle_of_New_Orleans
100,1960,1,Theme from A Summer Place,Percy Faith,,/wiki/Theme_from_A_Summer_Place
200,1961,1,Tossin' and Turnin',Bobby Lewis,,/wiki/Tossin%27_and_Turnin%27
300,1962,1,Stranger on the Shore,Acker Bilk,,/wiki/Stranger_on_the_Shore
400,1963,1,Sugar Shack,Jimmy Gilmer and the Fireballs,,/wiki/Sugar_Shack
...,...,...,...,...,...,...
5801,2017,1,Shape of You,Ed Sheeran,,/wiki/Shape_of_You
5901,2018,1,God's Plan,Drake,,/wiki/God%27s_Plan_(song)
6001,2019,1,Old Town Road,Lil Nas X,Billy Ray Cyrus,/wiki/Old_Town_Road
6101,2020,1,Blinding Lights,The Weeknd,,/wiki/Blinding_Lights


In [43]:
songs_without_url = df[df['Title_url'].isnull()]

In [95]:
# Find songs that don't have a link.
            
print(len(songs_without_url), " out of ", len(df), " Songs.",  (len(songs_without_url) / len(df))*100, "%")
songs_without_url

74  out of  6301  Songs. 1.1744167592445642 %


Unnamed: 0,Year,Place,Title,Artist,Title_url
112,1960,13,"""Greenfields""",The Brothers Four,
139,1960,40,"""Way Down Yonder in New Orleans""",Freddy Cannon,
165,1960,66,"""It's Time to Cry""",Paul Anka,
181,1960,82,"""Lady Luck""",Lloyd Price,
182,1960,83,"""Step by Step""",The Crests,
...,...,...,...,...,...
4061,1999,61,"""If You""",Silk,
4081,1999,81,"""Faded Pictures""",Case featuring Joe,
4289,2001,89,"""I'm a Thug""",Trick Daddy,
4388,2002,88,"""Anything""",Jaheim featuring Next,


### Notes after testing

In [115]:
# Notice 1986: Dionne and Friends (Dionne Warwick, Gladys Knight, Elton John and Stevie Wonder)
# Need a a clever way to break that down.
# Also 2004, 2012 (and others) have "feature" or "and". Need to be able to break down aswell 
# 74 songs wont get extra data. Which is acceptable 

In [210]:
import pandas as pd

def make_data_into_dataframe(billboard_data):
    data_list = []
    for year, value in billboard_data.items():
        for place, song in value.items():
            list_song = [year, place, song[0], song[1], song[2]]
            data_list.append(list_song)
    return pd.DataFrame(data_list, columns=['Year', 'Place', 'Title', 'Artist', 'Title_url'])
   
df = make_data_into_dataframe(data)
df

Unnamed: 0,Year,Place,Title,Artist,Title_url
0,1959,1,"""The Battle of New Orleans""",Johnny Horton,/wiki/The_Battle_of_New_Orleans
1,1959,2,"""Mack the Knife""",Bobby Darin,/wiki/Mack_the_Knife
2,1959,3,"""Personality""",Lloyd Price,/wiki/Personality_(Lloyd_Price_song)
3,1959,4,"""Venus""",Frankie Avalon,/wiki/Venus_(Frankie_Avalon_song)
4,1959,5,"""Lonely Boy""",Paul Anka,/wiki/Lonely_Boy_(Paul_Anka_song)
...,...,...,...,...,...
6296,2021,96,"""Things a Man Oughta Know""",Lainey Wilson,/wiki/Things_a_Man_Oughta_Know
6297,2021,97,"""Throat Baby (Go Baby)""",BRS Kash,/wiki/Throat_Baby_(Go_Baby)
6298,2021,98,"""Tombstone""",Rod Wave,/wiki/Tombstone_(song)
6299,2021,99,"""Drinkin' Beer. Talkin' God. Amen.""",Chase Rice featuring Florida Georgia Line,/wiki/Drinkin%27_Beer._Talkin%27_God._Amen.


### Notes after converting:

In [110]:
# link for each songs lyrics 
# number of words in lyrics 
# (maybe) lenth of song? - DONE
# (maybe) Link for youtube video of the song - DONE

# Turn it into a csv file - DONE

## Get additional data from wikipedia link

In [113]:
import bs4
import requests
import re


def get_additional_song_data(link):
    result_dict = dict()
    
    url = "https://en.wikipedia.org/"+link
    
    r = requests.get(url)
    r.raise_for_status()
    soup = bs4.BeautifulSoup(r.text, 'html.parser')
    
    # Some songs have more than one infobox. We only want the first.
    infobox = soup.select('.infobox')
    table = infobox[0].select('tbody > tr')
    
    # All the data we want to extract (Will convert it into an Object!)
    released = ""
    genres = [""]
    length = ""
    label = ""
    total_labels = ""
    writer = [""]
    youtube = ""
    
    
    for el in table:

        
        if(el.find('th')):
            th = el.select('th')
            td = el.select('td')
            if(len(td) > 0):
                #print(th[0].text, td[0].text)
                match th[0].text:
                    case "Released":
                        released =  extract_released_year(td[0])
                        #print("Released", extract_released_year(td[0]))
                    case "Genre":
                        genres = extract_genre(td[0])
                        #print("Genre:", extract_genre(td[0]))
                    case "Length":
                        length = extract_length(td[0])
                        #print("Length", extract_length(td[0]))
                    case "Label":
                        label_data = extract_label(td[0])
                        label = label_data[0]
                        total_labels = label_data[1]
                        #print("Label", extract_label(td[0]))
                    case "Songwriter(s)":
                        writer = extract_songwriter(td[0])
                        #print("Songwriter(s)", extract_songwriter(td[0]))
                        
        # Search for YT link
        if(el.find('a', {'title': 'YouTube'})):
            youtube = el.find('a', {'class': 'external'}).get('href')
            #print(el.find('a', {'class': 'external'}).get('href'))
                    
                
    return [released, genres, length, label, total_labels, writer, youtube]
    
    

def extract_released_year(element):
    """This function will return the year the song was released. But tests shows that is different scenarios. 
    1. Multiple release days, release etc., 
    2. Sometimes only the Year and Month(or Just the year), 
    
    I am using regex and split to make a list of 4 digit numbers and only returns index 0(Incase there was a rerelease later)
    """    
    years_reg = re.compile(r"\b(19|20)\d{2}\b")
        
    return [year for year in element.text.split() if re.search(years_reg, year)][0]
    
                        
def extract_genre(element):
    """This function will return a list of up to 3 genres. But tests shows that is different scenarios. 
    1. Only one Genre, 
    2. Multiple genres in an unordered list(ul), 
    3. Multiple genres with a bookmark like this. Pop[1], Rock[2].
    
    Which is handle but simple if statements.
    """
    letters_reg = re.compile(r"[a-zA-Z]")    
    
    if(element.find('ul')):
        result_list = [x.text for x in element.select('a')]
    else:
        result_list = element.text.split()
        
    return [genre for genre in result_list if re.search(letters_reg, genre)][:3]
    
    
def extract_length(element):
    """This function will return the length of the song. In some cases there is a single and an album version etc.,
    but i decided to return just the longest version. 
    1. The function splits the text elements into a list if it contains ':'.
    2. It cleans up the result for any letters or symbols(not ':' obviously).
    3. Return the max value."""

    # c = character   l = length
    lengths = ["".join([c for c in l if int(c.isnumeric()) or c == ":"]) for l in element.text.split() if ":" in l] 
    
    return max(lengths)
    
def extract_label(element):
    """This function will return only the first mentioned Label. Some singers/song change labels for whatever reason.
    Sometimes up tp two or three times. For simplicity I only return the first mentioned and the amount of different
    labels that the song have had. Maybe we can use that information for something interresting."""
    
    if(element.find('a')):
        result_list = [x.text for x in element.select('a')]
    elif(element.find('li')):
        result_list = [x.text for x in element.select('li')]
    else:
        result_list = element.text.split()
    
    # Returning originale label and the amount of different labels.
    return [[label for label in result_list][0], len(result_list)]

def extract_songwriter(element):
    """This function will return only the first mentioned songwriter. Some singers/song change labels for whatever reason.
    Sometimes up tp two or three times. For simplicity I only return the first mentioned and the amount of different
    labels that the song have had. Maybe we can use that information for something interresting."""
    
    if(element.find('a')):
        result_list = [x.text for x in element.select('a')]
        
    else:
        result_list = re.split('; |,',element.text) 
    
    # Returning originale label and the amount of different labels.
    return [writer for writer in result_list][:2]
    
    
    
get_additional_song_data("/wiki/Thrift_Shop")

# Some songs have up to 7 genres! Limit it to 3!?
# Length can be different aswell, album and single (Think i should go with the longest one)
# Multiple recording days. (Take the earliest)
# More than one label! (Like Wham - Careless whispers...)
# Song writers is often more than one. Some weird ones like Andy Gibb - shadow song:  "Barry, Robin & Maurice Gibb; Andy Gibb"

['2012',
 ['Pop-rap', 'comedy hip hop'],
 '3:55',
 'Macklemore LLC',
 1,
 ['Ben Haggerty', 'Ryan Lewis'],
 'https://www.youtube.com/watch?v=QK8mJJJvaes']

In [106]:
test1 = "/wiki/The_First_Time_Ever_I_Saw_Your_Face" # 2 different lenghts + label also have a number after
test2 = "/wiki/When_Doves_Cry" # 7 different genres! And 3  dates..
test3 = "/wiki/Shadow_Dancing_(song)" # Songwriters: "Barry, Robin & Maurice Gibb; Andy Gibb"
test4 = "/wiki/Careless_Whisper" # Genre has this [1] etc + multiple labels
test5 = "/wiki/The_Way_We_Were_(song)" # No genre
test6 = "/wiki/The_Sign_(song)" # the genres prints like this: Techno-reggaepopEuropop (Make sure it's splits correctly)

get_additional_song_data(test1)
get_additional_song_data(test2)
get_additional_song_data(test3)
get_additional_song_data(test4)
get_additional_song_data(test5)
get_additional_song_data(test6)

Released 1972
Genre: ['Soul', 'vocal jazz']
Length 5:22
Label ['Atlantic', 2]
Songwriter(s) ['Ewan MacColl']
Released 1984
Genre: ['Experimental pop', 'neo-psychedelia', 'soul']
Length 5:52
Label ['Warner', 2]
Songwriter(s) ['Prince']
https://www.youtube.com/watch?v=UG3VcCAlUgE
Released 1978
Genre: ['Disco']
Length 4:34
Label ['RSO', 2]
Songwriter(s) ['Barry, Robin & Maurice Gibb', 'Andy Gibb']
Released 1984
Genre: ['Pop', 'soul', 'R&B']
Length 6:30
Label ['Epic', 3]
Songwriter(s) ['George Michael', 'Andrew Ridgeley']
https://www.youtube.com/watch?v=izGwDsrQ1eQ
Released 1973
Length 3:29
Label ['Columbia', 1]
Songwriter(s) ['Alan Bergman', 'Marilyn Bergman']
Released 1993
Genre: ['Techno-reggae', 'pop', 'Europop']
Length 3:08
Label ['Arista', 2]
Songwriter(s) ['Jonas Berggren']
https://www.youtube.com/watch?v=iqu132vTl5Y


In [13]:
list123 = [['"The Battle of New Orleans"', 'Johnny Horton', '/wiki/The_Battle_of_New_Orleans']
,['"Theme from A Summer Place"', 'Percy Faith', '/wiki/Theme_from_A_Summer_Place']
,['"Tossin\' and Turnin\'"', 'Bobby Lewis', '/wiki/Tossin%27_and_Turnin%27']
,['"Stranger on the Shore"', 'Acker Bilk', '/wiki/Stranger_on_the_Shore']
,['"Sugar Shack"', 'Jimmy Gilmer and the Fireballs', '/wiki/Sugar_Shack']
,['"I Want to Hold Your Hand"', 'The Beatles', '/wiki/I_Want_to_Hold_Your_Hand']
,['"Wooly Bully"', 'Sam the Sham and the Pharaohs', '/wiki/Wooly_Bully']
,['"California Dreamin\'"', 'The Mamas & the Papas', '/wiki/California_Dreamin%27']
,['"To Sir With Love"', 'Lulu', '/wiki/To_Sir_with_Love_(song)']
,['"Hey Jude"', 'The Beatles', '/wiki/Hey_Jude']
,['"Sugar, Sugar"', 'The Archies', '/wiki/Sugar,_Sugar']
,['"Bridge Over Troubled Water"', 'Simon & Garfunkel', '/wiki/Bridge_Over_Troubled_Water_(song)']
,['"Joy to the World"', 'Three Dog Night', '/wiki/Joy_to_the_World_(Hoyt_Axton_song)']
,['"The First Time Ever I Saw Your Face"', 'Roberta Flack', '/wiki/The_First_Time_Ever_I_Saw_Your_Face']
,['"Tie a Yellow Ribbon Round the Ole Oak Tree"', 'Tony Orlando and Dawn', '/wiki/Tie_a_Yellow_Ribbon_Round_the_Ole_Oak_Tree']
,['"The Way We Were"', 'Barbra Streisand', '/wiki/The_Way_We_Were_(song)']
,['"Love Will Keep Us Together"', 'Captain & Tennille', '/wiki/Love_Will_Keep_Us_Together#Captain_&_Tennille_version']
,['"Silly Love Songs"', 'Wings', '/wiki/Silly_Love_Songs']
,['"Tonight\'s the Night (Gonna Be Alright)"', 'Rod Stewart', '/wiki/Tonight%27s_the_Night_(Gonna_Be_Alright)']
,['"Shadow Dancing"', 'Andy Gibb', '/wiki/Shadow_Dancing_(song)']
,['"My Sharona"', 'The Knack', '/wiki/My_Sharona']
,['"Call Me"', 'Blondie', '/wiki/Call_Me_(Blondie_song)']
,['"Bette Davis Eyes"', 'Kim Carnes', '/wiki/Bette_Davis_Eyes']
,['"Physical"', 'Olivia Newton-John', '/wiki/Physical_(Olivia_Newton-John_song)']
,['"Every Breath You Take"', 'The Police', '/wiki/Every_Breath_You_Take']
,['"When Doves Cry"', 'Prince', '/wiki/When_Doves_Cry']
,['"Careless Whisper"', 'Wham!', '/wiki/Careless_Whisper']
,['"That\'s What Friends Are For"', 'Dionne and Friends (Dionne Warwick, Gladys Knight, Elton John and Stevie Wonder)', '/wiki/That%27s_What_Friends_Are_For']
,['"Walk Like An Egyptian"', 'The Bangles', '/wiki/Walk_Like_An_Egyptian']
,['"Faith"', 'George Michael', '/wiki/Faith_(George_Michael_song)']
,['"Look Away"', 'Chicago', '/wiki/Look_Away']
,['"Hold On"', 'Wilson Phillips', '/wiki/Hold_On_(Wilson_Phillips_song)']
,['"(Everything I Do) I Do It for You"', 'Bryan Adams', '/wiki/(Everything_I_Do)_I_Do_It_for_You']
,['"End of the Road"', 'Boyz II Men', '/wiki/End_of_the_Road_(Boyz_II_Men_song)']
,['"I Will Always Love You"', 'Whitney Houston', '/wiki/I_Will_Always_Love_You#Whitney_Houston_version']
,['"The Sign"', 'Ace of Base', '/wiki/The_Sign_(song)']
,['"Gangsta\'s Paradise"', 'Coolio featuring L.V.', '/wiki/Gangsta%27s_Paradise']
,['"Macarena (Bayside Boys Mix)"', 'Los del Río', '/wiki/Macarena_(song)']
,['"Candle in the Wind 1997" / "Something About the Way You Look Tonight"', 'Elton John', '/wiki/Candle_in_the_Wind_1997']
,['"Too Close"', 'Next', '/wiki/Too_Close_(Next_song)']
,['"Believe"', 'Cher', '/wiki/Believe_(Cher_song)']
,['"Breathe"', 'Faith Hill', '/wiki/Breathe_(Faith_Hill_song)']
,['"Hanging by a Moment"', 'Lifehouse', '/wiki/Hanging_by_a_Moment']
,['"How You Remind Me"', 'Nickelback', '/wiki/How_You_Remind_Me']
,['"In da Club"', '50 Cent', '/wiki/In_da_Club']
,['"Yeah!"', 'Usher featuring Lil Jon and Ludacris', '/wiki/Yeah!_(Usher_song)']
,['"We Belong Together"', 'Mariah Carey', '/wiki/We_Belong_Together_(Mariah_Carey_song)']
,['"Bad Day"', 'Daniel Powter', '/wiki/Bad_Day_(Daniel_Powter_song)']
,['"Irreplaceable"', 'Beyoncé', '/wiki/Irreplaceable']
,['"Low"', 'Flo Rida featuring T-Pain', '/wiki/Low_(Flo_Rida_song)']
,['"Boom Boom Pow"', 'The Black Eyed Peas', '/wiki/Boom_Boom_Pow']
,['"Tik Tok"', 'Kesha', '/wiki/Tik_Tok_(song)']
,['"Rolling in the Deep"', 'Adele', '/wiki/Rolling_in_the_Deep']
,['"Somebody That I Used to Know"', 'Gotye featuring Kimbra', '/wiki/Somebody_That_I_Used_to_Know']
,['"Thrift Shop"', 'Macklemore & Ryan Lewis featuring Wanz', '/wiki/Thrift_Shop']
,['"Happy"', 'Pharrell Williams', '/wiki/Happy_(Pharrell_Williams_song)']
,['"Uptown Funk"', 'Mark Ronson featuring Bruno Mars', '/wiki/Uptown_Funk']
,['"Love Yourself"', 'Justin Bieber', '/wiki/Love_Yourself']
,['"Shape of You"', 'Ed Sheeran', '/wiki/Shape_of_You']
,['"God\'s Plan"', 'Drake', '/wiki/God%27s_Plan_(song)']
,['"Old Town Road"', 'Lil Nas X featuring Billy Ray Cyrus', '/wiki/Old_Town_Road']
,['"Blinding Lights"', 'The Weeknd', '/wiki/Blinding_Lights']
,['"Levitating"', 'Dua Lipa', '/wiki/Levitating_(song)']]

In [114]:
for song in list123:
    print("================")
    print(song[1],song[0])
    print(get_additional_song_data(song[2]))

Johnny Horton "The Battle of New Orleans"
['1959', ['Country'], '2:33', 'Columbia', 1, ['Jimmy Driftwood'], '']
Percy Faith "Theme from A Summer Place"
['1959[4]', ['Easy', 'listening'], '2:25', 'Columbia Records', 1, ['Max Steiner'], 'https://www.youtube.com/watch?v=fRV0yHiEua8']
Bobby Lewis "Tossin' and Turnin'"
['1961', ['R&B'], '2:29', 'Beltone', 1, ['Ritchie Adams'], '']
Acker Bilk "Stranger on the Shore"
['1961', ['Easy', 'listening', 'Jazz'], '2:52', 'Columbia', 2, ['Acker Bilk', 'Robert Mellin'], '']
Jimmy Gilmer and the Fireballs "Sugar Shack"
['1963', ['Garage', 'rock,', 'rock'], '2:00', 'Dot', 1, ['Keith McCormack'], '']
The Beatles "I Want to Hold Your Hand"
['1963', ['Rock and roll', 'pop'], '2:24', 'Parlophone', 2, ['Lennon–McCartney'], 'https://www.youtube.com/watch?v=XT4pwRi2JmY']
Sam the Sham and the Pharaohs "Wooly Bully"
['1965', ['Rock and roll', 'garage rock'], '2:20', 'MGM', 1, ['Domingo Samudio'], '']
The Mamas & the Papas "California Dreamin'"
['1965', ['Sunshin

['2009', [''], '5:08', 'Interscope', 1, ['William Adams', 'Allan Pineda'], 'https://www.youtube.com/watch?v=4m48GqaOz90']
Kesha "Tik Tok"
['2009', ['Dance-pop', 'electropop'], '3:20', 'RCA', 1, ['Kesha Sebert', 'Dr. Luke'], 'https://www.youtube.com/watch?v=iP6XpLQM2Cs']
Adele "Rolling in the Deep"
['2010', ['Rhythm and blues', 'soul'], '3:48', 'XL', 2, ['Adele Adkins', 'Paul Epworth'], 'https://www.youtube.com/watch?v=rYEDA3JcQqw']
Gotye featuring Kimbra "Somebody That I Used to Know"
['2011', ['Art pop', 'alternative rock'], '4:04', 'Eleven', 1, ['Wally de Backer'], '']
Macklemore & Ryan Lewis featuring Wanz "Thrift Shop"
['2012', ['Pop-rap', 'comedy hip hop'], '3:55', 'Macklemore LLC', 1, ['Ben Haggerty', 'Ryan Lewis'], 'https://www.youtube.com/watch?v=QK8mJJJvaes']
Pharrell Williams "Happy"
['2013', ['Soul', 'neo soul'], '3:55', 'i Am Other', 2, ['Pharrell Williams'], 'https://www.youtube.com/watch?v=y6Sxv-sUYtM']
Mark Ronson featuring Bruno Mars "Uptown Funk"
['2014', ['Funk', 'pop

## Web scrape all the lyrics

## Textblob (For the lyrics)
- https://www.youtube.com/watch?v=ea4IadDRwuc&list=PL_92WMXSLe_-RkWW5zAQZ-gMdVqZ7T-_F&index=3
- https://textblob.readthedocs.io/en/dev/quickstart.html

In [150]:
# Research more