# 50 Years of Music Trends

## Objective
* Analyze lyrics from billboard top 100 songs over 50 years to identify trends
* Statement: Has the sentiments of popular lyrics changed over time?

## Hypothesis
* Ha1 = the sentiments of popular lyrics has become more negative over time
* Ha2 = the sentiments of popular lyrics has become more positive over time
* Ho = no change in the sentiments of popular lyrics over time 

## Sources
* musixmatch source: https://developer.musixmatch.com/documentation/api-reference/track-lyrics-get <br/>
* musixmatch python: https://github.com/hudsonbrendon/python-musixmatch <br/>
* billboard python: https://github.com/guoguo12/billboard-charts<br/>

In [30]:
# Dependency library
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

# API Calls
import billboard
from musixmatch import Musixmatch

# API Keys
from musixmatch_api import api_key

# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [25]:
# Generate a (pseudo) random list of (almost all) dates in string format to fit musixmatch parameter

date_list = []
min_year = 1968
error_counter = 0

for i in range(50):
    try:
        # RANDOM date generation
        month_rand = str(random.randint(1,12))
        if len(month_rand) ==1:
            # PAD single digit numbers with a leading 0
            month_rand = month_rand.zfill(2)
            
        day_rand = str(random.randint(1,28))
        if len(day_rand) == 1:
            day_rand = day_rand.zfill(2)
        
        # STRINGIFY the result for the musixmatch parameter
        date_rand = str(f'{min_year}-{month_rand}-{day_rand}')
        
        # APPEND result to date_list
        date_list.append(date_rand)
        
        #INCREMENT the year
        min_year = min_year + 1
        
    except ValueError:
        error_counter = error_counter + 1
    
date_list

['1968-05-13',
 '1969-09-19',
 '1970-02-09',
 '1971-05-02',
 '1972-03-14',
 '1973-06-12',
 '1974-10-04',
 '1975-08-15',
 '1976-08-11',
 '1977-04-01',
 '1978-10-06',
 '1979-01-11',
 '1980-09-15',
 '1981-11-04',
 '1982-08-26',
 '1983-07-24',
 '1984-07-20',
 '1985-08-21',
 '1986-02-23',
 '1987-01-08',
 '1988-11-23',
 '1989-02-19',
 '1990-09-08',
 '1991-02-03',
 '1992-09-05',
 '1993-09-23',
 '1994-11-28',
 '1995-09-11',
 '1996-08-05',
 '1997-04-04',
 '1998-09-04',
 '1999-06-28',
 '2000-03-25',
 '2001-12-21',
 '2002-02-19',
 '2003-09-03',
 '2004-11-20',
 '2005-01-18',
 '2006-05-12',
 '2007-06-23',
 '2008-10-14',
 '2009-06-11',
 '2010-07-12',
 '2011-03-13',
 '2012-10-07',
 '2013-02-03',
 '2014-08-25',
 '2015-07-17',
 '2016-12-25',
 '2017-04-27']

In [26]:
# Return top 100 billboard songs for each date in random list generated above

billboard_list = 'hot-100'
col_names = ['Song','Artist','Date']
chart_df = pd.DataFrame(columns = col_names)

for date in date_list:
    chart = billboard.ChartData(billboard_list,date)
    for row in chart:
        # EMPTY the list placeholder used to create the dataframe
        chart_list = []
        # CLEAN and convert the billboard object
        chart_list.append(str(row).replace("'","",1))
        temp_df = pd.DataFrame(chart_list)
        temp_df = temp_df[0].str.split("' by ",expand=True)
        temp_df = temp_df.rename(columns={0:"Song",1:"Artist"})
        temp_df['Date'] = date
        
        # APPEND the temp_df in the current loop location to the chart_df
        chart_df = chart_df.append(temp_df)

# REMOVE duplicates and RESET index from the resulting dataframe        
chart_df = chart_df.drop_duplicates().reset_index(drop=True)

# PREVIEW
print(len(chart_df))
chart_df.head()

5000


Unnamed: 0,Song,Artist,Date
0,Tighten Up,Archie Bell & The Drells,1968-05-13
1,Mrs. Robinson,Simon & Garfunkel,1968-05-13
2,Honey,Bobby Goldsboro,1968-05-13
3,"The Good, The Bad And The Ugly","Hugo Montenegro, His Orchestra And Chorus",1968-05-13
4,A Beautiful Morning,The Rascals,1968-05-13


In [27]:
# SPLIT the date values in the dataframe for plotting and analysis purposes

chart_df['Year'], chart_df['Month'], chart_df['Day'] = chart_df['Date'].str.split('-').str

# PREVIEW
chart_df.head()

Unnamed: 0,Song,Artist,Date,Year,Month,Day
0,Tighten Up,Archie Bell & The Drells,1968-05-13,1968,5,13
1,Mrs. Robinson,Simon & Garfunkel,1968-05-13,1968,5,13
2,Honey,Bobby Goldsboro,1968-05-13,1968,5,13
3,"The Good, The Bad And The Ugly","Hugo Montenegro, His Orchestra And Chorus",1968-05-13,1968,5,13
4,A Beautiful Morning,The Rascals,1968-05-13,1968,5,13


In [28]:
# Retrieve lyrics from MusixMatch API based on song and artist in above dataframe

musixmatch = Musixmatch(api_key)
lyrics_list = []
error_counter = 0

# LOOP through the data frame and use song title and artist name to search for lyrics in musixmatch
for x in range(len(chart_df)):
    
    # ERROR HANDLING in case a song queries returns 'null' from musixmatch
    try:
        # GRAB the lyrics based on location (iloc) in chart_df
        song_search = chart_df.iloc[x,0]
        artist_search = chart_df.iloc[x,1]
        lyrics = musixmatch.matcher_lyrics_get(q_artist=artist_search,
                                               q_track=song_search)['message']['body']['lyrics']['lyrics_body']
        
        # FORMATTING to truncate the nonsense at the end of the lyrics from MusixMatch
        song_length = len(lyrics)
        endpoint = len("******* This Lyrics is NOT for Commercial use *******\n(1409617829201)")
        lyrics = lyrics.replace("\n", " ")
        lyrics = str(lyrics[:song_length-endpoint])
        
        # APPEND lyrics to lyrics_list
        lyrics_list.append(lyrics)
        
    except:
        error_counter = error_counter + 1
        lyrics_list.append('MUSIXMATCH_NA')

# CREATE new column in chart_df
chart_df['Lyrics'] = lyrics_list
chart_df.head()

Unnamed: 0,Song,Artist,Date,Year,Month,Day,Lyrics
0,Tighten Up,Archie Bell & The Drells,1968-05-13,1968,5,13,MUSIXMATCH_NA
1,Mrs. Robinson,Simon & Garfunkel,1968-05-13,1968,5,13,MUSIXMATCH_NA
2,Honey,Bobby Goldsboro,1968-05-13,1968,5,13,MUSIXMATCH_NA
3,"The Good, The Bad And The Ugly","Hugo Montenegro, His Orchestra And Chorus",1968-05-13,1968,5,13,MUSIXMATCH_NA
4,A Beautiful Morning,The Rascals,1968-05-13,1968,5,13,MUSIXMATCH_NA


In [29]:
clean_chart_df = chart_df[chart_df['Lyrics'] != "" or != "MUSIXMATCH_NA"].reset_index(drop=True)
clean_chart_df

Unnamed: 0,Song,Artist,Date,Year,Month,Day,Lyrics
0,Tighten Up,Archie Bell & The Drells,1968-05-13,1968,05,13,MUSIXMATCH_NA
1,Mrs. Robinson,Simon & Garfunkel,1968-05-13,1968,05,13,MUSIXMATCH_NA
2,Honey,Bobby Goldsboro,1968-05-13,1968,05,13,MUSIXMATCH_NA
3,"The Good, The Bad And The Ugly","Hugo Montenegro, His Orchestra And Chorus",1968-05-13,1968,05,13,MUSIXMATCH_NA
4,A Beautiful Morning,The Rascals,1968-05-13,1968,05,13,MUSIXMATCH_NA
5,Cowboys To Girls,The Intruders,1968-05-13,1968,05,13,MUSIXMATCH_NA
6,Love Is All Around,The Troggs,1968-05-13,1968,05,13,MUSIXMATCH_NA
7,The Unicorn,The Irish Rovers,1968-05-13,1968,05,13,MUSIXMATCH_NA
8,Young Girl,The Union Gap Featuring Gary Puckett,1968-05-13,1968,05,13,MUSIXMATCH_NA
9,Do You Know The Way To San José,Dionne Warwick,1968-05-13,1968,05,13,MUSIXMATCH_NA


In [None]:
# Vader Sentiment Analysis on each song in the dataframe

# INITIALIZE a list to hold the sentiments
lyrics_sentiments = []

# ANALYZE the list
for y in range(len(lyrics_list)):
    results = analyzer.polarity_scores(clean_chart_df.iloc[y,6])
    compound = results["compound"]
    pos = results["pos"]
    neu = results["neu"]
    neg = results["neg"]
    lyrics_sentiments.append({"Compound": compound,
                              "Positive": pos,
                              "Negative": neg,
                              "Neutral": neu})

# CREATE a dataframe of sentiment analysis that will be appended to the chart_df
lyrics_sentiments_df = pd.DataFrame(lyrics_sentiments)    
    
# APPEND new columns containing the sentiment analysis    
clean_chart_df['Compound'] = lyrics_sentiments_df['Compound']
clean_chart_df['Positive'] = lyrics_sentiments_df['Positive']
clean_chart_df['Negative'] = lyrics_sentiments_df['Negative']
clean_chart_df['Neutral'] = lyrics_sentiments_df['Neutral']

# SAVE to a .csv output
clean_chart_df.to_csv('billboard_analysis.csv')

# VIEW resulting dataframe
clean_chart_df

In [None]:
chart_group = chart_df.groupby(["Year"]).mean()
chart_group_df = pd.DataFrame(chart_group).reset_index(drop=False)
chart_group_df

In [None]:
plt.rcParams["figure.figsize"] = (20,10)
plt.plot(chart_group_df['Year'], chart_group_df['Negative'], marker="o", linewidth=0 ,alpha=0.8, c='k', label="Negative")
plt.xticks(rotation=90)
plt.ylabel("Negative Sentiment Analysis")
plt.xlabel("Year")
plt.show

In [None]:
plt.rcParams["figure.figsize"] = (20,10)
plt.plot(chart_group_df['Year'], chart_group_df['Positive'], marker="x", linewidth=0 ,alpha=0.8, c='k', label="Negative")
plt.xticks(rotation=90)
plt.ylabel("Positive Sentiment Analysis")
plt.xlabel("Year")
plt.show

In [None]:
plt.rcParams["figure.figsize"] = (20,10)
plt.plot(chart_group_df['Year'], chart_group_df['Neutral'], marker="^", linewidth=0 ,alpha=0.8, c='k', label="Negative")
plt.xticks(rotation=90)
plt.ylabel("Neutral Sentiment Analysis")
plt.xlabel("Year")
plt.show

In [None]:
plt.rcParams["figure.figsize"] = (20,10)
plt.plot(chart_group_df['Year'], chart_group_df['Compound'], marker="s", linewidth=0 ,alpha=0.8, c='k', label="Negative")
plt.xticks(rotation=90)
plt.ylabel("Compound Sentiment Analysis")
plt.xlabel("Year")
plt.show