# Insert data story name here
By Nicole Mbithe, ncm2144

# Step 1: Import/download the necessary libraries

 Note that if these are not installed on your computer you might want to install them before running the notebook. The easiest way is to "pip install " on your computer.

In [48]:
import nltk 
import string
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('vader_lexicon')
from nltk.corpus import stopwords
en_stopwords = set(stopwords.words('english')) 
import pandas as pd


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/nicolembithe/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/nicolembithe/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/nicolembithe/nltk_data...


# Step 2: Reading the data, data cleaning and preparation for analysis

In [70]:
#Load the data in. This creates a dataframe with all the data
lyrics = pd.read_csv("data/lyrics.csv")

#Helper function to help clean the data
def preprocess(v):
    
    #tokenize the sentences
    x = nltk.word_tokenize(v)
    
    #remove punctuation
    x = [word for word in x if word not in string.punctuation]
    
    #remove stopwords
    x = [word.lower() for word in x if word not in  en_stopwords]
    
    return x

#Also want to get the sentiment score for each song based on the sentences in the song and add a column with
sid = SentimentIntensityAnalyzer()
def get_sentiments(v):
    
    #get the sentences in the song
    x = nltk.sent_tokenize(v)
    
    #get the seniment score for the each sentence and track the max and the min
    max_sent = -float('inf')
    min_sent = float('inf')
    
    for sentence in x:
        score = sid.polarity_scores(sentence)['compound']
        max_sent = score if score > max_sent else max_sent
        min_sent = score if score < min_sent else min_sent

    
    return pd.Series([max_sent, min_sent], index=['max_sent', 'min_sent'])
    
#get the sentiment scores for each song
lyrics = pd.concat([lyrics, lyrics['lyrics'].apply(get_sentiments)], axis = 1)

#get the cleaned words from the song lyrics
lyrics['lyrics'] = lyrics['lyrics'].apply(preprocess)

# Step 3: Exploratory analysis of the data

In [75]:
#Do a word cloud by genre
#Check max hapiness scored by genre
#Check max hapiness score by the year and observes the gtresn
#Determine what is the most emotional genre in music
lyrics.n
lyrics[(lyrics.genre == 'Hip-Hop')][['year', 'max_sent']].set_index('')

Unnamed: 0,year,max_sent
0,2009,0.6533
1,2009,0.7567
2,2007,0.8225
3,2007,-0.0258
4,2007,-0.9947
5,2007,0.9907
6,2013,0.7845
7,2013,0.7261
8,2005,0.9633
9,2005,-0.9776


In [51]:

# Maybe check male vs female singers
# How though
lyrics.head()

Unnamed: 0,song,year,artist,genre,lyrics,song words
0,when-you-were-with-me,2009,a,Hip-Hop,I stopped by the house we called our home\nIt ...,"[i, stopped, house, called, home, it, little, ..."
1,careless-whisper,2009,a,Hip-Hop,I feel so unsure\nAs I take your hand and lead...,"[i, feel, unsure, as, i, take, hand, lead, dan..."
2,2-59,2007,a,Hip-Hop,Mark:] Sunday football I got boot off the pitc...,"[mark, sunday, football, i, got, boot, pitch, ..."
3,power-of-desire,2007,a,Hip-Hop,[Chris:] Fallin' for a fantasy\nI threw away m...,"[chris, fallin, fantasy, i, threw, away, desti..."
4,you-re-not-in-love,2007,a,Hip-Hop,something in the way we touch\nyou hold my han...,"[something, way, touch, hold, hand, hold, back..."


In [42]:
N = 5
count = 0
count = 0 if count == N else N + 1  

In [44]:
count

6

In [56]:
type(sid.polarity_scores("I'm so happy")['compound'])

float

In [69]:
df = pd.concat([lyrics, lyrics], axis =1)
df.head()

Unnamed: 0,song,year,artist,genre,lyrics,song.1,year.1,artist.1,genre.1,lyrics.1
0,when-you-were-with-me,2009,a,Hip-Hop,I stopped by the house we called our home\nIt ...,when-you-were-with-me,2009,a,Hip-Hop,I stopped by the house we called our home\nIt ...
1,careless-whisper,2009,a,Hip-Hop,I feel so unsure\nAs I take your hand and lead...,careless-whisper,2009,a,Hip-Hop,I feel so unsure\nAs I take your hand and lead...
2,2-59,2007,a,Hip-Hop,Mark:] Sunday football I got boot off the pitc...,2-59,2007,a,Hip-Hop,Mark:] Sunday football I got boot off the pitc...
3,power-of-desire,2007,a,Hip-Hop,[Chris:] Fallin' for a fantasy\nI threw away m...,power-of-desire,2007,a,Hip-Hop,[Chris:] Fallin' for a fantasy\nI threw away m...
4,you-re-not-in-love,2007,a,Hip-Hop,something in the way we touch\nyou hold my han...,you-re-not-in-love,2007,a,Hip-Hop,something in the way we touch\nyou hold my han...
