In [1]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
import numpy as np
import lda
from sklearn.feature_extraction.text import CountVectorizer

In [12]:
df = pd.read_csv('billboard_lyrics_1964-2015.csv', encoding = 'ISO-8859-1')
df.head(5)

Unnamed: 0,Rank,Song,Artist,Year,Lyrics,Source
0,1,wooly bully,sam the sham and the pharaohs,1965,sam the sham miscellaneous wooly bully wooly b...,3.0
1,2,i cant help myself sugar pie honey bunch,four tops,1965,sugar pie honey bunch you know that i love yo...,1.0
2,3,i cant get no satisfaction,the rolling stones,1965,,1.0
3,4,you were on my mind,we five,1965,when i woke up this morning you were on my mi...,1.0
4,5,youve lost that lovin feelin,the righteous brothers,1965,you never close your eyes anymore when i kiss...,1.0


In [3]:
df = df.dropna(axis = 0, how = 'any')
df.shape

(4913, 6)

In [4]:
df = df[df['Lyrics'].apply(len, 0) > 5]

In [5]:
df_6575 = df[(df['Year'] >= 1965) & (df['Year'] <= 1975)]

In [6]:
def get_topics(df0, n_topics, n_top_words):
    lists = df0.Lyrics.values.tolist()
    tokenized = [word_tokenize(doc.lower()) for doc in lists]
    docs = [[token for token in tokens if token.isalpha()] for tokens in tokenized]
    vocabulary = [word for doc in tokenized for word in doc]
    vocabulary = sorted(list(set(vocabulary)))
    wordnet = WordNetLemmatizer()
    def lemmatize(doc):
        return [wordnet.lemmatize(word) for word in word_tokenize(doc.lower())]
    count_vectorizer = CountVectorizer(stop_words=stopwords.words('english'),
                                   vocabulary=vocabulary,
                                   tokenizer=lemmatize)
    feature_matrix = count_vectorizer.fit_transform(lists)
    x = np.array(feature_matrix.todense())
    model = lda.LDA(n_topics=n_topics, n_iter=1000, random_state=1)
    model.fit(x)
    topic_word = model.topic_word_ 
    n_top_words = n_top_words
    for i, topic_dist in enumerate(topic_word):
         topic_words = np.array(vocabulary)[np.argsort(topic_dist)][:-n_top_words:-1]
         print('Topic {}: {}'.format(i, ' '.join(topic_words)))

In [7]:
df1 = df[(df['Year'] >= 1965) & (df['Year'] < 1977)]
df2 = df[(df['Year'] >= 1977) & (df['Year'] < 1989)]
df3 = df[(df['Year'] >= 1989) & (df['Year'] < 2001)]
df4 = df[(df['Year'] >= 2001)]
df1.head(5)

Unnamed: 0,Rank,Song,Artist,Year,Lyrics,Source
0,1,wooly bully,sam the sham and the pharaohs,1965,sam the sham miscellaneous wooly bully wooly b...,3.0
1,2,i cant help myself sugar pie honey bunch,four tops,1965,sugar pie honey bunch you know that i love yo...,1.0
3,4,you were on my mind,we five,1965,when i woke up this morning you were on my mi...,1.0
4,5,youve lost that lovin feelin,the righteous brothers,1965,you never close your eyes anymore when i kiss...,1.0
5,6,downtown,petula clark,1965,when youre alone and life is making you lonel...,1.0


In [8]:
get_topics(df1, 10, 20)

INFO:lda:n_documents: 1107
INFO:lda:vocab_size: 11137
INFO:lda:n_words: 128842
INFO:lda:n_topics: 10
INFO:lda:n_iter: 1000
INFO:lda:<0> log likelihood: -1327671
INFO:lda:<10> log likelihood: -1019385
INFO:lda:<20> log likelihood: -994156
INFO:lda:<30> log likelihood: -985351
INFO:lda:<40> log likelihood: -978809
INFO:lda:<50> log likelihood: -974006
INFO:lda:<60> log likelihood: -971607
INFO:lda:<70> log likelihood: -968472
INFO:lda:<80> log likelihood: -966424
INFO:lda:<90> log likelihood: -964871
INFO:lda:<100> log likelihood: -963127
INFO:lda:<110> log likelihood: -961678
INFO:lda:<120> log likelihood: -961001
INFO:lda:<130> log likelihood: -960359
INFO:lda:<140> log likelihood: -959856
INFO:lda:<150> log likelihood: -958653
INFO:lda:<160> log likelihood: -957718
INFO:lda:<170> log likelihood: -957505
INFO:lda:<180> log likelihood: -956968
INFO:lda:<190> log likelihood: -956720
INFO:lda:<200> log likelihood: -955802
INFO:lda:<210> log likelihood: -956234
INFO:lda:<220> log likelihoo

Topic 0: know dont love say time ill see im make cant go one never come want tell way mind away
Topic 1: get come youre boogie right dig dance let im ready well back doin yes baby floor good wont little
Topic 2: oh cant doo friend said dit got fire put come going light want call high aint sky enough funk
Topic 3: song na rock music sing im wan got band go roll black night keep play long round river yellow
Topic 4: got good aint woman im shes dont yeah ta man na know gon lovin « keep thing get baby
Topic 5: back home boy said man take bring got town ride train go mama one well dont around ya old
Topic 6: yeah hey like come oh shake say ah little uh lady bit want know ha thats huh water take
Topic 7: la oh time day night one life dream gone da never like lonely eye would love could left ha
Topic 8: let gim rain help sky summer day sun shine night sunshine like star come world hair dat awimoweh diamond
Topic 9: love baby na im girl gon oh sweet youre let like know wan feel little make nee

In [9]:
get_topics(df2, 10, 20)

INFO:lda:n_documents: 1133
INFO:lda:vocab_size: 12461
INFO:lda:n_words: 157037
INFO:lda:n_topics: 10
INFO:lda:n_iter: 1000
INFO:lda:<0> log likelihood: -1577952
INFO:lda:<10> log likelihood: -1235556
INFO:lda:<20> log likelihood: -1202556
INFO:lda:<30> log likelihood: -1189261
INFO:lda:<40> log likelihood: -1180621
INFO:lda:<50> log likelihood: -1175340
INFO:lda:<60> log likelihood: -1170399
INFO:lda:<70> log likelihood: -1166998
INFO:lda:<80> log likelihood: -1165516
INFO:lda:<90> log likelihood: -1163024
INFO:lda:<100> log likelihood: -1161000
INFO:lda:<110> log likelihood: -1159391
INFO:lda:<120> log likelihood: -1157140
INFO:lda:<130> log likelihood: -1156400
INFO:lda:<140> log likelihood: -1156731
INFO:lda:<150> log likelihood: -1154630
INFO:lda:<160> log likelihood: -1153653
INFO:lda:<170> log likelihood: -1153246
INFO:lda:<180> log likelihood: -1152171
INFO:lda:<190> log likelihood: -1151965
INFO:lda:<200> log likelihood: -1151442
INFO:lda:<210> log likelihood: -1150897
INFO:lda

Topic 0: love know youre cant baby feel im one time hold need make give want let heart tell oh little
Topic 1: got rock hot baby big roll burn ta well city get heat look like ive make come house n
Topic 2: night yeah tonight come light know new da right get go like away time youll miss party star easy
Topic 3: never im time ive could away heart know see way life go long ill day still ever u cry
Topic 4: oh ooh boy la youre uh wild beat bad one whoa heart angel like call huh lady yeah ah
Topic 5: dont want say know got youre aint take let time like need think wont good im make talk get
Topic 6: na gon wan baby let take im got oh feel youre make somebody know want yeah get way like
Topic 7: girl shes like little shake stand around em youre get red end theyre running woman town way dead luck
Topic 8: dance eye music keep dancing every round bop song people boogie hear night watching make sound well play go
Topic 9: get im come go dont back cant stop yeah let thing man keep hey groove hard

In [10]:
get_topics(df3, 10, 20)

INFO:lda:n_documents: 1144
INFO:lda:vocab_size: 17383
INFO:lda:n_words: 222610
INFO:lda:n_topics: 10
INFO:lda:n_iter: 1000
INFO:lda:<0> log likelihood: -2257414
INFO:lda:<10> log likelihood: -1794174
INFO:lda:<20> log likelihood: -1736938
INFO:lda:<30> log likelihood: -1713673
INFO:lda:<40> log likelihood: -1702872
INFO:lda:<50> log likelihood: -1696473
INFO:lda:<60> log likelihood: -1690804
INFO:lda:<70> log likelihood: -1685718
INFO:lda:<80> log likelihood: -1682220
INFO:lda:<90> log likelihood: -1679257
INFO:lda:<100> log likelihood: -1678106
INFO:lda:<110> log likelihood: -1676103
INFO:lda:<120> log likelihood: -1673489
INFO:lda:<130> log likelihood: -1671695
INFO:lda:<140> log likelihood: -1670080
INFO:lda:<150> log likelihood: -1669312
INFO:lda:<160> log likelihood: -1667848
INFO:lda:<170> log likelihood: -1665295
INFO:lda:<180> log likelihood: -1665077
INFO:lda:<190> log likelihood: -1664924
INFO:lda:<200> log likelihood: -1663608
INFO:lda:<210> log likelihood: -1662435
INFO:lda

Topic 0: take life oh world go hand day u see time yeah feel place believe away people power light fly
Topic 1: yeah baby let got like get want go come girl know ta oh body make love night good right
Topic 2: ya like im get yo nigga see back yall got em shit aint niggaz put wit money dat big
Topic 3: wa never back ever time remember said knew ive come didnt rain night im wish saw could coming away
Topic 4: know dont im got man girl say like make get aint cause little one play shes day thing think
Topic 5: da la jump duh ba everybody kick ah tu cuerpo alegria que long macarena ride party thong dee hey
Topic 6: na wan gon oh dont make im get go come really know got want tell youre boy mmm stop
Topic 7: love way like every night feel little dream youre day talk thats kiss sweet miss im step eye thing
Topic 8: love dont know im baby want youre one cant need heart time give tell cause see oh could say
Topic 9: ill love im never real anything make say forever wont promise give turn always ca

In [11]:
get_topics(df4, 10, 20)

INFO:lda:n_documents: 1466
INFO:lda:vocab_size: 22860
INFO:lda:n_words: 360078
INFO:lda:n_topics: 10
INFO:lda:n_iter: 1000
INFO:lda:<0> log likelihood: -3647666
INFO:lda:<10> log likelihood: -2962156
INFO:lda:<20> log likelihood: -2862843
INFO:lda:<30> log likelihood: -2828059
INFO:lda:<40> log likelihood: -2809661
INFO:lda:<50> log likelihood: -2795829
INFO:lda:<60> log likelihood: -2786397
INFO:lda:<70> log likelihood: -2779257
INFO:lda:<80> log likelihood: -2772909
INFO:lda:<90> log likelihood: -2768918
INFO:lda:<100> log likelihood: -2765202
INFO:lda:<110> log likelihood: -2761904
INFO:lda:<120> log likelihood: -2758923
INFO:lda:<130> log likelihood: -2756285
INFO:lda:<140> log likelihood: -2754338
INFO:lda:<150> log likelihood: -2752234
INFO:lda:<160> log likelihood: -2748829
INFO:lda:<170> log likelihood: -2748316
INFO:lda:<180> log likelihood: -2747075
INFO:lda:<190> log likelihood: -2745352
INFO:lda:<200> log likelihood: -2743435
INFO:lda:<210> log likelihood: -2743085
INFO:lda

Topic 0: na baby wan love got gon dont girl know want hey im let make like yeah cause ta get
Topic 1: go take make light let back get come head low slow walk turn around ill gone side watch fire
Topic 2: love im know one wa never time say dont way life could see youre ever thing back cant think
Topic 3: im cant dont like youre keep feel heart wont take away let cause know life without go make want
Topic 4: like nigga bitch get got shit dont im money fuck aint hoe give fly imma want know throw put
Topic 5: uh want ay ah know huh ooh ha dont got bay make ya youre baby e bad aah lose
Topic 6: im like girl got know ya aint get man cause em see da wit right tell bout hot back
Topic 7: get like let yeah rock shake dance turn body move come night got party hand go put club tonight
Topic 8: la boy dem got dat know bass di wiggle big want really time dirty bang yo see ridin 1
Topic 9: oh like whoa boom beautiful new night good youre feel every diamond ive hear bed sing im eye light
