In [247]:
import time

import os, sys
import pandas as pd, numpy as np

import datetime

In [2]:
__scrobbles__ = 'downloads/gps56-2.csv'

In [3]:
pd.options.mode.chained_assignment = None

In [4]:
weekday_map = {0: 'monday',
              1: 'tuesday', 
              2: 'wednesday',
              3: 'thursday', 
              4: 'friday', 
              5: 'saturday', 
              6: 'sunday'}

In [5]:
df = pd.read_csv(__scrobbles__, header = None, names = ['artist', 'album', 'song', 'datetime'])

In [6]:
# remove where datetime is NaN (negligible)
df = df[~df.datetime.isna()]

In [7]:
# convert datetime column to acutal datetime objects
# subtract 7 hours since original times are UTC
df.datetime = pd.to_datetime(df.datetime) - pd.Timedelta(hours = 7)

In [8]:
df.head(10)

Unnamed: 0,artist,album,song,datetime
1,Drake,Dark Lane Demo Tapes,Pain 1993 (with Playboi Carti),2020-05-04 17:34:00
2,Juice WRLD,Death Race For Love,Empty,2020-05-04 15:07:00
3,Juice WRLD,Death Race For Love,Rider,2020-05-04 15:05:00
4,Juice WRLD,Death Race For Love,Make Believe,2020-05-04 15:05:00
5,Juice WRLD,Death Race For Love,She’s The One,2020-05-04 14:58:00
6,Juice WRLD,Death Race For Love,10 Feet,2020-05-04 14:56:00
7,Juice WRLD,Death Race For Love,Ring Ring (with Rvssian feat. Clever),2020-05-04 14:53:00
8,Juice WRLD,Death Race For Love,Maze,2020-05-04 14:50:00
9,Juice WRLD,Death Race For Love,Empty,2020-05-04 14:46:00
10,Juice WRLD,Death Race For Love,Make Believe,2020-05-04 14:44:00


# Lyric Sentiment Analysis

In [65]:
import nltk
from nltk.corpus import stopwords
# nltk.download()
import re

In [292]:
df_lyrics = pd.read_json(r"scraping/output/lyrics/completed.json", orient = 'table')

In [13]:
test_lyrics = df_lyrics.loc[('Drake', '10 Bands')].lyrics

In [339]:
stop_words = [item.replace("""'""", '') for item in stopwords.words('english')]
stop_words.extend(['im', 'like', 'yeah', 'get', 'oh', 'aint', 'got', 'wanna', 'want', 'ooh', 'ay', 'ayy', 'uh', 'cant', 'mhm', 'hm'])
stop_words.extend(['intro', 'verse', 'outro', 'verse', 'chorus'])
re_compiled = re.compile(r'[^a-zA-Z0-9- ]')


In [325]:
def clean_lyrics(lyrics):
    # make all lower 
    lyrics = lyrics.lower()
    # remove newline characters with a space
    lyrics = lyrics.replace('\n', ' ')
    # replace $ with s
    lyrics = lyrics.replace('$', 's')
    # remove chorus designations
    lyrics = lyrics.replace('chorus', '')
    # remove verse designations
    lyrics = lyrics.replace('verse', '')
    lyrics = re.sub(re_compiled, '', lyrics)
    lyrics = ' '.join(item for item in lyrics.split() if item not in stop_words)
    # return [item for item in lyrics.split() if item not in stop_words]
    return lyrics

In [287]:
def get_most_common(lyrics, n):
    try:
        tokens = lyrics.split()
        freq = nltk.FreqDist(tokens)
        n_most_common = [item[0] for item in freq.most_common(n)]
        return n_most_common
    except IndexError:
        return []

In [326]:
df_lyrics['clean_lyrics'] = df_lyrics.lyrics.apply(clean_lyrics)

In [177]:
df_lyrics['n_most_common'] = df_lyrics.lyrics.apply(lambda x: get_most_common(x, 5))

In [327]:
df_lyrics.clean_lyrics

artist       song                      
!!!          Even When the Water's Cold    friends told better bottom river bed said try ...
$uicideboy$  #1 STUNNA                     yung mutt suicide 1 yung mutt marble martyr fu...
             2ND HAND                      soulja rag murder grey59 see scrubs face mean ...
             AM / PM                       1 lil half cut windows frown ash jeans eatin b...
             Champion Of Death             yung plague chump change boy gotta hate boy pu...
                                                                 ...                        
walk.        seagull.                      yeahhh ends large big big man ting rag lets 1 ...
wasiu        Cigarettes & Poutine          - wasiu still ridin metro listenin kenlo funki...
yugi boi     No Problem                    21 felix sandman - boys emotions garzi - calif...
             The End                       reedukay reedukay reedukay beatz boi reedukay ...
ÊMIA         Psychic          

In [192]:
top_lyrics = df_lyrics.n_most_common.apply(pd.Series)

In [197]:
top_lyrics.columns = ['lyric_{}'.format(i+1) for i in top_lyrics.columns]

In [198]:
top_lyrics

Unnamed: 0_level_0,Unnamed: 1_level_0,lyric_1,lyric_2,lyric_3,lyric_4,lyric_5
artist,song,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
!!!,Even When the Water's Cold,friends,know,even,waters,cold
$uicideboy$,#1 STUNNA,one,let,keys,rover,truck
$uicideboy$,2ND HAND,fuck,rollin,ima,fucking,shawty
$uicideboy$,AM / PM,outta,people,dead,put,drivin
$uicideboy$,Champion Of Death,boy,pull,motherfucking,shoot,em
...,...,...,...,...,...,...
walk.,seagull.,move,man,fuck,one,big
wasiu,Cigarettes & Poutine,ridin,still,poutine,funkiness,bring
yugi boi,No Problem,-,ft,lil,love,remix
yugi boi,The End,beat,pig,wilbur,reedukay,real


In [328]:
df_merged = df.merge(df_lyrics.clean_lyrics, how = 'left', left_on = ['artist', 'song'], right_index = True)

In [329]:
df_merged

Unnamed: 0,artist,album,song,datetime,clean_lyrics
1,Drake,Dark Lane Demo Tapes,Pain 1993 (with Playboi Carti),2020-05-04 17:34:00,drake - pain 1993 playboi carti marshmello - k...
2,Juice WRLD,Death Race For Love,Empty,2020-05-04 15:07:00,unknown ran away think coming back home whoa-w...
3,Juice WRLD,Death Race For Love,Rider,2020-05-04 15:05:00,wha- lets see rider lets see rider sounds clea...
4,Juice WRLD,Death Race For Love,Make Believe,2020-05-04 15:05:00,figured gonna break heart regardless regardles...
5,Juice WRLD,Death Race For Love,She’s The One,2020-05-04 14:58:00,da-la la-la-la-la la-la ooh-ooh wake dream eve...
...,...,...,...,...,...
55553,Madeintyo,Thank You Mr. Tokyo,Time Of Her Life,2017-07-04 12:26:00,hey hook time life dick one night grab dick mi...
55554,DJ Khaled,Grateful,Iced Out My Arms,2017-07-04 12:23:00,future dj khaled diamonds fingers southside yo...
55555,Young Thug,Beautiful Thugger Girls,Daddy's Birthday,2017-07-01 14:33:00,typical day new york know sayin high fuck diam...
55556,Lil Wayne,Tha Carter IV (Deluxe),How to Love,2017-06-29 20:39:00,lil wayne cut music little louder lil wayne lo...


In [330]:
df_merged.clean_lyrics = df_merged.clean_lyrics.fillna('')

In [331]:
concat_lyrics = df_merged.groupby(df_merged.datetime.dt.date)['clean_lyrics'].agg(' '.join)

In [334]:
days_top5 = concat_lyrics.apply(lambda x: get_most_common(x, 5))

In [338]:
days_top5.tail(20)

datetime
2020-04-14                 [-, feat, know, lil, love]
2020-04-15           [bitch, nigga, know, love, fuck]
2020-04-16          [shit, bout, pussy, talk, niggas]
2020-04-17          [bitch, niggas, boom, shit, fuck]
2020-04-18           [mhm, know, bitch, back, niggas]
2020-04-19           [footnote, may, one, first, see]
2020-04-20                  [2018, -, one, de, would]
2020-04-21                    [-, de, one, would, la]
2020-04-22                  [2018, -, de, one, would]
2020-04-23               [2018, says, tony, go, know]
2020-04-24                  [de, la, one, men, would]
2020-04-26                 [-, lil, feat, shit, know]
2020-04-27                 [2018, feat, 2017, -, man]
2020-04-28             [one, would, men, great, know]
2020-04-29               [2018, man, know, feat, one]
2020-04-30                [2018, -, feat, 2017, love]
2020-05-01                [-, love, back, baby, know]
2020-05-02                [-, love, baby, back, feat]
2020-05-03    [labo

In [335]:
df_merged.join(days_top5, on = df_merged.datetime.dt.date, rsuffix = '_concat').tail(20)

Unnamed: 0,artist,album,song,datetime,clean_lyrics,clean_lyrics_concat
55538,Getter,Wat The Frick EP,Something New,2017-07-05 06:37:00,instrumental,"[thou, na, know, thy, one]"
55539,Che Ecru,buries,2 AM,2017-07-05 06:34:00,1 ropes trynna hit road maybe go roll play rol...,"[thou, na, know, thy, one]"
55540,Deorro,Good Evening,Guide Me,2017-07-05 06:31:00,1 alright feel end ill right til instrumental ...,"[thou, na, know, thy, one]"
55541,BONJR,Hey Sego,hey sego,2017-07-05 06:28:00,give give give juice give give give give juice...,"[thou, na, know, thy, one]"
55542,Oshi,oshi,state of flux,2017-07-05 06:24:00,instrumental,"[thou, na, know, thy, one]"
55543,Young Thug,Slime Season 3,With Them,2017-07-05 06:20:00,thugger thugger baby fuck lets lil shawty say ...,"[thou, na, know, thy, one]"
55544,Hyper Potions,Adventures,Adventures,2017-07-04 20:53:00,instrumental,"[arms, patek, ice, put, know]"
55545,Young Thug,Beautiful Thugger Girls,Daddy's Birthday,2017-07-04 13:05:00,typical day new york know sayin high fuck diam...,"[arms, patek, ice, put, know]"
55546,Young Thug,Slime Season,Again,2017-07-04 13:01:00,gucci mane young thug nigga turn guwop trap ho...,"[arms, patek, ice, put, know]"
55547,Young Thug,Slime Season,Overdosin,2017-07-04 12:50:00,thugger thugger rock water fuckin whale drug e...,"[arms, patek, ice, put, know]"


In [268]:
single_day = df_merged[df_merged.datetime.dt.date == datetime.date(2017, 7, 11)]

In [281]:
# single_day[single_day.lyrics.str.contains('to vend and to velnerate')].loc[55212].lyrics

In [283]:
concat_lyrics

datetime
2017-06-29    [Intro: 2 Chainz]\nYeah, hahaha\nYeah, ooh\nM-...
2017-07-01    [Intro]\nJust a typical day in New York, you k...
2017-07-04    [Verse 1]\nI been on the ropes with you\nSo I'...
2017-07-05    [Verse 1]\nI been on the ropes with you\nSo I'...
2017-07-06    [Verse 1]\nI'ma handle business\nS600 big body...
                                    ...                        
2020-04-30    Drake - Pain 1993 (with Playboi Carti)\nMarshm...
2020-05-01    Drake - Pain 1993 (with Playboi Carti)\nMarshm...
2020-05-02    Drake - Pain 1993 (with Playboi Carti)\nMarshm...
2020-05-03    Drake - Pain 1993 (with Playboi Carti)\nMarshm...
2020-05-04    Drake - Pain 1993 (with Playboi Carti)\nMarshm...
Name: lyrics, Length: 969, dtype: object