In [21]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [81]:

# Two-letter postal codes for the 50 U.S. states plus DC (51 entries).
# The order is significant to any positional consumer, so it is kept as-is.
state_labs = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE",
    "FL", "GA", "HI", "ID", "IL", "IN", "IA",
    "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT",
    "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND",
    "OH", "OK", "OR", "PA", "RI", "SC", "SD",
    "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
]

def fetch_data_kw(kw, date, base_dir='/media/johnattan/LaCie/Twitter_Terms/'):
    """Load the tweet CSV stored for one keyword and one period.

    Parameters
    ----------
    kw : str
        Keyword sub-directory under ``base_dir`` (e.g. ``'BEER'``).
    date : str
        File stem; the file read is ``<base_dir>/<kw>/<date>.csv``.
    base_dir : str, optional
        Root directory holding one sub-directory per keyword. Defaults to
        the location that was previously hard-coded, so existing calls are
        unaffected; pass another root to run against a different copy of
        the data.

    Returns
    -------
    pandas.DataFrame
        The CSV contents, first row used as the header, no index column.

    Raises
    ------
    FileNotFoundError
        If the CSV does not exist (raised by ``pandas.read_csv``).
    """
    # Avoid naming the local ``dir``: that shadows the builtin.
    csv_path = os.path.join(base_dir, kw, date + '.csv')
    return pd.read_csv(csv_path, index_col=None, header=0)


def subset_date(df, date):
    """Return the rows of ``df`` whose ``created_at`` starts with ``date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``created_at`` column.
    date : str
        Value compared against the first 10 characters of ``created_at``
        (a ``'YYYY-MM-DD'`` day when timestamps are ISO-like strings).

    Returns
    -------
    pandas.DataFrame
        The matching rows with all columns preserved; zero rows (not zero
        columns) when nothing matches or ``df`` is empty.
    """
    # Compare on the string form so missing / non-string entries simply do
    # not match instead of raising on the slice. A boolean Series (rather
    # than a Python list) also keeps ``df[...]`` a row filter when empty.
    day_prefix = df['created_at'].astype(str).str.slice(0, 10)
    return df[day_prefix == date]

# Day-of-month labels 1..31. Single-digit days are zero-padded strings and
# 10..31 are ints; consumers normalise each entry with ``str()``.
days = ['01', '02', '03', '04', '05', '06', '07', '08', '09'] + list(range(10, 32))


def count_tweets(df, yr_mo):
    """Count rows of ``df`` per calendar day of one month.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``created_at`` column whose first 10 characters are
        compared against ``'<yr_mo>-<day>'``.
    yr_mo : str
        Zero-padded year and month, e.g. ``'2020-06'``.

    Returns
    -------
    list of int
        One count per entry of ``days`` (always 31 values); days with no
        matching rows -- including dates the month does not have -- are 0.
    """
    # Tally every day in a single pass instead of re-filtering the whole
    # frame once per day. Going through ``astype(str)`` keeps missing or
    # non-string timestamps from raising; they just never match a day key.
    per_day = df['created_at'].astype(str).str.slice(0, 10).value_counts()
    return [int(per_day.get(yr_mo + '-' + str(day), 0)) for day in days]
 

def plot_lines(yr_mo):
    """Plot and save daily tweet counts per keyword for one month.

    For each keyword the month's CSV is loaded with ``fetch_data_kw`` and
    reduced to per-day counts with ``count_tweets``. The counts are drawn as
    one line per keyword, displayed, and written to
    ``figures/pop-lines-<yr_mo>.png``.

    Parameters
    ----------
    yr_mo : str
        Year and month separated by a dash, e.g. ``'2020-6'``. The string is
        used verbatim for the CSV file name, the plot title and the output
        file name; a copy with the month zero-padded to two digits is used
        to match ``created_at`` dates.

    Raises
    ------
    ValueError
        If ``yr_mo`` contains no ``'-'`` separator.
    """
    # LIQUOR was previously loaded and counted but never added to the
    # plotted frame; every keyword in this list now gets a line.
    keywords = ['BEER', 'WINE', 'ALCOHOL', 'DRINKING', 'LIQUOR']

    # Pad the month to two digits. Unconditionally inserting '0' after the
    # dash turned '2020-10' into '2020-010', which matches no date.
    year, month = yr_mo.split('-', 1)
    yr_mo_m = year + '-' + month.zfill(2)

    frames = []
    for kw in keywords:
        counts = count_tweets(fetch_data_kw(kw, yr_mo), yr_mo_m)
        frames.append(pd.DataFrame({
            # 1-based so the x value is the calendar day the count is for.
            'Day': range(1, len(counts) + 1),
            'Counts': counts,
            'Keyword': kw,
        }))
    counts_df = pd.concat(frames, ignore_index=True)

    fig = px.line(counts_df, x="Day", y="Counts", color='Keyword', title = 'Popularity for ' + yr_mo)
    fig.show()

    # Make sure the output directory exists so a fresh checkout does not
    # fail at the very last step.
    os.makedirs('figures', exist_ok=True)
    fig.write_image('figures/pop-lines-' + yr_mo + '.png')

In [82]:
# Show and save the per-keyword daily count chart for 2020-06. The month is
# passed unpadded ('2020-6'); the same string is also used as the CSV file stem.
plot_lines('2020-6')