In [1]:
import spacy
import warnings
import itertools
import collections
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go

from ipywidgets import widgets

# Select Plotly's 'iframe' renderer as the default for figures shown in this notebook.
pio.renderers.default='iframe'
# Load the `en_core_web_sm` spaCy pipeline. NOTE(review): `nlp` is not referenced
# by any cell visible here -- confirm it is still needed before keeping the load.
nlp = spacy.load("en_core_web_sm")
# Ignore every FutureWarning raised from this point on.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Prepare Data

In [2]:
column_names = ['id', 'word', 'count']

# Decade labels. Each label is both the file-name prefix of the CSVs to read and
# the value written to the `source_decade` column.
files = ['1950-1959', '1960-1969', '1970-1979', '1980-1989', '1990-1999', '2000-2009','2010-2019', '2020-2029']


def load_ngram_counts(ngram, decades):
    """Read the per-decade count CSVs of one n-gram kind into a single frame.

    Parameters
    ----------
    ngram : str
        File-name suffix selecting the n-gram kind ("bigram" or "trigram").
    decades : list of str
        Decade labels; ``count_word_lemmatized/<decade>_<ngram>.csv`` is read
        for each of them, in order.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``word``, ``count`` and ``source_decade``, with a fresh
        0..n-1 index.
    """
    frames = []
    for decade in decades:
        # skiprows=1 discards the first line of every file and `names` supplies
        # the column labels instead.
        frame = pd.read_csv("count_word_lemmatized/" + decade + "_" + ngram + ".csv",
                            skiprows=1, names=column_names)
        frame['source_decade'] = decade
        frames.append(frame)
    # Concatenate once at the end: growing a frame inside the loop re-copies all
    # previous rows on each pass, and seeding it with an empty object-dtype frame
    # lets that empty frame influence the resulting column dtypes.
    return pd.concat(frames, ignore_index=True)


df_bigram = load_ngram_counts("bigram", files)
df_trigram = load_ngram_counts("trigram", files)

In [3]:
def strip_automation_tokens(text):
    """Split ``text`` on whitespace, lower-case each token and drop every token
    that contains the substring "automation".

    Returns the remaining tokens as a list (possibly empty).
    """
    return [token for token in map(str.lower, text.split()) if "automation" not in token]


# The cleaning is applied column-wise instead of assigning to the rows yielded by
# DataFrame.iterrows(): pandas documents that such a row may be a copy, in which
# case the assignment never reaches the frame and the clean-up is silently lost.
# na_action='ignore' passes missing `word` values through untouched; groupby then
# drops them together with any other NaN key.

# Bigrams: re-join the surviving tokens into one string, then sum the counts of
# rows that now share the same (decade, word) pair.
bigram_tokens = df_bigram['word'].map(strip_automation_tokens, na_action='ignore')
df_bigram = (
    df_bigram.assign(word=bigram_tokens.map(' '.join, na_action='ignore'))
    .groupby(['source_decade', 'word'])
    .agg({'count': 'sum'})
    .reset_index()
)

# Trigrams: keep the surviving tokens as a list, explode it so every token gets
# its own row (each carrying the trigram's count), then sum per (decade, word).
# A trigram whose tokens were all removed explodes to NaN and is dropped by groupby.
trigram_tokens = df_trigram['word'].map(strip_automation_tokens, na_action='ignore')
df_trigram = (
    df_trigram.assign(word=trigram_tokens)
    .explode('word')
    .groupby(['source_decade', 'word'])
    .agg({'count': 'sum'})
    .reset_index()
)

In [4]:
def keep_top_keywords(counts, top_n=50):
    """Restrict ``counts`` to the most frequent keywords.

    Rows whose ``word`` is '' or ' ' are discarded first. A word is then kept if
    it ranks among the ``top_n`` highest ``count`` values of at least one
    ``source_decade``; all rows of a kept word are retained, including those from
    decades in which it is not in the top ``top_n``.

    Parameters
    ----------
    counts : pandas.DataFrame
        Long table with ``source_decade``, ``word`` and ``count`` columns.
    top_n : int, default 50
        Number of top-ranked words taken from each decade.

    Returns
    -------
    pandas.DataFrame
        The filtered rows of ``counts``.
    """
    non_blank = counts[(counts['word'] != '') & (counts['word'] != ' ')]
    top_words = (
        non_blank.sort_values('count', ascending=False)
        .groupby('source_decade')
        .head(top_n)['word']
    )
    return non_blank[non_blank['word'].isin(top_words)]


# Get top 50 keywords in each decade & use them to create the dropdown options.
# A keyword has one row per decade it occurs in, so the option lists are
# de-duplicated (first-seen order) to avoid repeated dropdown entries.
df_bigram = keep_top_keywords(df_bigram)
bigram_options = df_bigram['word'].drop_duplicates().tolist()

df_trigram = keep_top_keywords(df_trigram)
trigram_options = df_trigram['word'].drop_duplicates().tolist()

In [5]:
# Reshape both long tables to wide form: one row per decade, one column per
# keyword, holding that keyword's count. Keywords absent from a decade get 0.

# bigrams
df_bigram = (
    df_bigram
    .pivot_table(values='count', index=['source_decade'], columns='word')
    .reset_index()
    .fillna(0)
)

# trigrams
df_trigram = (
    df_trigram
    .pivot_table(values='count', index=['source_decade'], columns='word')
    .reset_index()
    .fillna(0)
)

# Create Visualization

## Bigram

In [6]:
# Keyword selector widget (only constructed here; this cell does not display it).
# Dropdown rejects a `value` that is not one of its `options`, so fall back to
# the first option -- or to no selection -- when 'union' is unavailable.
if 'union' in bigram_options:
    default_option = 'union'
elif bigram_options:
    default_option = bigram_options[0]
else:
    default_option = None
textbox = widgets.Dropdown(
    description='Keyword: ',
    value=default_option,
    options=bigram_options)

# Every column after the leading `source_decade` column is one keyword.
columns = df_bigram.columns[1:]

# Keyword drawn before the reader touches the menu: 'union' when present,
# otherwise the first keyword column.
if 'union' in columns:
    initial_keyword = 'union'
elif len(columns) > 0:
    initial_keyword = columns[0]
else:
    initial_keyword = None

# plotly
# Start with real data so the chart is not blank while the menu already shows
# a keyword as selected.
if initial_keyword is None:
    fig = go.Figure(data=[go.Bar(x=[], y=[])])
    active_button = 0
else:
    fig = go.Figure(data=[go.Bar(x=df_bigram['source_decade'], y=df_bigram[initial_keyword])])
    active_button = list(columns).index(initial_keyword)

# drop down: one button per keyword; each restyles trace 0 with that keyword's
# per-decade counts.
buttons = [
    dict(method='restyle', label=column, visible=True,
         args=[{'y': [df_bigram[column]], 'x': [df_bigram['source_decade']], 'type': 'bar'}, [0]])
    for column in columns
]

# Menu placement; `active` keeps the highlighted button in step with the
# keyword drawn initially.
updatemenu = [dict(
    buttons=buttons,
    direction='down',
    x=0.1,
    xanchor='left',
    y=1.1,
    yanchor='top',
    showactive=True,
    active=active_button,
)]

# add dropdown menus to the figure
fig.update_layout(
    showlegend=False,
    updatemenus=updatemenu,
    title_text='Frequency of keyword (Bigram) across decades',
    title_x=0.5,
)
fig.update_xaxes(title_text="Decade")
fig.update_yaxes(title_text="Count")
fig.show()

## Trigram

In [8]:
# Keyword selector widget (only constructed here; this cell does not display it).
# Dropdown rejects a `value` that is not one of its `options`, so fall back to
# the first option -- or to no selection -- when 'union' is unavailable.
if 'union' in trigram_options:
    default_option = 'union'
elif trigram_options:
    default_option = trigram_options[0]
else:
    default_option = None
textbox = widgets.Dropdown(
    description='Keyword: ',
    value=default_option,
    options=trigram_options)

# Every column after the leading `source_decade` column is one keyword.
columns = df_trigram.columns[1:]

# Keyword drawn before the reader touches the menu: 'union' when present,
# otherwise the first keyword column.
if 'union' in columns:
    initial_keyword = 'union'
elif len(columns) > 0:
    initial_keyword = columns[0]
else:
    initial_keyword = None

# plotly
# Start with real data so the chart is not blank while the menu already shows
# a keyword as selected.
if initial_keyword is None:
    fig = go.Figure(data=[go.Bar(x=[], y=[])])
    active_button = 0
else:
    fig = go.Figure(data=[go.Bar(x=df_trigram['source_decade'], y=df_trigram[initial_keyword])])
    active_button = list(columns).index(initial_keyword)

# drop down: one button per keyword; each restyles trace 0 with that keyword's
# per-decade counts.
buttons = [
    dict(method='restyle', label=column, visible=True,
         args=[{'y': [df_trigram[column]], 'x': [df_trigram['source_decade']], 'type': 'bar'}, [0]])
    for column in columns
]

# Menu placement; `active` keeps the highlighted button in step with the
# keyword drawn initially.
updatemenu = [dict(
    buttons=buttons,
    direction='down',
    x=0.1,
    xanchor='left',
    y=1.1,
    yanchor='top',
    showactive=True,
    active=active_button,
)]

# add dropdown menus to the figure
fig.update_layout(
    showlegend=False,
    updatemenus=updatemenu,
    title_text='Frequency of keyword (Trigram) across decades',
    title_x=0.5,
)
fig.update_xaxes(title_text="Decade")
fig.update_yaxes(title_text="Count")
fig.show()