In [120]:
import pandas as pd
import os
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.offline import plot, iplot, init_notebook_mode
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

In [121]:
# Prepare plotly for rendering inside the notebook before any figure is built.
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook file — confirm against the installed plotly version.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode too. cufflinks is not used anywhere else in the
# visible cells, so this call may be removable — TODO confirm.
cf.go_offline()

In [118]:
def _parse_comment_date(raw):
    """Convert one raw 'Comment Datetime' cell into a datetime, or pd.NaT.

    Recognised layouts are dd/mm/yy, dd/mm/YYYY and YYYY-mm-dd. A value that is
    not a string (e.g. a missing cell) or that contains neither separator maps to
    pd.NaT, so the caller always gets exactly one result per input row.

    Raises:
        ValueError: the string contains a separator but does not match the
            corresponding layout (propagated from strptime).
    """
    if not isinstance(raw, str):
        return pd.NaT
    if '/' in raw:
        # A slash within the last four characters means the year has two digits.
        layout = "%d/%m/%y" if '/' in raw[-4:] else "%d/%m/%Y"
    elif '-' in raw:
        layout = "%Y-%m-%d"
    else:
        return pd.NaT
    return dt.datetime.strptime(raw, layout)


def sentiment_timeseries(policy, timeframe):
    """Plot the daily mean Vader compound score for one policy and save it.

    Reads "{parent_path}/Data/Sentiment Data/{policy}_valuable_opinions.csv"
    (parent_path is a notebook-level global), keeps the comments dated within
    `timeframe`, averages 'Vader_compound_score' per date, adds a rolling mean,
    and writes the figure as PNG and HTML under "timeseries sentiment plots/".

    Args:
        policy: Policy name; used in the input file name, the figure title and
            the output file names.
        timeframe: Pair (start, end) of date strings in '%d%b%Y' form, e.g.
            ("1APR2020", "1JUL2020"). Both ends are inclusive.

    Returns:
        None. The results are the two files written and a progress line printed.

    Raises:
        ValueError: a 'Comment Datetime' value has a separator but an
            unexpected layout.
    """
    df = pd.read_csv(f"{parent_path}/Data/Sentiment Data/{policy}_valuable_opinions.csv")

    # Standardising datetime format.
    # Every row yields a value (NaT when unrecognised), so the parsed list always has
    # the same length as the frame. The column is kept as datetime64 rather than
    # datetime.date objects: recent pandas releases refuse to order-compare
    # datetime.date values with the Timestamp bounds used below.
    parsed_dates = [_parse_comment_date(raw) for raw in df['Comment Datetime']]
    df['Comment Datetime'] = pd.to_datetime(parsed_dates)

    # Truncate according to the policy timeframe (inclusive on both ends).
    # Rows whose date could not be parsed (NaT) never satisfy the range test.
    start, end = pd.to_datetime([timeframe[0], timeframe[1]], format='%d%b%Y')
    in_window = df['Comment Datetime'].between(start, end)
    df_within_date = df.loc[in_window, ['Comment Datetime', 'Vader_compound_score']]

    # Finding the mean vader sentiment score per day
    grouped_date = df_within_date.groupby(by=["Comment Datetime"]).mean().reset_index()

    # Finding the moving average.
    # NOTE: the window is the 7 most recent rows, i.e. dates that have at least one
    # comment, not 7 calendar days; the two differ when some days have no comments.
    grouped_date['SMA_7'] = grouped_date.Vader_compound_score.rolling(7, min_periods=1).mean()

    # Plotting the figure
    fig = go.Figure(layout_title_text=f"Timeseries Sentiment Analysis of {policy}")
    fig.add_trace(go.Scatter(x=grouped_date['Comment Datetime'], y=grouped_date['SMA_7'], mode='lines',
                             name="Rolling 7-Day Average of Vader Compound Score"))
    # The raw daily series is available from the legend but hidden by default.
    fig.add_trace(go.Scatter(x=grouped_date['Comment Datetime'], y=grouped_date['Vader_compound_score'], mode='lines',
                             name="Vader Compound Score", visible='legendonly'))
    fig.update_layout(width=1200, height=800)
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1))
    fig.update_xaxes(title_text='Date')
    fig.update_yaxes(title_text='Vader Compound Score')

    # Create the output folder on first use so a fresh checkout does not fail here.
    output_dir = "timeseries sentiment plots"
    os.makedirs(output_dir, exist_ok=True)
    fig.write_image(f"{output_dir}/{policy}.png", engine="kaleido")
    fig.write_html(f"{output_dir}/{policy}.html")
    print(policy, "is done!")


# 

In [119]:
# Root of the project checkout, read by sentiment_timeseries to locate the input CSVs.
# The default is the original author's location; set the COVID_ANALYSIS_ROOT
# environment variable to run this notebook from a different machine or directory.
parent_path = os.environ.get(
    "COVID_ANALYSIS_ROOT",
    "/Users/joshuawong/Documents/GitHub/Covid-19-Singapore-Analysis",
)

# Analysis window per policy as (start, end) strings in '%d%b%Y' form.
policy_timeframe = {"circuit breaker":("1APR2020", "1JUL2020"), 
                    "economic measures":("1FEB2020", "1MAR2021"), 
                    "foreign worker":("1APR2020","1JAN2021"), 
                    "mask":("1FEB2020","1MAR2021"), 
                    "social distancing":("1FEB2020", "1FEB2021"), 
                    "tracetogether":("1MAR2020", "1MAR2021"), 
                    "vaccination":("1AUG2020","1MAR2021")}

# Policy names in processing order. Derived from the table above so the two cannot
# drift apart; the values and order match the previously hand-written list.
policy_list = list(policy_timeframe)

# Produce and save one plot per policy.
for policy, timeframe in policy_timeframe.items():
    sentiment_timeseries(policy, timeframe)


circuit breaker is done!
economic measures is done!
foreign worker is done!
mask is done!
social distancing is done!
tracetogether is done!
vaccination is done!
