In [1]:
import pandas as pd
import os
import datetime as dt

import matplotlib.pyplot as plt
from plotly.offline import plot, iplot, init_notebook_mode
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from plotly_functions import *

In [2]:
# Initialise plotly's notebook mode and switch cufflinks to offline mode
# before any figure is built in the cells below.
init_notebook_mode(connected=True)
cf.go_offline()

# Functions

## Getting data from the CSVs

In [23]:
def sentiment_with_cases_static(policy, timeframe):
    """Save a static PNG of daily Vader sentiment for ``policy`` over daily case counts.

    The figure has two sentiment lines on the primary y-axis (raw daily mean
    and its 7-day moving average) and a bar series of daily confirmed cases
    on the secondary y-axis.

    Parameters
    ----------
    policy : str
        Policy name. Passed to ``get_data`` and used in the figure title and
        in the output file name.
    timeframe : sequence of two items
        ``(start, end)`` bounds compared against ``daily_confirmed["Date"]``;
        both ends are inclusive.

    Side effects
    ------------
    Writes ``timeseries sentiment plots/{policy}_with_cases.png`` (creating
    the folder if needed) and prints a completion message.
    """
    df_within_date = get_data(policy)

    # Mean score per day. numeric_only=True restricts the mean to numeric
    # columns explicitly: newer pandas versions raise on non-numeric columns
    # instead of silently dropping them.
    grouped_date = (
        df_within_date
        .groupby(by=["Comment Datetime"])
        .mean(numeric_only=True)
        .reset_index()
    )

    # 7-day moving average; min_periods=1 so the first six days still get a value
    grouped_date['SMA_7'] = grouped_date['Vader_compound_score'].rolling(7, min_periods=1).mean()

    # Case counts restricted to the policy timeframe (bounds inclusive)
    in_timeframe = (daily_confirmed["Date"] >= timeframe[0]) & (daily_confirmed["Date"] <= timeframe[1])
    daily_confirmed_within_timeframe = daily_confirmed.loc[in_timeframe]

    # Sentiment on the primary axis, case counts on the secondary axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=grouped_date['Comment Datetime'], y=grouped_date['SMA_7'], mode='lines',
                             name="Vader 7-day MA"), secondary_y=False)
    fig.add_trace(go.Scatter(x=grouped_date['Comment Datetime'], y=grouped_date['Vader_compound_score'], mode='lines',
                             name="Vader Compound Score", opacity=.5), secondary_y=False)

    # The column name really does end with a space in the source data frame
    fig.add_bar(x=daily_confirmed_within_timeframe["Date"], y=daily_confirmed_within_timeframe["Daily Confirmed "],
                name="Cases Count", secondary_y=True)

    fig.update_layout(
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        title_text=f"Timeseries Sentiment Analysis of {policy} with Daily Number of Cases",
        yaxis_range=[-2, 1],
        # NOTE(review): 1426 is presumably the peak daily case count, doubled
        # so the bars stay in the lower half of the plot -- TODO confirm
        yaxis2_range=[0, 1426*2],
        width=1200,
        height=800,
    )
    fig.update_xaxes(title_text='Date', ticks="outside", showgrid=False)
    fig.update_yaxes(title_text='Vader Compound Score', ticks="outside", showgrid=False)
    fig.update_yaxes(title_text='Number of Cases', ticks="outside", showgrid=False, secondary_y=True)

    # write_image does not create missing folders, so make sure it exists
    output_dir = "timeseries sentiment plots"
    os.makedirs(output_dir, exist_ok=True)
    fig.write_image(f"{output_dir}/{policy}_with_cases.png", engine="kaleido")
    print(policy, "(with cases, static) is done!")

In [24]:
def sentiment_with_cases_html(policy, timeframe):
    """Save an interactive HTML plot of daily Vader sentiment for ``policy`` over daily case counts.

    Only the 7-day moving average is visible initially; the raw daily score
    and the case-count bars start as ``legendonly`` and can be toggled from
    the legend.

    Parameters
    ----------
    policy : str
        Policy name. Passed to ``get_data`` and used in the figure title and
        in the output file name.
    timeframe : sequence of two items
        ``(start, end)`` bounds compared against ``daily_confirmed["Date"]``;
        both ends are inclusive.

    Side effects
    ------------
    Writes ``timeseries sentiment plots/{policy}_with_cases.html`` (creating
    the folder if needed) and prints a completion message.
    """
    df_within_date = get_data(policy)

    # Mean score per day. numeric_only=True restricts the mean to numeric
    # columns explicitly: newer pandas versions raise on non-numeric columns
    # instead of silently dropping them.
    grouped_date = (
        df_within_date
        .groupby(by=["Comment Datetime"])
        .mean(numeric_only=True)
        .reset_index()
    )

    # 7-day moving average; min_periods=1 so the first six days still get a value
    grouped_date['SMA_7'] = grouped_date['Vader_compound_score'].rolling(7, min_periods=1).mean()

    # Case counts restricted to the policy timeframe (bounds inclusive)
    in_timeframe = (daily_confirmed["Date"] >= timeframe[0]) & (daily_confirmed["Date"] <= timeframe[1])
    daily_confirmed_within_timeframe = daily_confirmed.loc[in_timeframe]

    # Sentiment on the primary axis, case counts on the secondary axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=grouped_date['Comment Datetime'], y=grouped_date['SMA_7'], mode='lines',
                             name="Vader 7-day MA"), secondary_y=False)
    fig.add_trace(go.Scatter(x=grouped_date['Comment Datetime'], y=grouped_date['Vader_compound_score'], mode='lines',
                             name="Vader Compound Score", opacity=.5, visible='legendonly'), secondary_y=False)

    # The column name really does end with a space in the source data frame
    fig.add_bar(x=daily_confirmed_within_timeframe["Date"], y=daily_confirmed_within_timeframe["Daily Confirmed "],
                name="Cases Count", secondary_y=True, visible='legendonly')

    fig.update_layout(
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        # The figure plots case counts, so the title says "Cases" (it used to
        # say "Comments", copied from the comments variant)
        title_text=f"Timeseries Sentiment Analysis of {policy} with Daily Number of Cases",
        yaxis_range=[-2, 1],
        # NOTE(review): 1426 is presumably the peak daily case count, doubled
        # so the bars stay in the lower half of the plot -- TODO confirm
        yaxis2_range=[0, 1426*2],
        width=1200,
        height=800,
    )
    fig.update_xaxes(title_text='Date', ticks="outside", showgrid=False)
    fig.update_yaxes(title_text='Vader Compound Score', ticks="outside", showgrid=False)
    fig.update_yaxes(title_text='Number of Cases', ticks="outside", showgrid=False, secondary_y=True)

    # write_html does not create missing folders, so make sure it exists
    output_dir = "timeseries sentiment plots"
    os.makedirs(output_dir, exist_ok=True)
    fig.write_html(f"{output_dir}/{policy}_with_cases.html")
    print(policy, "(with cases, html) is done!")

In [25]:
def emotion_count(policy):
    """Save a bar chart of how many rows each emotion CSV holds for ``policy``.

    Reads the five ``{policy}_valuable_<emotion>.csv`` files under
    ``{parent_path}/Data/Sentiment Data`` (``parent_path`` is a global),
    uses each file's row count as the bar height, writes the chart to
    ``timeseries sentiment plots/Emotions/{policy}_emotions.png`` and prints
    a completion message.
    """
    # Bar label -> file-name suffix, listed in the order the files are read
    suffix_by_emotion = {
        'anger': 'anger',
        'fear': 'fear',
        'joy': 'joy',
        'neutral': 'neu',
        'sad': 'sad',
    }
    rows_by_emotion = {}
    for label, suffix in suffix_by_emotion.items():
        emotion_df = pd.read_csv(f"{parent_path}/Data/Sentiment Data/{policy}_valuable_{suffix}.csv")
        rows_by_emotion[label] = len(emotion_df)

    # Display order of the bars (differs from the read order above)
    emotions = ['anger', 'fear', 'sad', 'neutral', 'joy']
    counts = [rows_by_emotion[label] for label in emotions]

    bars = go.Bar(x=emotions, y=counts, text=counts)
    fig = go.Figure([bars])
    fig.update_layout(title_text=f"Emotion Counts for {policy}")
    # Show each count above its bar using the '.2s' number format
    fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
    fig.write_image(f"timeseries sentiment plots/Emotions/{policy}_emotions.png", engine="kaleido")

    print(policy, "(emotion count) is done!")

# Plotting out the graphs

In [3]:
# NOTE(review): emotion_count reads a global `parent_path`. Both candidate
# values below are commented out, so it has to be defined somewhere else
# before this cell runs -- confirm where.
# parent_path = "/Users/joshuawong/Documents/GitHub/Covid-19-Singapore-Analysis"
# parent_path = "C:/Users/user/Documents/GitHub/Covid-19-Singapore-Analysis"

# (start, end) date bounds used for each policy's plots
policy_timeframe = {
    "circuit breaker": ("1APR2020", "1JUL2020"),
    "economic measures": ("1FEB2020", "1MAR2021"),
    "foreign worker": ("1APR2020", "1JAN2021"),
    "mask": ("1FEB2020", "1MAR2021"),
    "social distancing": ("1FEB2020", "1FEB2021"),
    "tracetogether": ("1MAR2020", "1MAR2021"),
    "vaccination": ("1AUG2020", "1MAR2021"),
}

# Policy names, in the same order as the dictionary keys
policy_list = list(policy_timeframe)

# Four timeseries plots per policy, followed by a separator in the output
for policy in policy_list:
    timeframe = policy_timeframe[policy]
    sentiment_with_comments_static(policy, timeframe)
    sentiment_with_comments_html(policy, timeframe)
    sentiment_with_cases_static(policy, timeframe)
    sentiment_with_cases_html(policy, timeframe)
    print('*'*20)
    print('\n')

# One emotion bar chart per policy
for policy in policy_list:
    emotion_count(policy)

circuit breaker (with comments, static) is done!
circuit breaker (with comments, html) is done!
circuit breaker (with cases, static) is done!
circuit breaker (with cases, html) is done!
********************


economic measures (with comments, static) is done!
economic measures (with comments, html) is done!
economic measures (with cases, static) is done!
economic measures (with cases, html) is done!
********************


foreign worker (with comments, static) is done!
foreign worker (with comments, html) is done!
foreign worker (with cases, static) is done!
foreign worker (with cases, html) is done!
********************


mask (with comments, static) is done!
mask (with comments, html) is done!
mask (with cases, static) is done!
mask (with cases, html) is done!
********************


social distancing (with comments, static) is done!
social distancing (with comments, html) is done!
social distancing (with cases, static) is done!
social distancing (with cases, html) is done!
**********

In [3]:
# NOTE(review): absolute, machine-specific path -- adjust to your checkout.
parent_path = "/Users/joshuawong/Documents/GitHub/Covid-19-Singapore-Analysis"

df = pd.read_csv(f"{parent_path}/Data/Sentiment Data/social_media_sentiments.csv")


def _parse_comment_date(raw):
    """Convert one raw 'Comment Datetime' string into a ``datetime.date``.

    Accepted layouts are ``dd/mm/yy``, ``dd/mm/YYYY`` and ``YYYY-mm-dd``.

    Raises
    ------
    ValueError
        If ``raw`` is not a string in one of those layouts. Failing here
        gives a clear message instead of a length mismatch later when the
        parsed values are assigned back to the frame.
    """
    if isinstance(raw, str):
        if '/' in raw:
            # A slash within the last four characters means a two-digit year
            fmt = "%d/%m/%y" if '/' in raw[-4:] else "%d/%m/%Y"
            return dt.datetime.strptime(raw, fmt).date()
        if '-' in raw:
            return dt.datetime.strptime(raw, "%Y-%m-%d").date()
    raise ValueError(f"Unrecognised 'Comment Datetime' value: {raw!r}")


# Standardising datetime format
df['Comment Datetime'] = [_parse_comment_date(raw) for raw in df['Comment Datetime']]
df = df[['Comment Datetime', 'Emotions', 'Platform']]

platform_list = ['Facebook', 'Instagram', 'Reddit', 'Hardwarezone', 'Twitter']
emotions_list = ['Joy', 'Sadness', 'Neutral', 'Fear', 'Anger']

# platform_dict[platform][i] is the number of comments on `platform` whose
# 'Emotions' value equals emotions_list[i] in lower case
platform_dict = {
    platform: [
        int(((df['Platform'] == platform) & (df['Emotions'] == emotion.lower())).sum())
        for emotion in emotions_list
    ]
    for platform in platform_list
}

# One trace per emotion, one bar per platform. The y-values for emotion i are
# the i-th count of every platform; passing platform_dict[platform_list[i]]
# (one platform's counts across all emotions) would transpose the data.
# The figure is not called `plot` so that it does not shadow the `plot`
# function imported from plotly.offline.
platform_emotion_fig = go.Figure(data=[
    go.Bar(
        name=emotion,
        x=platform_list,
        y=[platform_dict[platform][emotion_idx] for platform in platform_list],
    )
    for emotion_idx, emotion in enumerate(emotions_list)
])
platform_emotion_fig.update_layout(title_text="Number of Comments per Emotion per Platform")
platform_emotion_fig.update_xaxes(title_text='Platform', ticks="outside", showgrid=False)
# The y-axis shows counts, so label it as such
platform_emotion_fig.update_yaxes(title_text='Number of Comments', ticks="outside", showgrid=True)
platform_emotion_fig.show()
platform_emotion_fig.write_image("Number of Comments per Emotion per Platform.png", engine="kaleido")

NameError: name 'platforms_list' is not defined