# Calculation of the hype value and the visualization

# Import

In [35]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import itertools
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import warnings
from math import log

In [36]:
warnings.simplefilter('ignore')

# Data

### Twitter data

In [37]:
# Amount of tweets per day created from the mined tweets in Data.ipynb
TweetsPerDay = pd.read_csv("Data/CoronaTweetsPerDay.csv", index_col=0)
TweetsPerHour = pd.read_csv("Data/CoronaTweetsPerHour.csv", index_col=0)

In [38]:
TweetsPerDay.head()

Unnamed: 0,date,tweets,CumSum
0,2020-02-23,1471,1471
1,2020-02-24,3121,4592
2,2020-02-25,3479,8071
3,2020-02-26,4831,12902
4,2020-02-27,7122,20024


In [39]:
# Amount of tweets per hour and per day normalized created from the mined tweets in data.ipynb
TweetsPerDayNormalized  = pd.read_csv("Data/NormalizedDFPerDay.csv", index_col=0)
TweetsPerHourNormalized = pd.read_csv("Data/NormalizedDFPerHour.csv", index_col=0)

In [40]:
TweetsPerHourNormalized.head()

Unnamed: 0,datetime,coronavirus,CumSum
0,2020-02-23 00:00:00,0.026226,0.0
1,2020-02-23 01:00:00,0.010199,0.000127
2,2020-02-23 02:00:00,0.005828,0.0002
3,2020-02-23 03:00:00,0.005342,0.000266
4,2020-02-23 04:00:00,0.004857,0.000327


### COVID-19 Statistics

In [41]:
#gathered from
Cases = pd.read_csv("Data/Cases/confirmedCases.csv").transpose()
Cases = Cases.rename(columns=Cases.iloc[0]).drop(Cases.index[0]).reset_index()
Death = pd.read_csv("Data/Cases/confirmedDeaths.csv").transpose()
Death = Death.rename(columns=Death.iloc[0]).drop(Death.index[0]).reset_index()
Recoveries = pd.read_csv("Data/Cases/confirmedRecoveries.csv").transpose() 
Recoveries = Recoveries.rename(columns=Recoveries.iloc[0]).drop(Recoveries.index[0]).reset_index()
Cov9Cases = [Cases, Death, Recoveries]

# Processing

In [42]:
# turning the date column in datetime objects
Cases = Cases.rename(columns={"index":"date"})
Cases["date"] = pd.to_datetime(Cases.date)
Death = Death.rename(columns={"index":"date"})
Death["date"] = pd.to_datetime(Death.date)
Recoveries = Recoveries.rename(columns={"index":"date"})
Recoveries["date"] = pd.to_datetime(Recoveries.date)

In [43]:
TimeFrame = [TweetsPerDay.date.loc[0],TweetsPerDay.date.loc[len(TweetsPerDay)-1]]

In [44]:
# get cases on the dates we collected tweets on
mask = (Cases['date'] >= TimeFrame[0]) & (Cases['date'] <= TimeFrame[1])
CasesCTF = Cases.loc[mask]

In [45]:
CasesCTF

Unnamed: 0,date,Netherlands,Europe,EU,Non-Europe,Worldwide
32,2020-02-23,0,199,188,78766,78965
33,2020-02-24,0,277,262,79291,79568
34,2020-02-25,0,381,365,80032,80413
35,2020-02-26,0,542,522,80853,81395
36,2020-02-27,1,806,776,81948,82754
37,2020-02-28,1,1095,1051,83025,84120
38,2020-02-29,6,1467,1400,84544,86011
39,2020-03-01,10,2207,2106,86162,88369
40,2020-03-02,18,2734,2596,87572,90306
41,2020-03-03,24,3361,3183,89479,92840


# Calculating hype

Amount of tweets / actual cases

In [46]:
ConfCasesNL = CasesCTF.Netherlands.to_list()
Tweets = TweetsPerDay.tweets.to_list()
ConfCasesEU = CasesCTF.EU.to_list()

In [47]:
# Amount of tweets / actual cases in the netherlands
HypeNetherlands = []
for n in range(len(Tweets)):
    try:
        NL = Tweets[n]/ConfCasesNL[n]
    except:
        NL = Tweets[n]
    HypeNetherlands.append(NL)

In [48]:
# Amount of tweets / actual cases in the EU
HypeEurope = []
for n in range(len(Tweets)):
    try:
        EU = Tweets[n]/ConfCasesEU[n]
    except:
        EU = Tweets[n]
    HypeEurope.append(EU)

## Normalizing Hype

In [49]:
Time = CasesCTF["date"].to_list()
zippedList = list(zip(Time,HypeEurope,HypeNetherlands))
HypeDf = pd.DataFrame(zippedList, columns=['Date',"hypeEU", "hypeNL"])

In [50]:
def normalize(df):
    result = df.copy()
    for feature_name in df.columns[1:]:
        max_value = int(df[feature_name].max())
        min_value = int(df[feature_name].min())
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result

In [51]:
HypeDfNorm = normalize(HypeDf)

In [52]:
HypeDfNorm.head()

Unnamed: 0,Date,hypeEU,hypeNL
0,2020-02-23,0.711315,0.203548
1,2020-02-24,1.082929,0.43406
2,2020-02-25,0.866501,0.484074
3,2020-02-26,0.841344,0.672953
4,2020-02-27,0.834349,0.993015


In [53]:
CasesNorm = normalize(CasesCTF[['date','Europe','Netherlands']])

In [54]:
CasesNorm.head()

Unnamed: 0,date,Europe,Netherlands
32,2020-02-23,0.0,0.0
33,2020-02-24,0.00143119,0.0
34,2020-02-25,0.00333945,0.0
35,2020-02-26,0.00629358,0.0
36,2020-02-27,0.0111376,0.000881057


# Visualizing hype

In [55]:
# Hype tweets compared to the cases in nl
Time = CasesCTF["date"].to_list()
fig = go.Figure()
fig.add_trace(go.Line(x=Time, y=HypeNetherlands, name="Hype Netherlands"))
fig.update_layout(
    title="Hype compared to dutch cases",
    xaxis_title="Date",
    yaxis_title="Hype"
    )
fig.show()

In [57]:
# Hype tweets compared to the cases in nl y log axis
Time = CasesCTF["date"].to_list()
fig = go.Figure()
fig.add_trace(go.Line(x=Time, y=HypeNetherlands, name="Hype Netherlands"))
fig.update_layout(
    title="Hype compared to dutch cases log y axis",
    xaxis_title="Date",
    yaxis_title="Hype"
    )
fig.update_layout(yaxis_type="log")
fig.show()


In [27]:
# Hype tweets compared to the cases in EU
Time = CasesCTF["date"].to_list()
fig = go.Figure()
fig.add_trace(go.Line(x=Time, y=HypeEurope, name="Hype Europe"))
fig.update_layout(
    title="Hype compared to EU cases",
    xaxis_title="Date",
    yaxis_title="Hype"
    )
fig.show()

# Visualizing tweets and cases

In [28]:
Time = TweetsPerDayNormalized["date"].to_list()
TweetsNorm = TweetsPerDayNormalized.CumSum.to_list()
EucasesNorm = CasesNorm.Netherlands.to_list()
fig = go.Figure()
fig.add_trace(go.Line(x=Time, y=TweetsNorm, name="Tweets cumulative"))
fig.add_trace(go.Line(x=Time, y=EucasesNorm, name="Cases in the Netherlands"))
fig.update_layout(
    title="Development of COVID-19 cases and tweets",
    xaxis_title="Date",
    yaxis_title="#"
    )
fig.show()

# Visualizing hype cycle

In [29]:
# log 10 of hype line
logHypeNL = [log(x,10) for x in HypeDf.hypeNL.to_list()]
zippedList = list(zip(Time,logHypeNL))
HypeDf = pd.DataFrame(zippedList, columns=['Date', "hypeNLLog"])
HypeDflogNorm = normalize(HypeDf)

In [30]:
Time = TweetsPerDayNormalized["date"].to_list()
HypeNetherlands = HypeDflogNorm.hypeNLLog.to_list()
TweetsNorm = TweetsPerDayNormalized.tweets.to_list()
EucasesNorm = CasesNorm.Netherlands.to_list()
fig = go.Figure()
fig.add_trace(go.Line(x=Time, y=HypeNetherlands, name="Normalized logarithmic Hype"))
fig.add_trace(go.Line(x=Time, y=TweetsNorm, name="Normalized amount of Tweets per day"))
fig.add_trace(go.Line(x=Time, y=EucasesNorm, name="Normalized Cases in the Netherlands"))
fig.update_layout(
    title="COVID-19 in the Netherlands",
    xaxis_title="Date",
    yaxis_title="#"
    )
fig.show()