# Imports

In [24]:
import pandas as pd
import plotly.graph_objs as go

# Data Reading

In [25]:
# Load the latest OxCGRT policy-tracker export straight from GitHub.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the "Columns (2,3) have mixed types"
# DtypeWarning this read otherwise emits.
OXCGRT_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
count_df = pd.read_csv(OXCGRT_URL, low_memory=False)


Columns (2,3) have mixed types.Specify dtype option on import or set low_memory=False.



In [26]:
# Preview the first five rows of the raw tracker data.
count_df.head(n=5)

Unnamed: 0,CountryName,CountryCode,RegionName,RegionCode,Jurisdiction,Date,C1_School closing,C1_Flag,C2_Workplace closing,C2_Flag,...,StringencyIndex,StringencyIndexForDisplay,StringencyLegacyIndex,StringencyLegacyIndexForDisplay,GovernmentResponseIndex,GovernmentResponseIndexForDisplay,ContainmentHealthIndex,ContainmentHealthIndexForDisplay,EconomicSupportIndex,EconomicSupportIndexForDisplay
0,Aruba,ABW,,,NAT_TOTAL,20200101,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Aruba,ABW,,,NAT_TOTAL,20200102,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Aruba,ABW,,,NAT_TOTAL,20200103,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aruba,ABW,,,NAT_TOTAL,20200104,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Aruba,ABW,,,NAT_TOTAL,20200105,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Data Engineering

## Getting UK National Data

In [27]:
# Keep only the United Kingdom rows that carry no RegionCode.
is_uk = count_df["CountryName"].eq("United Kingdom")
has_no_region = count_df['RegionCode'].isna()
count_df = count_df.loc[is_uk & has_no_region]

In [28]:
# Preview the first five rows that remain after the UK filter.
count_df.head(n=5)

Unnamed: 0,CountryName,CountryCode,RegionName,RegionCode,Jurisdiction,Date,C1_School closing,C1_Flag,C2_Workplace closing,C2_Flag,...,StringencyIndex,StringencyIndexForDisplay,StringencyLegacyIndex,StringencyLegacyIndexForDisplay,GovernmentResponseIndex,GovernmentResponseIndexForDisplay,ContainmentHealthIndex,ContainmentHealthIndexForDisplay,EconomicSupportIndex,EconomicSupportIndexForDisplay
29656,United Kingdom,GBR,,,NAT_TOTAL,20200101,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29657,United Kingdom,GBR,,,NAT_TOTAL,20200102,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29658,United Kingdom,GBR,,,NAT_TOTAL,20200103,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29659,United Kingdom,GBR,,,NAT_TOTAL,20200104,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29660,United Kingdom,GBR,,,NAT_TOTAL,20200105,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Getting Deaths and Cases

In [29]:
# Drop the country/region identifier columns, index the rows by the raw Date
# value, and forward-fill missing entries so every row carries the most recent
# earlier value in each column.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas releases.
count_df = (
    count_df
    .drop(columns=['CountryName', 'CountryCode', 'RegionName', 'RegionCode'])
    .set_index('Date')
    .ffill()
)

In [30]:
# Narrow the frame to the confirmed case and death count columns.
count_columns = ["ConfirmedCases", "ConfirmedDeaths"]
uk_df = count_df[count_columns]

In [31]:
# Parse the YYYYMMDD index values into proper datetimes.
parsed_dates = pd.to_datetime(uk_df.index, format="%Y%m%d")
uk_df.index = parsed_dates

In [32]:
# Show the date-indexed case/death frame (the notebook renders a truncated view).
uk_df

Unnamed: 0_level_0,ConfirmedCases,ConfirmedDeaths
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,,
2020-01-02,,
2020-01-03,,
2020-01-04,,
2020-01-05,,
...,...,...
2020-11-28,1605172.0,58030.0
2020-11-29,1617327.0,58245.0
2020-11-30,1629657.0,58448.0
2020-12-01,1643086.0,59051.0


## Calculating Infection Rate

In [33]:
# Preview the first five rows of the UK case/death frame.
uk_df.head(n=5)

Unnamed: 0_level_0,ConfirmedCases,ConfirmedDeaths
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,,
2020-01-02,,
2020-01-03,,
2020-01-04,,
2020-01-05,,


In [34]:
#uk_df

## Parsing and Sorting by Date

In [35]:
# Ensure the index holds datetimes: parse the YYYYMMDD values only when the
# index is not already a DatetimeIndex (re-parsing one leaves it unchanged).
if not isinstance(uk_df.index, pd.DatetimeIndex):
    uk_df.index = pd.to_datetime(uk_df.index, format="%Y%m%d")

In [36]:
# Order the rows chronologically (ascending by the date index).
uk_df = uk_df.sort_index(axis=0, ascending=True)

In [37]:
# Show the frame again after sorting by date (the notebook renders a truncated view).
uk_df

Unnamed: 0_level_0,ConfirmedCases,ConfirmedDeaths
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,,
2020-01-02,,
2020-01-03,,
2020-01-04,,
2020-01-05,,
...,...,...
2020-11-28,1605172.0,58030.0
2020-11-29,1617327.0,58245.0
2020-11-30,1629657.0,58448.0
2020-12-01,1643086.0,59051.0


In [38]:
# Row-over-row change in ConfirmedCases; the first row has no predecessor and is NaN.
# NOTE(review): ConfirmedCases looks cumulative in the displayed output, so this
# would be the daily new-case count rather than a rate; confirm the intended meaning.
case_change = uk_df['ConfirmedCases'].diff(periods=1)
uk_df['InfectionRate'] = case_change

## Visualizing in Plotly

In [39]:
# Plot the confirmed case and death counts as two lines on one figure.
fig = go.Figure()
# Iterate the column names directly rather than building a throwaway DataFrame
# slice just to loop over its labels.
for col in ('ConfirmedCases', 'ConfirmedDeaths'):
    fig.add_trace(go.Scatter(x=uk_df.index, y=uk_df[col], name=col))

# Axis titles let the chart stand on its own when the notebook is skimmed.
# update_layout returns the figure, so keeping it as the last expression
# renders the chart as the cell output.
fig.update_layout(
    title="Case & Death Count in UK",
    xaxis_title="Date",
    yaxis_title="Count",
)

In [40]:
# Plot the InfectionRate column (the row-over-row change in ConfirmedCases)
# as a single line; no loop is needed for one series.
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=uk_df.index, y=uk_df['InfectionRate'], name='InfectionRate')
)

# Axis titles let the chart stand on its own when the notebook is skimmed.
# update_layout returns the figure, so keeping it as the last expression
# renders the chart as the cell output.
fig.update_layout(
    title="Infection Rate in UK",
    xaxis_title="Date",
    yaxis_title="Daily change in confirmed cases",
)

In [41]:
# Write the UK case/death frame (including the date index) to a CSV file
# in the current working directory.
uk_df.to_csv(path_or_buf="uk_data.csv")

In [42]:
# Write the full UK-filtered, forward-filled tracker frame to a CSV file
# in the current working directory.
count_df.to_csv(path_or_buf="covid19.csv")