# Covid-19 Mobility and Policy Stringency Analysis

**Data Source :** 
1. Policy Stringency Data : https://www.bsg.ox.ac.uk/research/research-projects/coronavirus-government-response-tracker
2. Mobility Open Data by Apply and Google : 
- Apple 3 dimensions : https://www.apple.com/covid19/mobility
- Google 6 dimensions : https://www.google.com/covid19/mobility/

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import altair as alt
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 50)

data = pd.read_csv('./01-version.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,iso_code,date,location,total_cases,new_cases,total_deaths,new_deaths,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,StringencyIndex,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,driving,transit,walking
0,0,ABW,2020-03-13,Aruba,2,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.1,0.4,-0.04,-0.05,0.03,0.01,,,
1,1,ABW,2020-03-20,Aruba,4,2,0,0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,30.56,-0.53,-0.07,-0.41,-0.45,-0.41,0.22,,,
2,2,ABW,2020-03-24,Aruba,12,8,0,0,3.0,2.0,0.0,0.0,0.0,1.0,0.0,3.0,44.84,-0.55,-0.21,-0.49,-0.57,-0.51,0.26,,,
3,3,ABW,2020-03-25,Aruba,17,5,0,0,3.0,2.0,0.0,0.0,0.0,1.0,0.0,3.0,44.84,-0.57,-0.2,-0.56,-0.6,-0.53,0.27,,,
4,4,ABW,2020-03-26,Aruba,19,2,0,0,3.0,2.0,0.0,0.0,0.0,1.0,0.0,3.0,44.84,-0.57,-0.15,-0.54,-0.61,-0.55,0.27,,,


#### Sample Analysis : Italy

In [2]:
italy = data[data.iso_code=='ITA']
italy.drop('Unnamed: 0', axis=1, inplace=True)
italy['date'] = pd.to_datetime(italy['date'])
italy = italy.reset_index(drop=True)
italy.head()

Unnamed: 0,iso_code,date,location,total_cases,new_cases,total_deaths,new_deaths,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,StringencyIndex,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,driving,transit,walking
0,ITA,2019-12-31,Italy,0,0,0,0,,,,,,,,,,,,,,,,,,
1,ITA,2020-01-01,Italy,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
2,ITA,2020-01-02,Italy,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
3,ITA,2020-01-03,Italy,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
4,ITA,2020-01-04,Italy,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,


- Policy Stringency Anlaysis

In [3]:
policy = italy[['date','C1_School closing','C2_Workplace closing','C3_Cancel public events','C4_Restrictions on gatherings','C5_Close public transport',
                 'C6_Stay at home requirements','C7_Restrictions on internal movement','C8_International travel controls',]]
policy['date'] = pd.to_datetime(policy['date'])
temp = policy.groupby(policy['date'].dt.strftime('%B')).mean()
temp.drop(index='December',axis=0,inplace=True)
new_order = ['January', 'February', 'March', 'April', 'May']
temp = temp.reindex(new_order, axis=0)
temp = temp.reset_index()
temp

Unnamed: 0,date,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls
0,January,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.419355
1,February,0.724138,0.827586,0.482759,0.965517,0.0,0.482759,0.62069,3.0
2,March,3.0,3.0,2.0,4.0,0.677419,2.387097,2.0,3.0
3,April,3.0,3.0,2.0,4.0,1.633333,2.3,2.0,3.0
4,May,3.0,1.4,2.0,4.0,0.4,1.176471,1.176471,2.176471


In [4]:
# policy stringency change over time

temp = temp.unstack().reset_index()
temp.drop(index=[0,1,2,3,4],axis=0,inplace=True)
temp.reset_index()
col=['measures','month','avg_stringency']
temp.columns = col
temp['month'] = temp['month'].map({0:'Jan',1:'Feb',2:'Mar',3:'Apr',4:'May'})

g1=alt.Chart(temp).mark_circle(opacity=0.8,
    stroke='black',
    strokeWidth=1).encode(
    x=alt.X('month:O',sort=["Jan", "Feb", "Mar", "Apr", "May"]),
    y='measures:O',
    size='avg_stringency:Q',
    color='avg_stringency:Q'
    ).properties(
    width=450,
    height=320)

g2=alt.Chart(italy).mark_line(point=True).encode(
    alt.X('new_cases', scale=alt.Scale(zero=False)),
    alt.Y('StringencyIndex', scale=alt.Scale(zero=False)),
).properties(
    width=450,
    height=320)

(g1|g2)

- Policy stage analysis : the end of Feburary increases the stringency immediately

In [5]:
temp = pd.melt(policy,id_vars=['date'],value_vars=['C1_School closing','C2_Workplace closing','C3_Cancel public events','C4_Restrictions on gatherings','C5_Close public transport',
                 'C6_Stay at home requirements','C7_Restrictions on internal movement','C8_International travel controls'],var_name='stringency',value_name='level')

g1=alt.Chart(temp).mark_area(
    color="lightblue",opacity=0.3
).encode(
    x='date',
    y='level',
    color='stringency',
).properties(
    width=450,
    height=320
)

temp = italy[['date','StringencyIndex']]
g2=alt.Chart(italy).mark_area(
    line={'color':'darkgreen'},
    color=alt.Gradient(
        gradient='linear',
        stops=[alt.GradientStop(color='white', offset=0),
               alt.GradientStop(color='darkgreen', offset=1)],
        x1=1,
        x2=1,
        y1=1,
        y2=0
    )
).encode(
    x='date',
    y='StringencyIndex',
).properties(
    width=450,
    height=320
)

(g1|g2)

#### Cross-country analysis

In [6]:
df = pd.DataFrame(data[data.iso_code.isin(['FRA','ITA','DEU','ESP','GBR'])])
df.drop('Unnamed: 0',axis=1,inplace=True)
df.date = pd.to_datetime(df.date)
df.head()

Unnamed: 0,iso_code,date,location,total_cases,new_cases,total_deaths,new_deaths,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,StringencyIndex,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,driving,transit,walking
4218,DEU,2019-12-31,Germany,0,0,0,0,,,,,,,,,,,,,,,,,,
4219,DEU,2020-01-01,Germany,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
4220,DEU,2020-01-02,Germany,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
4221,DEU,2020-01-03,Germany,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
4222,DEU,2020-01-04,Germany,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,


- Policy Stringency and the new Covid-19 cases

In [7]:
# Normally, the cases is disagoised after 15 days, so we take the lag effect
def lag_effect(df,loc,num):
    temp = df[['location','date','new_cases']]
    temp.index=temp.location
    temp.drop('location',axis=1,inplace=True)
    temp[temp.index==loc]
    temp= pd.DataFrame(temp.groupby(['location'])['new_cases'].shift(-15))
    lag_df =temp[temp.index==loc]
    datelist = pd.date_range('2019-12-31', periods=num).tolist()
    lag_df['date']= pd.to_datetime(datelist)
    return lag_df
lag_fr = lag_effect(df,'France',141)
lag_ger= lag_effect(df,'Germany',141)
lag_ita = lag_effect(df,'Italy',141)
lag_uk = lag_effect(df,'United Kingdom',141)
lag_spa = lag_effect(df,'Spain',140)

In [8]:
lag_df = pd.concat((lag_fr,lag_ger,lag_ita,lag_uk,lag_spa),axis=0)
lag_df = lag_df[lag_df['new_cases'].notna()]
lag_df = lag_df.reset_index()
lag = lag_df.merge(df, on=['location','date'],how='left')
lag.head()

Unnamed: 0,location,new_cases_x,date,iso_code,total_cases,new_cases_y,total_deaths,new_deaths,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,StringencyIndex,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,driving,transit,walking
0,France,0.0,2019-12-31,FRA,0,0,0,0,,,,,,,,,,,,,,,,,,
1,France,0.0,2020-01-01,FRA,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
2,France,0.0,2020-01-02,FRA,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
3,France,0.0,2020-01-03,FRA,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,
4,France,0.0,2020-01-04,FRA,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,


In [9]:
alt.Chart(lag).mark_trail().encode(
    alt.X('date', scale=alt.Scale(zero=False)),
    alt.Y('new_cases_x:Q', scale=alt.Scale(zero=False)),
    size='StringencyIndex:Q',
    color='location'
).properties(
    width=500,
    height=320
).interactive()

In [10]:
alt.Chart(lag_df).mark_circle(opacity=0.8,
    stroke='black',
    strokeWidth=1).encode(
    x='date',
    y='location',
    size=alt.Size('new_cases:Q',
        scale=alt.Scale(range=[0, 1500]),
        legend=alt.Legend(title='New Corona cases')
    ),
    color='location'
    ).properties(
    width=450,
    height=320)

- Policy Stringency V.S New cases

In [11]:
g1=alt.Chart(lag).mark_circle().encode(
    x='StringencyIndex',
    y='new_cases_y',
    color='location',
#     size='new_cases_x'
).interactive()

g2=alt.Chart(lag).mark_circle().encode(
    x='date',
    y='StringencyIndex',
    color='location',
#     size='StringencyIndex'
).interactive()

(g1|g2)

In [12]:
df.transit = df.transit.fillna(method='bfill')
temp = pd.DataFrame(data[data.location.isin(['France','Italy','Germany','Spain','United Kingdom','United States','Taiwan','Korea'])])
temp = df[['date','location','transit']]
temp.fillna(method='ffill',inplace=True)

rect = alt.Chart(temp).mark_rect().encode(
    alt.X('date',bin=True),
    alt.Y('location'),
    alt.Color('transit', scale=alt.Scale(scheme='greenblue'))
).properties(
    width=450,
    height=320)
circ = rect.mark_point().encode(
    alt.ColorValue('grey'),
    alt.Size('transit',
        legend=alt.Legend(title='Records in Selection')
    )
).properties(
    width=450,
    height=400)

alt.vconcat(
    rect + circ,
).resolve_legend(
    color="independent",
    size="independent"
)

---
### Model Analysis:
we take the case of Germany to test if there is Herd Effect : Today's Mobility is affectd yesterday's.

In [13]:
ger = data[data.location=='Germany']
ger = ger[['date','new_cases','transit','walking']]
ger = ger.dropna()
ger.reset_index(inplace=True)
ger['transit_lag']=ger.transit.shift(1)
ger['walking_lag']=ger.walking.shift(1)
ger=ger.dropna()
ger.head()

Unnamed: 0,index,date,new_cases,transit,walking,transit_lag,walking_lag
1,4232,2020-01-14,0,0.0285,0.0404,0.0,0.0
2,4233,2020-01-15,0,0.0241,0.0718,0.0285,0.0404
3,4234,2020-01-16,0,0.0537,0.1076,0.0241,0.0718
4,4235,2020-01-17,0,0.1432,0.2413,0.0537,0.1076
5,4236,2020-01-18,0,0.1637,0.3791,0.1432,0.2413


In [14]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import BayesianRidge

def model(df, X_var,y_var,train_size):
    '''
    This uses Bayesian Ridge to train Germany Transportation Mobility data
    '''
    y = df[y_var]
    X = df[X_var]
    X_train = X.iloc[:train_size,:]
    y_train = y.iloc[:train_size]
    X_test = X.iloc[train_size:,:]
    y_test = y.iloc[train_size:]
    clf = BayesianRidge(compute_score=True)
    clf.fit(X_train, y_train.values)
    y_pred = clf.predict(X_test)
    y_test = pd.DataFrame(y_test)
    y_test['pred']=y_pred
    y_test['date'] = df['date'][train_size:]
    return y_test

pred_df = model(ger,['new_cases','transit_lag'],'transit',80)
cols =['DEU_transit','DEU_pred_transit','date']
pred_df['date'] = pd.to_datetime(pred_df['date'])
pred_df.columns = cols
pred_df.head()

Unnamed: 0,DEU_transit,DEU_pred_transit,date
81,-0.6798,-0.690689,2020-04-03
82,-0.6698,-0.706447,2020-04-04
83,-0.6084,-0.69539,2020-04-05
84,-0.5933,-0.611582,2020-04-06
85,-0.5942,-0.599295,2020-04-07


In [15]:
g1=alt.Chart(pred_df).mark_line(point=True).encode(
    alt.X('date', scale=alt.Scale(zero=False)),
    alt.Y('DEU_transit:Q', scale=alt.Scale(zero=False)),
#     size='transit:Q',
).properties(
    width=500,
    height=320
)


g2=alt.Chart(pred_df).mark_line(point=True).encode(
    alt.X('date', scale=alt.Scale(zero=False)),
    alt.Y('DEU_pred_transit:Q', scale=alt.Scale(zero=False)),
    color=alt.ColorValue('orange'),
).properties(
    width=500,
    height=320
)

alt.vconcat(
    g1 + g2,
).resolve_legend(
    color="independent",
    size="independent"
)

In [16]:
pred_df = model(ger,['new_cases','walking_lag'],'transit',80)
cols =['DEU_walking','DEU_pred_walking','date']
pred_df['date'] = pd.to_datetime(pred_df['date'])
pred_df.columns = cols
pred_df.head()

Unnamed: 0,DEU_walking,DEU_pred_walking,date
81,-0.6798,-0.803454,2020-04-03
82,-0.6698,-0.807519,2020-04-04
83,-0.6084,-0.736676,2020-04-05
84,-0.5933,-0.426979,2020-04-06
85,-0.5942,-0.490486,2020-04-07


In [17]:
g1=alt.Chart(pred_df).mark_line(point=True).encode(
    alt.X('date', scale=alt.Scale(zero=False)),
    alt.Y('DEU_walking:Q', scale=alt.Scale(zero=False)),
#     size='transit:Q',
).properties(
    width=500,
    height=320
)


g2=alt.Chart(pred_df).mark_line(point=True).encode(
    alt.X('date', scale=alt.Scale(zero=False)),
    alt.Y('DEU_pred_walking:Q', scale=alt.Scale(zero=False)),
    color=alt.ColorValue('orange'),
).properties(
    width=500,
    height=320
)

alt.vconcat(
    g1 + g2,
).resolve_legend(
    color="independent",
    size="independent"
)

- **Conclusion:** the Herd Behavior does exist in the Covid-19 Mobility case. In this sample, the **Public Transportation** has a more significant result than walking