# Regression Project2 - RDD


#### Data Source:
    https://www.inspq.qc.ca/sites/default/files/covid/donnees/covid19-hist.csv

## RDD Analysis

In [None]:
import os
import matplotlib.pyplot as plt 
import numpy as np 
from statsmodels.formula.api import ols
import pandas as pd
from patsy import dmatrices
import scipy.stats as sc
import seaborn as sns 
import statsmodels.api as sm
import scipy.stats
import scipy.special

sns.set()  # switch on seaborn's default theme for every matplotlib figure drawn below

In [None]:
#    - COVID CASES -
# Load the raw case counts and give the French column headers the short
# English names that the rest of the notebook relies on.
column_names = {
    'Date de résultat du test': 'Date',
    'Cas confirmés': 'Cases',
}
df_cov = pd.read_csv('data/graph_1-1_page_par_region.csv').rename(columns=column_names)

# Turn the date strings into pandas timestamps.
df_cov['Date'] = pd.to_datetime(df_cov['Date'])


from datetime import datetime

  # we'll use this for time calculations
def fromdate(x):
    """Convert a datetime into (fractional) days since 1970-01-01.

    Naive datetimes are measured as plain wall-clock distance from the epoch,
    so two midnights that are N calendar days apart always differ by exactly
    N. Going through ``datetime.timestamp`` instead would interpret naive
    values in the machine's local timezone, which makes the result depend on
    where the code runs and introduces +/- 1/24 day jumps across daylight
    saving changes.

    Timezone-aware datetimes denote an absolute instant, so for them the POSIX
    timestamp is used unchanged.

    Args:
        x: a ``datetime.datetime`` (or subclass such as ``pandas.Timestamp``).

    Returns:
        float: number of days elapsed since 1970-01-01 00:00.
    """
    if x.tzinfo is not None:
        return datetime.timestamp(x) / 60 / 60 / 24
    return (x - datetime(1970, 1, 1)).total_seconds() / 60 / 60 / 24
   
    
# One patsy formula per event, in the same order as lst_events. All three
# describe the same model (Cases explained by Date, the Lockdown column and
# their interaction); only the spelling of the interaction term differs
# (Lockdown:Date in the first, Date:Lockdown in the other two).
lst_formula = ['''Cases ~ 
        Date
        + Lockdown
        + Lockdown:Date
    ''',
              '''Cases ~ 
        Date 
        + Lockdown 
        + Date:Lockdown
    ''',
              '''Cases ~ 
        Date
        + Lockdown
        + Date:Lockdown
    ''']

# Placeholders: slot i is overwritten with the fitted model of event i.
models = [0, 0, 0]

# Names of the three events; used as legend labels in the plots.
lst_events = ['Lockdown', 'School Opening', 'Dec Lockdown']

# For every event: (start of the fitting window, the event itself, end of the
# fitting window), written as ISO dates.
event_windows = [
    ('2020-03-05', '2020-03-20', '2020-04-17'),  # lockdown
    ('2020-08-17', '2020-08-31', '2020-09-14'),  # school opening
    ('2020-12-04', '2020-12-25', '2021-01-15'),  # December lockdown
]

# Parse each date and express it directly as a day count.
dates = []
for window in event_windows:
    window_in_days = []
    for iso_day in window:
        parsed = datetime.strptime(iso_day, '%Y-%m-%d')
        window_in_days.append(fromdate(parsed))
    dates.append(window_in_days)

# The Date column gets the same treatment: days since 1970. It is re-centred
# around each event later on.
df_cov['Date'] = df_cov['Date'].map(fromdate)

offset_time = 14  # incubation time, in days

fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize = (16,6), dpi = 90)
axes=[ax1, ax2, ax3]

# Settings that are the same for every panel; computed once, outside the loop.
display_size = 6                 # the displayed window is this many times wider than the fitted one
linewidth = 0.5                  # width of the line joining the raw case counts
n_weeks = 6                      # x ticks run from -n_weeks to +n_weeks
real_lockdown = -offset_time     # on the shifted time axis the event itself sits here
marker_height = 2000             # the vertical event markers run from y=0 to this value
week_ticks = np.linspace(-7*n_weeks, 7*n_weeks, 7)
week_labels = [f'Week {int(t/7)}' for t in week_ticks]

# One panel per event: fit a linear RDD model around the event and draw it on
# top of the raw case counts.
for i, ax in enumerate(axes):

    lockdown = dates[i][1]
    before = dates[i][0] - lockdown  # offset of the window start (negative number of days)
    after = dates[i][2] - lockdown   # offset of the window end (positive number of days)

    # --------------------------------------------

    # Re-centre the time axis: day 0 is NOT the event, it is the event plus
    # the incubation time.
    # NOTE: `before`/`after` are measured from the event date but are applied
    # on this shifted axis, so the rows kept are offset_time days later than
    # the calendar dates listed in `dates`.
    shifted = df_cov.copy()
    shifted['Date'] -= lockdown + offset_time

    # Rows used for model fitting: strictly inside (before, after).
    # An explicit copy is taken so that adding the indicator column below
    # never writes into (or warns about) a slice of another frame.
    in_fit_window = (shifted['Date'] > before) & (shifted['Date'] < after)
    df_cov2 = shifted.loc[in_fit_window].copy()

    # 0 before day 0, 1 after it.
    df_cov2['Lockdown'] = (df_cov2['Date'] > 0).astype(int)

    # --------------------------------------------

    # Rows used for display: same idea, but display_size times more data.
    in_display_window = (
        (shifted['Date'] > before*display_size)
        & (shifted['Date'] < after*display_size)
    )
    df_cov3 = shifted.loc[in_display_window]

    # --------------------------------------------

    # Vertical markers: when the event really happened, and day 0 (event + incubation time).
    ax.plot([real_lockdown, real_lockdown], [0, marker_height], c='#000', alpha=1.0, linewidth=0.5, label=lst_events[i])
    ax.plot([0, 0], [0, marker_height], c='#888', alpha=1.0, linewidth=0.5, label='Two Week')

    # --------------------------------------------

    # Raw data, both as points and as a connecting line.
    ax.scatter(df_cov3['Date'], df_cov3['Cases'], c='orange', alpha=1, s=2.5, label='Cases')
    ax.plot(df_cov3['Date'], df_cov3['Cases'], c='orange', alpha=1, linewidth=linewidth)

    # --------------------------------------------

    # Build the design matrices from this event's formula, fit OLS and keep
    # the fitted model for the summary cells further down.
    formula = lst_formula[i]

    y, x = dmatrices(formula, df_cov2, return_type='dataframe')
    model1 = sm.OLS(y, x).fit()
    models[i] = model1
    ax.plot(df_cov2['Date'], model1.predict(x), c='blue', alpha=1.0, linewidth=1.0, label='Predict')

    # Axis cosmetics.
    ax.set_xlabel('Days')
    ax.set_ylabel('Cases')
    ax.set_xticks(week_ticks)
    ax.set_xticklabels(week_labels)
    ax.tick_params(labelrotation=90)
    ax.legend(fancybox=True, framealpha=0.2)

fig.suptitle('Effects of Events on Covid-19')
plt.tight_layout()
plt.show()

## Model Summaries

In [None]:
models[0].summary()  # fit results for the first event in lst_events ('Lockdown')


#### All p-values are significant (P<0.05) except the one for Lockdown, showing that the lockdown had no immediate effect on the number of Covid-19 cases. 

#### A stronger impact of the lockdown on Covid-19 cases emerges over time, as shown by the significant p-value (P<0.001) of the Lockdown:Date interaction term.


In [None]:
models[1].summary()  # fit results for the second event in lst_events ('School Opening')

#### All p-values are significant (P<0.05).

#### This demonstrates that there was an effect of schools reopening in August 2020 on the number of Covid-19 cases. 


In [None]:
models[2].summary()  # fit results for the third event in lst_events ('Dec Lockdown')

#### All p-values are significant (P<0.05).

#### This demonstrates that the second lockdown in December 2020 had a significant impact on the number of Covid-19 cases. 


## Justifications

The time window spans three weeks before and after each event, except for the start date of the first window, because data collection only began on March 5th, 2020. A window of this length allows for the Covid-19 incubation period and for the time it takes to get tested.

No polynomial features were added to the RDD model, as they did not allow for an accurate interpretation of the data, as seen from the p-values here. 

Extra???

## Two paragraph explanation 

We performed an RDD (regression discontinuity design) analysis on data collected on Covid-19 cases in Quebec, Canada during 2020. The three significant events used in the analysis were the first lockdown in March, the schools reopening in August, and the second lockdown in December. All of the events caused a significant change in the number of recorded Covid-19 cases.

The first lockdown showed a delayed decrease in the number of cases: immediately after the lockdown there was no significant change, but once an offset time was added to account for people getting sick and then tested, there was a significant (P<0.05) decrease. The schools reopening caused a significant increase (P<0.05) in the number of cases. Finally, the second lockdown caused a significant decrease (P<0.05) in the number of Covid-19 cases.

