# Regression Project2 - RDD


### Requirements: 

#### A - Find data on *at least* one COVID measure for y:

either:

    1 - COVID cases
    2 - hospitalizations
    3 - deaths
#### B - Use RDD to estimate the effect of the following events in Quebec:

- D1. 20/3/2020 - lockdown

- D2. 31/8/2020 - The reopening of schools 

- D3. 25/12/2020 - lockdown

#### C - provide the following for each:

- c.1) A RDD plot similar to the ones shown above

- c.2) An interpretation of the p-value on the effect of the measure taken (the cutoff parameter)

- c.3) A justification on the design of your regression:

       The amount of time included on both sides of the cutoff (longer is not necessarily better)

       The polynomial degree (higher is not always better)

       Other regression design considerations

- c.4) A two-paragraph explanation of your findings for that event.

#### Data Sources:
    https://www.donneesquebec.ca/recherche/dataset/covid-19-portrait-quotidien-des-cas-confirmes
    https://www.inspq.qc.ca/covid-19/donnees

    https://www.inspq.qc.ca/sites/default/files/covid/donnees/covid19-hist.csv
    The columns are explained at https://github.com/SimonCoulombe/covidtwitterbot
    
    https://ourworldindata.org/explorers/coronavirus-data-explorer?zoomToSelection=true&time=2020-03-01..latest&facet=none&pickerSort=asc&pickerMetric=location&Metric=Hospital+patients&Interval=7-day+rolling+average&Relative+to+Population=true&Color+by+test+positivity=false&country=~CAN
    https://health-infobase.canada.ca/covid-19/#a3
    https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv

In [None]:
import os
import matplotlib.pyplot as plt 
import numpy as np 
from statsmodels.formula.api import ols
import pandas as pd
from patsy import dmatrices
import scipy.stats as sc
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
import seaborn as sns 
import statsmodels.api as sm

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import Ridge

import scipy.stats
import scipy.special

from sklearn.model_selection import train_test_split

sns.set()

####  DATAFRAMES

In [None]:
# os.getcwd()

# --- COVID cases ---
# Load the per-region cases export and rename the French source headers
# to the short English column names used by the rest of the notebook.
df_cov = pd.read_csv('data/graph_1-1_page_par_region.csv').rename(
    columns={
        'Date de résultat du test': 'Date',
        'Cas confirmés': 'Cases',
    }
)

# Parse the date column so it can be converted to day numbers later on.
df_cov['Date'] = pd.to_datetime(df_cov['Date'])


from datetime import datetime


def fromdate(x):
    """Return *x* as a (possibly fractional) number of days since 1970-01-01.

    Naive datetimes are measured against a naive epoch, so the result does
    not depend on the machine's local timezone and the difference between
    two calendar dates is always a whole number of days, even across a
    daylight-saving change. (``datetime.timestamp`` interprets naive values
    as local time, which makes such differences off by an hour.)

    Timezone-aware datetimes are converted through their POSIX timestamp.

    Parameters:
        x: a ``datetime.datetime`` (or subclass such as ``pandas.Timestamp``).

    Returns:
        float: days elapsed since the epoch.
    """
    seconds_per_day = 86400.0
    if x.tzinfo is not None:
        # Aware values already identify an absolute instant.
        return x.timestamp() / seconds_per_day
    return (x - datetime(1970, 1, 1)).total_seconds() / seconds_per_day
   
    
# One regression formula per event: a linear trend in Date, a level shift at
# the cutoff (Lockdown) and a change of slope after it (the interaction term).
lst_formula = [
    '''Cases ~ 
        Date
        + Lockdown
        + Lockdown:Date
    ''',
    '''Cases ~ 
        Date 
        + Lockdown 
        + Date:Lockdown
    ''',
    '''Cases ~ 
        Date
        + Lockdown
        + Date:Lockdown
    ''',
]

lst_events = ['Lockdown', 'School Opening', 'Dec Lockdown']

# Placeholders, one slot per event, overwritten with the fitted models.
models = [0] * 3

offset_time = 14  # incubation time

# For every event: (start of the fit window, event date, end of the fit
# window), converted straight to day numbers (days since 1970; the event
# day is subtracted later so that values sit near zero).
dates = [
    [fromdate(datetime.strptime(day, '%Y-%m-%d')) for day in window]
    for window in (
        ('2020-03-05', '2020-03-20', '2020-04-17'),  # lockdown
        ('2020-08-17', '2020-08-31', '2020-09-14'),  # reopening of schools
        ('2020-12-04', '2020-12-25', '2021-01-15'),  # December lockdown
    )
]

# Express the data column in the same unit (days since 1970).
df_cov['Date'] = df_cov['Date'].map(fromdate)

# One panel per event: raw case counts, the event date, the event date plus
# the incubation delay (the regression cutoff, day 0) and the fitted lines.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, 6), dpi=90)
axes = [ax1, ax2, ax3]

# Settings that do not change from one event to the next.
display_size = 6  # the plotted window is this many times wider than the fit window
n_weeks = 6  # x ticks span -n_weeks..+n_weeks around day 0
week_ticks = np.linspace(-7 * n_weeks, 7 * n_weeks, 7)
linewidth = 0.5
real_lockdown = -offset_time  # x position of the event itself on the shifted axis

for i, (ax, event, formula, window) in enumerate(
        zip(axes, lst_events, lst_formula, dates)):

    fit_start, lockdown, fit_stop = window
    before = fit_start - lockdown  # offset in days (negative)
    after = fit_stop - lockdown  # offset in days (positive)

    # --------------------------------------------

    # Shift time so that day 0 is the event date plus the incubation delay.
    # NOTE(review): `before`/`after` are measured from the event date but are
    # applied on this shifted axis, so the fit window effectively runs from
    # window start + offset_time to window end + offset_time. Confirm that
    # this is the intended window.
    shifted = df_cov.assign(Date=df_cov['Date'] - (lockdown + offset_time))

    # Rows used to fit the model: strictly inside (before, after).
    fit_mask = (shifted['Date'] > before) & (shifted['Date'] < after)
    df_cov2 = shifted.loc[fit_mask].copy()  # explicit copy: a column is added below
    df_cov2['Lockdown'] = (df_cov2['Date'] > 0).astype(int)

    # Rows shown on the plot: the same window, display_size times wider.
    view_mask = ((shifted['Date'] > before * display_size)
                 & (shifted['Date'] < after * display_size))
    df_cov3 = shifted.loc[view_mask]

    # --------------------------------------------

    # Vertical markers: the event itself, and day 0 (event + incubation delay).
    ax.plot([real_lockdown, real_lockdown], [0, 2000], c='#000', alpha=1.0, linewidth=0.5, label=event)
    ax.plot([0, 0], [0, 2000], c='#888', alpha=1.0, linewidth=0.5, label='Two Week')

    # --------------------------------------------

    # Observed cases, drawn both as points and as a thin connecting line.
    ax.scatter(df_cov3['Date'], df_cov3['Cases'], c='orange', alpha=1, s=2.5, label='Cases')
    ax.plot(df_cov3['Date'], df_cov3['Cases'], c='orange', alpha=1, linewidth=linewidth)

    # --------------------------------------------

    y, x = dmatrices(formula, df_cov2, return_type='dataframe')
    model1 = sm.OLS(y, x).fit()
    models[i] = model1

    # Plot against the design matrix's own Date column: patsy drops rows with
    # missing values by default, so `x` can be shorter than `df_cov2`.
    ax.plot(x['Date'], model1.predict(x), c='blue', alpha=1.0, linewidth=1.0, label='Predict')

    ax.set_xlabel('Days')
    ax.set_ylabel('Cases')
    ax.set_xticks(week_ticks)
    ax.set_xticklabels([f'Week {int(tick / 7)}' for tick in week_ticks])
    ax.tick_params(labelrotation=90)
    ax.legend(fancybox=True, framealpha=0.2)

fig.suptitle('Effects of Events on Covid-19')
plt.tight_layout()
plt.show()

In [None]:
# df_cov['Date']-ts.dates[0]
# df_cov
# df_cov = pd.read_csv('data/graph_1-1_page_par_region.csv')
# df_cov.loc[df_cov['Cas confirmés']>0]
# df_cov2

# Regression table for the model stored at index 2, i.e. the third entry of
# lst_events ('Dec Lockdown'). Use index 0 or 1 for the other two events.
models[2].summary()


In [None]:
# model.summary()
# ts.days

In [None]:
# y,x=dmatrices('Cases ~ Date',df_cov)

# model=sm.OLS(y,x).fit()
# model.summary()


In [None]:
#df.columns


## 1- COVID CASES

#### A- Dependencies and Response definition

In [None]:
# if a common df is used, use: df.copy()


In [None]:
# Empty placeholders for the predictor names and the response name.
x_cov = [['', '']]
y_cov = [['']]

#### B- RDD for D1. 20/3/2020 | D2. 31/8/2020 | D3. 25/12/2020 

#### C.1) PLOT

#### C.2) Interpretation of the p-value on the effect of the measure taken

#### C.3) Justification on the design of your regression

#### C.4) Two-paragraph explanation of your findings for that event.

### 2- HOSPITALIZATIONS

#### A- Dependencies and Response definition

In [None]:
# if a common df is used, use: df.copy()


#### B- RDD for D1. 20/3/2020 | D2. 31/8/2020 | D3. 25/12/2020

#### C.1) PLOT

#### C.2) Interpretation of the p-value on the effect of the measure taken

#### C.3) Justification on the design of your regression

#### C.4) Two-paragraph explanation of your findings for that event.

### 3- DEATHS

#### A- Dependencies and Response definition

In [None]:
# if a common df is used, use: df.copy()


#### B- RDD for D1. 20/3/2020 | D2. 31/8/2020 | D3. 25/12/2020

#### C.1) PLOT

#### C.2) Interpretation of the p-value on the effect of the measure taken

#### C.3) Justification on the design of your regression

#### C.4) Two-paragraph explanation of your findings for that event.

# - CONCLUSION -
### A 2 paragraph explanation of your findings for that event.