In [2]:
import pandas as pd
import numpy as np
import glob
import datetime

# **Load**

In [3]:
# Load the restrictions extract: semicolon-delimited, header on the first row,
# keeping only the columns at zero-based positions 2 through 6.
restrictions_df = pd.read_csv(
    "restrictions.csv",
    sep=";",
    header=0,
    usecols=list(range(2, 7)),
)

# Preview the first rows of the loaded frame.
restrictions_df.head(n=20)

Unnamed: 0,intervention_category,intervention_type,description,organization,start_date
0,Case management,Other,"Canada’s Flight Plan for Navigating COVID-19, ...",Transport Canada,2020-08-14 00:00:00
1,Case management,Other,Consistent approach for air carriers to collec...,Transport Canada,2020-09-11 00:00:00
2,Health workforce,Licence reinstatement/reclassification,Emergency registration for regulated nurses su...,College of Nurses of Ontario,2020-08-04 00:00:00
3,Public information,Phase and alert level changes,Windsor-Essex public health unit entered Stage...,Office of the Premier,2020-08-12 00:00:00
4,Closures/openings,Recreation,Capacity limit for gyms and other fitness faci...,Office of the Premier,2020-08-15 00:00:00
5,Distancing,Gatherings,Limitations on indoor gatherings relaxed to al...,"Ministry of Heritage, Sport, Tourism and Cultu...",2020-08-21 00:00:00
6,Health services,Visitors,Long-term care residents permitted short-stay ...,Ministry of Long-Term Care,2020-08-28 00:00:00
7,Health services,Visitors,Visitor restrictions at long-term care homes r...,Ministry of Long-Term Care,2020-09-09 00:00:00
8,Closures/openings,Education,Public schools opened for first day of in-pers...,Office of the Premier,2020-09-08 00:00:00
9,Distancing,Gatherings,Indoor gatherings limited to 10 and outdoor ga...,Ministry of Health,2020-09-18 00:00:00


In [4]:
# Load the holiday extract: semicolon-delimited, header on the first row,
# keeping only the columns at zero-based positions 2 through 4.
holiday_df = pd.read_csv(
    "holiday.csv",
    sep=";",
    header=0,
    usecols=list(range(2, 5)),
)
# Preview the first rows of the loaded frame.
holiday_df.head()

Unnamed: 0,holiday_date,name,statutory
0,2020-08-02 00:00:00,New Brunswick Day,False
1,2020-08-02 00:00:00,Federal Civic Holiday,True
2,2020-08-02 00:00:00,Terry Fox Day,False
3,2020-08-02 00:00:00,Saskatchewan Day,False
4,2020-08-02 00:00:00,Colonel By Day,False


In [5]:
# Load the patient extract: comma-delimited, header on the first row,
# keeping only the columns at zero-based positions 6 through 8.
patient_df = pd.read_csv(
    "patient_data.csv",
    sep=",",
    header=0,
    usecols=list(range(6, 9)),
)

# Preview the first rows of the loaded frame.
patient_df.head()

Unnamed: 0,Age_Group,Client_Gender,Case_AcquisitionInfo
0,20s,MALE,CC
1,50s,MALE,CC
2,40s,FEMALE,CC
3,<20,FEMALE,NO KNOWN EPI LINK
4,20s,MALE,CC


# **Process**

**Restriction**

In [8]:
# Insert a sequential id column and promote it to the index.
# The insert has to run before set_index: the frame as loaded from the CSV has no
# 'restriction_id' column, so set_index on its own raises KeyError on a fresh kernel.
# The guard keeps the cell re-runnable if the column is already present.
if 'restriction_id' not in restrictions_df.columns:
    restrictions_df.insert(0, 'restriction_id', range(0, len(restrictions_df)))

restrictions_df.set_index('restriction_id', inplace=True)

restrictions_df.head(20)

Unnamed: 0_level_0,intervention_category,intervention_type,description,organization,start_date
restriction_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,Case management,Other,"Canada’s Flight Plan for Navigating COVID-19, ...",Transport Canada,2020-08-14 00:00:00
1,Case management,Other,Consistent approach for air carriers to collec...,Transport Canada,2020-09-11 00:00:00
2,Health workforce,Licence reinstatement/reclassification,Emergency registration for regulated nurses su...,College of Nurses of Ontario,2020-08-04 00:00:00
3,Public information,Phase and alert level changes,Windsor-Essex public health unit entered Stage...,Office of the Premier,2020-08-12 00:00:00
4,Closures/openings,Recreation,Capacity limit for gyms and other fitness faci...,Office of the Premier,2020-08-15 00:00:00
5,Distancing,Gatherings,Limitations on indoor gatherings relaxed to al...,"Ministry of Heritage, Sport, Tourism and Cultu...",2020-08-21 00:00:00
6,Health services,Visitors,Long-term care residents permitted short-stay ...,Ministry of Long-Term Care,2020-08-28 00:00:00
7,Health services,Visitors,Visitor restrictions at long-term care homes r...,Ministry of Long-Term Care,2020-09-09 00:00:00
8,Closures/openings,Education,Public schools opened for first day of in-pers...,Office of the Premier,2020-09-08 00:00:00
9,Distancing,Gatherings,Indoor gatherings limited to 10 and outdoor ga...,Ministry of Health,2020-09-18 00:00:00


**Holiday**

In [9]:
# Number the rows 0..n-1 in a new leading 'holiday_id' column.
holiday_df.insert(loc=0, column='holiday_id', value=range(len(holiday_df)))

# Use that id column as the frame's index.
holiday_df.set_index(keys='holiday_id', inplace=True)

# Preview the re-indexed frame.
holiday_df.head(n=20)

Unnamed: 0_level_0,holiday_date,name,statutory
holiday_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2020-08-02 00:00:00,New Brunswick Day,False
1,2020-08-02 00:00:00,Federal Civic Holiday,True
2,2020-08-02 00:00:00,Terry Fox Day,False
3,2020-08-02 00:00:00,Saskatchewan Day,False
4,2020-08-02 00:00:00,Colonel By Day,False
5,2020-08-02 00:00:00,Natal Day,False
6,2020-08-02 00:00:00,British Columbia Day,False
7,2020-08-02 00:00:00,August Civic Holiday,True
8,2020-08-09 00:00:00,National Peacekeepers' Day,False
9,2020-08-20 00:00:00,Gold Cup Parade Day,False


**Patient**

In [10]:

# Rename the three loaded columns to snake_case names.
patient_df.columns = ['age_group', 'client_gender', 'case_acquisition']

# Keep a single row per distinct (age_group, client_gender, case_acquisition) combination.
patient_df = patient_df.drop_duplicates(
    subset=['age_group', 'client_gender', 'case_acquisition']
)

# Number the remaining rows 0..n-1 in a new leading 'patient_id' column ...
patient_df.insert(loc=0, column='patient_id', value=range(len(patient_df)))

# ... and use that id column as the frame's index.
patient_df.set_index(keys='patient_id', inplace=True)

# Report the size after de-duplication, then preview the result.
print(patient_df.shape)
patient_df.head(n=20)

(178, 3)


Unnamed: 0_level_0,age_group,client_gender,case_acquisition
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,20s,MALE,CC
1,50s,MALE,CC
2,40s,FEMALE,CC
3,<20,FEMALE,NO KNOWN EPI LINK
4,20s,MALE,NO KNOWN EPI LINK
5,30s,FEMALE,CC
6,40s,FEMALE,OB
7,<20,FEMALE,CC
8,40s,MALE,OB
9,30s,MALE,NO KNOWN EPI LINK


# **Export data**

In [None]:
# Write each processed frame (index included) to its own Excel workbook
# in the current working directory.
holiday_df.to_excel(excel_writer="holiday_v2.xlsx")
restrictions_df.to_excel(excel_writer="restrictions_v2.xlsx")
patient_df.to_excel(excel_writer="patients_v2.xlsx")

# **DB Connection**

In [None]:
from urllib.parse import quote

from sqlalchemy import create_engine, text
import psycopg2 
import io
from config2 import PASSWORD


# Build the PostgreSQL engine. The password is read from the local config2 module
# so it is not stored in the notebook itself.
# It is percent-encoded before being embedded in the URL: characters such as '@',
# ':', '/' or '%' would otherwise be parsed as URL delimiters and corrupt the
# connection string. PASSWORD is therefore expected to be the raw, unescaped value.
engine = create_engine(
    'postgresql://belha066:' + quote(PASSWORD, safe='') + '@www.eecs.uottawa.ca:15432/group_12'
)


# Create tables from the SQL schema file (schema2.sql by default).
def create_tables(engine, schema_path="schema2.sql"):
    """Execute the SQL script stored in ``schema_path`` against ``engine``.

    The whole file is read, wrapped in ``sqlalchemy.text`` and executed as a single
    statement inside a transaction that is committed when the block exits.

    Parameters
    ----------
    engine : sqlalchemy Engine
        Engine connected to the target database.
    schema_path : str, optional
        Path of the SQL file to run. Defaults to ``"schema2.sql"``.

    Raises
    ------
    OSError
        If the schema file cannot be opened.
    """
    # Context manager guarantees the file handle is released even if reading fails.
    with open(schema_path) as sql_file:
        schema_sql = text(sql_file.read())

    # Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
    # run the script on an explicit connection instead. engine.begin() commits
    # on success and rolls back if execution raises.
    with engine.begin() as conn:
        conn.execute(schema_sql)
    
# Load data from a DataFrame into a database table.
def push_df_to_db(engine, df, table_name):
    """Bulk-load the rows of ``df`` into ``table_name`` via the cursor's ``copy_from``.

    The frame is serialised into an in-memory, tab-separated buffer with no header
    row and without its index, and that buffer is streamed to the database.
    Any exception is printed rather than re-raised (best-effort load), and the
    connection is always closed.

    Parameters
    ----------
    engine : object
        Must expose ``raw_connection()`` returning a DB-API connection whose cursor
        provides ``copy_from`` (the notebook passes a SQLAlchemy engine).
    df : pandas.DataFrame
        Data to load. The index is NOT written (``index=False``), so only the
        frame's columns reach the table.
    table_name : str
        Name of the destination table.

    Returns
    -------
    None
    """
    # Stays None when raw_connection() itself fails, so the finally clause does not
    # hit an unbound name and mask the real error.
    conn = None
    try:
        conn = engine.raw_connection()
        cur = conn.cursor()
        buffer = io.StringIO()
        df.to_csv(buffer, sep='\t', header=False, index=False)
        buffer.seek(0)  # rewind so copy_from reads from the start
        # null="" : empty fields in the stream are loaded as NULL.
        # NOTE(review): values containing tabs or newlines would corrupt the stream.
        cur.copy_from(buffer, table_name, null="")
        conn.commit()
    except Exception as e:
        print(e)
    finally:
        if conn is not None:
            conn.close()

In [None]:
# Run the schema SQL file against the database behind `engine`.
create_tables(engine)

In [None]:
# Load the processed frames into their destination tables.
# Note that patient_df is written to the table named "person".
push_df_to_db(engine=engine, df=holiday_df, table_name="holiday")
push_df_to_db(engine=engine, df=patient_df, table_name="person")