# Installation

In [None]:
!pip install pandas
!pip install numpy
!pip install sqlalchemy
!pip install pymysql

Import libraries

In [1]:
import pandas as pd
import numpy as np
import sqlalchemy as db
import json

# Extract

Read the cleaned Parquet files

Load Progress

In [2]:
# Load the Parquet extracts from the cleaned/ directory.
doctors = pd.read_parquet('cleaned/doctors.parquet')
clinics = pd.read_parquet('cleaned/clinics.parquet')
appointments = pd.read_parquet('cleaned/appointments.parquet')

# The patient table is stored as eight numbered parts (px_1 ... px_8).
# Read every part first and concatenate once: calling pd.concat inside the
# loop copies all previously accumulated rows again on each iteration.
px = pd.concat(
    [pd.read_parquet(f"cleaned/px_{part}.parquet") for part in range(1, 9)]
)

In [3]:
# Remove the doctors' `age` column before merging.
# drop(..., errors='ignore') keeps this cell re-runnable: `del doctors['age']`
# raises KeyError the second time the cell is executed.
doctors = doctors.drop(columns='age', errors='ignore')

In [4]:
# Attach clinic attributes to every appointment; appointments without a
# matching clinicid are discarded by the inner join.
merge_df = appointments.merge(clinics, how='inner', on='clinicid')

In [5]:
# Attach doctor attributes via doctorid (inner join: unmatched rows are dropped).
merge_df = merge_df.merge(doctors, how='inner', on='doctorid')

In [6]:
# Attach patient attributes via pxid (inner join: unmatched rows are dropped).
merge_df = merge_df.merge(px, how='inner', on='pxid')

In [7]:
# Give the remaining age/gender columns patient-specific names.
# One non-mutating rename replaces two inplace=True calls, so the cell produces
# a new frame instead of mutating the merged one in place.
merge_df = merge_df.rename(
    columns={'age': 'patientage', 'gender': 'patientgender'}
)

In [8]:
# Drop the three join-key columns, which the original code removed positionally
# with iloc[:, 3:]. Dropping by name keeps the cell idempotent: the positional
# slice strips three more columns every time the cell is re-run.
# errors='ignore' makes a repeated run a no-op.
merge_df = merge_df.drop(columns=['pxid', 'clinicid', 'doctorid'], errors='ignore')

In [9]:
# Show the distinct region names; they drive the regional split below.
merge_df.loc[:, 'RegionName'].unique()

array(['National Capital Region (NCR)', 'Central Visayas (VII)',
       'SOCCSKSARGEN (Cotabato Region) (XII)', 'CALABARZON (IV-A)',
       'Northern Mindanao (X)', 'Ilocos Region (I)', 'Bicol Region (V)',
       'Eastern Visayas (VIII)', 'Western Visayas (VI)',
       'Central Luzon (III)'], dtype=object)

In [10]:
# Region names that make up the Luzon fragment.
luzon_regions = [
    'National Capital Region (NCR)',
    'CALABARZON (IV-A)',
    'Ilocos Region (I)',
    'Bicol Region (V)',
    'Central Luzon (III)',
]

# Region names that make up the Visayas/Mindanao fragment.
visayas_mindanao_regions = [
    'Central Visayas (VII)',
    'SOCCSKSARGEN (Cotabato Region) (XII)',
    'Northern Mindanao (X)',
    'Eastern Visayas (VIII)',
    'Western Visayas (VI)',
]

# Boolean row masks, one per fragment, based on each row's RegionName.
in_luzon = merge_df['RegionName'].isin(luzon_regions)
in_visayas_mindanao = merge_df['RegionName'].isin(visayas_mindanao_regions)

luzon_df = merge_df.loc[in_luzon]

visayas_mindanao_df = merge_df.loc[in_visayas_mindanao]

In [11]:
# Display the column labels currently present in merge_df.
merge_df.columns

Index(['apptid', 'status', 'TimeQueued', 'QueueDate', 'StartTime', 'EndTime',
       'type', 'Virtual', 'hospitalname', 'IsHospital', 'City', 'Province',
       'RegionName', 'mainspecialty', 'patientage', 'patientgender'],
      dtype='object')

In [12]:
# Distinct values of the `type` column, shown in sorted order.
appointment_types = merge_df['type'].unique()
np.sort(appointment_types)

array(['Consultation', 'Inpatient'], dtype=object)

# Database Operations
### Connect to MySQL

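Connection settings (host, user, password, and one port per node) are read from a separate `connection.json` file instead of being written into the notebook.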

In [25]:
# Read the MySQL connection settings from connection.json.
# The `with` block closes the file once it has been parsed; the bare open()
# used previously left the handle open for the life of the kernel.
with open('connection.json') as connection_file:
    connection = json.load(connection_file)

# Credentials and host shared by all three nodes.
host = connection['host']
user = connection['user']
password = connection['password']

# One port per node: port0 -> engine0, port1 -> engine1, port2 -> engine2.
port0 = connection['port0']
port1 = connection['port1']
port2 = connection['port2']

In [26]:
db_name0 = 'center'

# Server-level engine (no database selected) for the node on port0.
# URL.create takes each component separately and escapes it itself, so a
# password containing characters such as '@', ':' or '/' no longer corrupts
# the URL the way plain string concatenation does.
# NOTE(review): assumes connection.json stores the raw (not percent-encoded)
# password and a numeric port string - confirm.
engine0 = db.create_engine(
    db.engine.URL.create(
        'mysql+pymysql',
        username=user,
        password=password,
        host=host,
        port=int(port0),
    )
)

In [27]:
db_name1 = 'node_2'

# Server-level engine (no database selected) for the node on port1.
# URL.create takes each component separately and escapes it itself, so a
# password containing characters such as '@', ':' or '/' no longer corrupts
# the URL the way plain string concatenation does.
# NOTE(review): assumes connection.json stores the raw (not percent-encoded)
# password and a numeric port string - confirm.
engine1 = db.create_engine(
    db.engine.URL.create(
        'mysql+pymysql',
        username=user,
        password=password,
        host=host,
        port=int(port1),
    )
)

In [28]:
db_name2 = 'node_3'

# Server-level engine (no database selected) for the node on port2.
# URL.create takes each component separately and escapes it itself, so a
# password containing characters such as '@', ':' or '/' no longer corrupts
# the URL the way plain string concatenation does.
# NOTE(review): assumes connection.json stores the raw (not percent-encoded)
# password and a numeric port string - confirm.
engine2 = db.create_engine(
    db.engine.URL.create(
        'mysql+pymysql',
        username=user,
        password=password,
        host=host,
        port=int(port2),
    )
)

Define the column types (schema) of the `appointments` table

In [29]:
# appointments schema
# Maps column names to SQLAlchemy column types; passed as `dtype=` to
# DataFrame.to_sql when the appointments table is written to each node.
sql_types = db.types
mysql_types = db.dialects.mysql

appointments_schema = dict(
    # identifiers
    pxid=sql_types.VARCHAR(32),
    clinicid=sql_types.VARCHAR(32),
    doctorid=sql_types.VARCHAR(32),
    apptid=sql_types.VARCHAR(32),
    # appointment details
    status=sql_types.Enum("Queued", "Complete", "Serving", "Cancel", "NoShow", "Skip"),
    TimeQueued=sql_types.DATETIME(),
    QueueDate=sql_types.DATE(),
    StartTime=sql_types.DATETIME(),
    EndTime=sql_types.DATETIME(),
    type=sql_types.Enum("Consultation", "Inpatient"),
    Virtual=sql_types.BOOLEAN(),
    # clinic details
    hospitalname=sql_types.VARCHAR(255),
    IsHospital=sql_types.BOOLEAN(),
    City=sql_types.VARCHAR(255),
    Province=sql_types.VARCHAR(255),
    RegionName=sql_types.VARCHAR(255),
    # doctor details
    mainspecialty=sql_types.VARCHAR(255),
    # patient details
    patientage=mysql_types.TINYINT(255),
    patientgender=sql_types.Enum("MALE", "FEMALE"),
)

Insert `merge_df` into the central node

In [30]:
# central node
# Recreate the database from scratch. WARNING: this drops any existing
# database named db_name0 on this node, including all of its tables.
with engine0.connect() as conn:
    conn.execute(db.text(f"DROP DATABASE IF EXISTS `{db_name0}`;"))
    conn.execute(db.text(f"CREATE DATABASE `{db_name0}`;"))

# Point engine0 at the new database. The URL is derived from the engine's own
# URL object instead of re-concatenating credentials into a string, and the
# old engine's connection pool is released before the name is rebound.
central_url = engine0.url.set(database=db_name0)
engine0.dispose()
engine0 = db.create_engine(central_url)

In [31]:
# Write the full merged frame as the `appointments` table on the central node,
# replacing the table if it already exists. The DataFrame index is not written.
merge_df.to_sql(
    name='appointments',
    con=engine0,
    if_exists='replace',
    index=False,
    dtype=appointments_schema,
)

320140

In [32]:
# Make apptid NOT NULL and declare it the primary key of the central
# appointments table. The statement is built up front, then executed.
central_pk_statement = db.text(f"""
        ALTER TABLE `{db_name0}`.`appointments` 
        CHANGE COLUMN `apptid` `apptid` VARCHAR(32) NOT NULL ,
        ADD PRIMARY KEY (`apptid`);
    """)

with engine0.connect() as conn:
    conn.execute(central_pk_statement)
    print("apptid IS A PRIMARY KEY IN CENTRAL NODE")

apptid IS A PRIMARY KEY IN CENTRAL NODE


Insert `luzon_df` into node 2

In [33]:
# node 2
# Recreate the database from scratch. WARNING: this drops any existing
# database named db_name1 on this node, including all of its tables.
with engine1.connect() as conn:
    conn.execute(db.text(f"DROP DATABASE IF EXISTS `{db_name1}`;"))
    conn.execute(db.text(f"CREATE DATABASE `{db_name1}`;"))

# Point engine1 at the new database. The URL is derived from the engine's own
# URL object instead of re-concatenating credentials into a string, and the
# old engine's connection pool is released before the name is rebound.
node2_url = engine1.url.set(database=db_name1)
engine1.dispose()
engine1 = db.create_engine(node2_url)

In [34]:
# Write the Luzon rows as the `appointments` table on node 2, replacing the
# table if it already exists. The DataFrame index is not written.
luzon_df.to_sql(
    name='appointments',
    con=engine1,
    if_exists='replace',
    index=False,
    dtype=appointments_schema,
)

261600

In [35]:
# Make apptid NOT NULL and declare it the primary key of node 2's
# appointments table. The statement is built up front, then executed.
node2_pk_statement = db.text(f"""
        ALTER TABLE `{db_name1}`.`appointments` 
        CHANGE COLUMN `apptid` `apptid` VARCHAR(32) NOT NULL ,
        ADD PRIMARY KEY (`apptid`);
    """)

with engine1.connect() as conn:
    conn.execute(node2_pk_statement)
    print("apptid IS A PRIMARY KEY IN NODE 2")

apptid IS A PRIMARY KEY IN NODE 2


Insert `visayas_mindanao_df` into node 3

In [36]:
# node 3
# Recreate the database from scratch. WARNING: this drops any existing
# database named db_name2 on this node, including all of its tables.
with engine2.connect() as conn:
    conn.execute(db.text(f"DROP DATABASE IF EXISTS `{db_name2}`;"))
    conn.execute(db.text(f"CREATE DATABASE `{db_name2}`;"))

# Point engine2 at the new database. The URL is derived from the engine's own
# URL object instead of re-concatenating credentials into a string, and the
# old engine's connection pool is released before the name is rebound.
node3_url = engine2.url.set(database=db_name2)
engine2.dispose()
engine2 = db.create_engine(node3_url)

In [37]:
# Write the Visayas/Mindanao rows as the `appointments` table on node 3,
# replacing the table if it already exists. The DataFrame index is not written.
visayas_mindanao_df.to_sql(
    name='appointments',
    con=engine2,
    if_exists='replace',
    index=False,
    dtype=appointments_schema,
)

58540

In [38]:
# Make apptid NOT NULL and declare it the primary key of node 3's
# appointments table. The statement is built up front, then executed.
node3_pk_statement = db.text(f"""
        ALTER TABLE `{db_name2}`.`appointments` 
        CHANGE COLUMN `apptid` `apptid` VARCHAR(32) NOT NULL ,
        ADD PRIMARY KEY (`apptid`);
    """)

with engine2.connect() as conn:
    conn.execute(node3_pk_statement)
    print("apptid IS A PRIMARY KEY IN NODE 3")

apptid IS A PRIMARY KEY IN NODE 3
