In [None]:
from dotenv import dotenv_values
# Load the notebook settings from the local .env file; later cells read the
# SCHEMA, DBNAME, USER and PASSWORD keys from this mapping.
config = dotenv_values(".env")

In [35]:
import psycopg2
import os
import glob
import json

In [24]:
# Detect the runtime: when the google.colab package is importable the notebook
# runs on Colab and Google Drive is mounted; otherwise it is treated as local.
try:
    from google.colab import drive
except ModuleNotFoundError:
    is_local = True
else:
    # Mount outside the try-body so that an import failure raised while
    # mounting is reported instead of being mistaken for "running locally".
    drive.mount('/content/drive', force_remount=True)
    is_local = False

In [25]:
# Root of the landing zone: a relative folder when running locally,
# the mounted Drive folder when running on Colab.
if is_local:
    folder_landing = "./landing"
else:
    folder_landing = "/content/drive/MyDrive/ADSDB/landing"

# Sub-folders of the landing zone.
folder_temporal, folder_persistent = (
    os.path.join(folder_landing, zone) for zone in ("temporal", "persistent")
)

# Extracted datasets live under the persistent folder.
extract_dir = os.path.join(folder_persistent, "extracted")

In [26]:
# Column definitions per table type. Every entry starts as an empty string and
# is filled in by the cell that reads the matching SQL file.
table_spec = dict.fromkeys(
    (
        "MortICD",
        "CountryCodes",
        "Notes",
        "Population",
        "DemographicCountry",
        "DemographicLabels",
        "Demographic",
    ),
    '',
)

In [28]:
# SQL file holding the column definitions of each table type; every file is
# named after its table type.
table_file = {
    table_type: f"{table_type}.sql"
    for table_type in (
        "MortICD",
        "CountryCodes",
        "Notes",
        "Population",
        "DemographicCountry",
        "DemographicLabels",
        "Demographic",
    )
}

In [29]:
# Read the column definitions of every table type from its SQL file.
# The text is stored verbatim: it is embedded inside a CREATE TABLE statement,
# where line breaks are harmless. The previous `.replace('/n', '')` was a typo
# for a newline escape; it never removed newlines and would have deleted any
# literal "/n" from the SQL text.
for table, sql_filename in table_file.items():
    with open(f'../sql/{sql_filename}', 'r') as file:
        table_spec[table] = file.read()

In [31]:
# Maps each file name found in an extracted dataset folder to the pair
# (table type, revision tag) used to build its target table name.
# Files mapped to None are skipped by the loader.
table_equi = {
    "Documentation_21June2021.doc": None,
    "list_ctry_yrs_21June2021.xlsx": None,
    "country_codes": ("CountryCodes", 0),
    "notes": ("Notes", 0),
    "pop": ("Population", 0),
    "MortIcd7": ("MortICD", 7),
    "Morticd8": ("MortICD", 8),
    "Morticd9": ("MortICD", 9),
    # The ICD-10 data is split into five part files.
    **{
        f"Morticd10_part{part}": ("MortICD", f"10_{part}")
        for part in range(1, 6)
    },
    "DEM_COUNTRY.csv": ("DemographicCountry", 0),
    "DEM_LABEL.csv": ("DemographicLabels", 0),
    "DEM_DATA_NATIONAL.csv": ("Demographic", 0),
}

In [39]:
def create_table(cursor, table_type, icd_rev, timestamp):
    """Create the versioned table for ``table_type`` if it is missing.

    The qualified table name is ``<SCHEMA>.<table_type>_<icd_rev>_<timestamp>``,
    with the schema read from ``config['SCHEMA']`` and the column definitions
    taken from ``table_spec[table_type]``.

    Args:
        cursor: Object exposing ``execute(sql)``; receives the DDL statement.
        table_type: Key into ``table_spec`` and first part of the table name.
        icd_rev: Revision tag interpolated into the table name.
        timestamp: Version suffix interpolated into the table name.

    Returns:
        The schema-qualified table name as a string.
    """
    schema = config['SCHEMA']
    table_name = f"{schema}.{table_type}_{icd_rev}_{timestamp}"
    columns = table_spec[table_type]

    ddl = f'''CREATE TABLE IF NOT EXISTS {table_name} (
        {columns}
    );
    '''
    cursor.execute(ddl)

    return table_name

def load_csv(cursor, table_name, filename):
    """Stream a comma-delimited CSV file with a header row into ``table_name``.

    Args:
        cursor: Object exposing ``copy_expert(sql, file)``.
        table_name: Target table, interpolated as-is into the COPY statement.
        filename: Path of the CSV file, opened in text read mode.
    """
    with open(filename, 'r') as source:
        # COPY ... FROM STDIN reads the rows from the open file handle.
        statement = f'''
            COPY {table_name}
            FROM STDIN
            DELIMITER ','
            CSV HEADER;
        '''
        cursor.copy_expert(statement, source)

In [40]:
# Open the PostgreSQL connection with the database name and credentials
# read from the .env configuration.
conn = psycopg2.connect(dbname=config['DBNAME'], user=config['USER'], password=config['PASSWORD'])

In [41]:
# Load every extracted dataset into its own versioned table.
#
# All statements run in one transaction: it is committed only when every file
# loaded successfully and rolled back otherwise, so a failure does not leave
# the connection stuck in an aborted transaction for later cells.
#
# NOTE(review): tables are created with IF NOT EXISTS and then filled with
# COPY, so re-running this cell appends the same rows again — confirm whether
# already-loaded versions should be skipped.
cur = conn.cursor()

try:
    cur.execute('''CREATE SCHEMA IF NOT EXISTS ''' + config['SCHEMA'])

    # One folder per extracted dataset, recognised by its metadata.json.
    # Sorted so the load order (and the log below) is deterministic.
    for meta in sorted(glob.glob(os.path.join(extract_dir, "*", "metadata.json"))):

        with open(meta, 'r') as f:
            metadata = json.load(f)

        folder_path = os.path.dirname(meta)
        folder_base = os.path.basename(folder_path)

        contents = sorted(glob.glob(os.path.join(folder_path, "*")))

        # The folder name is split on its last two "-" separators
        # (presumably "<name>-<sha>-<timestamp>[.ext]" — confirm against the
        # extraction step). The version tag is the first four characters of
        # the sha plus the timestamp up to its first ".".
        name_sha, _, timestamp = folder_base.rpartition("-")
        name, _, sha = name_sha.rpartition("-")
        version = sha[:4] + "_" + timestamp.partition(".")[0]

        for i in contents:
            filename = os.path.basename(i)
            if filename == "metadata.json":
                continue

            # Unknown files and files mapped to None are not loaded.
            table = table_equi.get(filename)

            if table is not None:
                target_table = create_table(cur, table[0], table[1], version)
                load_csv(cur, target_table, i)

                print("LOAD", filename, "==>", target_table)
            else:
                print("SKIP", filename)
except BaseException:
    # Also covers a kernel interrupt; the error is always re-raised.
    conn.rollback()
    raise
else:
    conn.commit()

LOAD MortIcd7 ==> formatted.MortICD_7_22b4_1642928616
LOAD Morticd8 ==> formatted.MortICD_8_9366_1642928616
LOAD Morticd9 ==> formatted.MortICD_9_71c9_1642928616
LOAD Morticd10_part1 ==> formatted.MortICD_10_1_f695_1642928617
LOAD Morticd10_part2 ==> formatted.MortICD_10_2_ffb1_1642928617
LOAD Morticd10_part3 ==> formatted.MortICD_10_3_1c6a_1642928618
LOAD Morticd10_part4 ==> formatted.MortICD_10_4_453e_1642928618
SKIP list_ctry_yrs_21June2021.xlsx
LOAD country_codes ==> formatted.CountryCodes_0_8c41_1642928619
SKIP Documentation_21June2021.doc
LOAD notes ==> formatted.Notes_0_a67b_1642928619
LOAD pop ==> formatted.Population_0_3b8f_1642928619
