In [4]:
import psycopg2
import os
import glob

In [2]:
# Detect the runtime: on Google Colab the Drive is mounted and the notebook
# works against it; anywhere else the Colab module is missing and the
# notebook falls back to local folders.
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
except ModuleNotFoundError:
    # The Colab package is not importable: running locally.
    is_local = True
else:
    is_local = False

In [5]:
# Root of the landing zone: a relative folder when running locally,
# otherwise the project folder on the mounted Google Drive.
if is_local:
    folder_landing = "./landing"
else:
    folder_landing = "/content/drive/MyDrive/ADSDB/landing"

# Sub-folders of the landing zone.
folder_temporal = os.path.join(folder_landing, "temporal")
folder_persistent = os.path.join(folder_landing, "persistent")

# Folder that holds one sub-folder per extracted dataset.
extract_dir = os.path.join(folder_persistent, "extracted")

In [68]:
# Column definitions for each table type, keyed by table-type name.
# Each value is the text placed between the parentheses of the
# CREATE TABLE statement built in create_table(); the keys are the table
# types referenced by table_equi.
# All column names are double-quoted inside the SQL text.
table_spec = dict(
    # MortICD: identification columns followed by "Deaths1".."Deaths26"
    # and "IM_Deaths1".."IM_Deaths4", all declared as integer.
    MortICD = '''
    "Country" integer,
    "Admin1" VARCHAR (3),
    "SubDiv" VARCHAR (3),
    "Year" integer,
    "List" VARCHAR (3),
    "Cause" VARCHAR (4),
    "Sex" integer,
    "Frmat" VARCHAR (2),
    "IM_Frmat" VARCHAR (2),
    "Deaths1" integer,
    "Deaths2" integer,
    "Deaths3" integer,
    "Deaths4" integer,
    "Deaths5" integer,
    "Deaths6" integer,
    "Deaths7" integer,
    "Deaths8" integer,
    "Deaths9" integer,
    "Deaths10" integer,
    "Deaths11" integer,
    "Deaths12" integer,
    "Deaths13" integer,
    "Deaths14" integer,
    "Deaths15" integer,
    "Deaths16" integer,
    "Deaths17" integer,
    "Deaths18" integer,
    "Deaths19" integer,
    "Deaths20" integer,
    "Deaths21" integer,
    "Deaths22" integer,
    "Deaths23" integer,
    "Deaths24" integer,
    "Deaths25" integer,
    "Deaths26" integer,
    "IM_Deaths1" integer,
    "IM_Deaths2" integer,
    "IM_Deaths3" integer,
    "IM_Deaths4" integer
    ''',
    # CountryCodes: integer country code and a name of up to 50 characters.
    CountryCodes = '''
    "country" integer,
    "name" VARCHAR(50)
    ''',
    # Notes: country, year and a note of up to 100 characters.
    Notes = '''
    "country" integer,
    "year" integer,
    "note" VARCHAR (100)
    ''',
    # Population: identification columns followed by "Pop1".."Pop26"
    # (numeric) and "Lb" (integer).
    Population = '''
    "Country" integer,
    "Admin1" VARCHAR (3),
    "SubDiv" VARCHAR (3),
    "Year" integer,
    "Sex" integer,
    "Frmat" VARCHAR (2),
    "Pop1" numeric,
    "Pop2" numeric,
    "Pop3" numeric,
    "Pop4" numeric,
    "Pop5" numeric,
    "Pop6" numeric,
    "Pop7" numeric,
    "Pop8" numeric,
    "Pop9" numeric,
    "Pop10" numeric,
    "Pop11" numeric,
    "Pop12" numeric,
    "Pop13" numeric,
    "Pop14" numeric,
    "Pop15" numeric,
    "Pop16" numeric,
    "Pop17" numeric,
    "Pop18" numeric,
    "Pop19" numeric,
    "Pop20" numeric,
    "Pop21" numeric,
    "Pop22" numeric,
    "Pop23" numeric,
    "Pop24" numeric,
    "Pop25" numeric,
    "Pop26" numeric,
    "Lb" integer
    ''',
)

In [112]:
# Maps the dataset name parsed from an extracted folder to the
# (table type, ICD revision) pair used to create its target table.
# None marks a dataset that is not loaded; tables that are not MortICD
# use 0 as their revision.
table_equi = {
    "mort_availability": None,
    "mort_country_codes": ("CountryCodes", 0),
    "mort_documentation71f9e29d-7e3f-41e6-aafc-c4c1775c7aa3": None,
    "mort_notes": ("Notes", 0),
    "mort_pop": ("Population", 0),
    # One dataset per ICD revision 7, 8 and 9: "morticd07".."morticd09".
    **{f"morticd{rev:02d}": ("MortICD", rev) for rev in (7, 8, 9)},
    # ICD-10 is split into five part files that share one table type/revision.
    **{f"morticd10_part{part}": ("MortICD", 10) for part in range(1, 6)},
}

In [124]:
def create_table(cursor, table_type, icd_rev, timestamp):
    """Create the table for one dataset if it does not exist yet.

    The table is named ``formatted.<table_type>_<icd_rev>_<timestamp>`` and
    its column list is taken from ``table_spec[table_type]``.

    Args:
        cursor: Database cursor; only its ``execute`` method is used.
        table_type: Key of ``table_spec`` selecting the column definitions.
        icd_rev: Value embedded as the second component of the table name.
        timestamp: Value embedded as the last component of the table name.

    Returns:
        The schema-qualified table name that was used in the statement.

    Raises:
        KeyError: If ``table_type`` is not a key of ``table_spec``.
    """
    qualified_name = "formatted.{}_{}_{}".format(table_type, icd_rev, timestamp)
    columns_sql = table_spec[table_type]

    # The name and column list are interpolated directly into the SQL text.
    ddl = f'''CREATE TABLE IF NOT EXISTS {qualified_name} (
        {columns_sql}
    );
    '''
    cursor.execute(ddl)

    return qualified_name

def load_csv(cursor, table_name, filename):
    """Bulk-load a CSV file into ``table_name`` through ``COPY ... FROM STDIN``.

    The file is opened in text mode and handed to the cursor's
    ``copy_expert`` method. The COPY statement declares a comma delimiter
    and ``CSV HEADER``.

    Args:
        cursor: Database cursor; only its ``copy_expert`` method is used.
        table_name: Target table, interpolated directly into the SQL text.
        filename: Path of the CSV file to load.

    Returns:
        None.
    """
    with open(filename, 'r') as source:
        copy_sql = f'''
            COPY {table_name}
            FROM STDIN
            DELIMITER ','
            CSV HEADER;
        '''
        cursor.copy_expert(copy_sql, source)

In [125]:
# Open the database connection shared by the cells below. Only the database
# name and the user are passed explicitly.
# NOTE(review): host, port and password are presumably resolved from the
# driver's defaults or the environment — confirm for the target deployment.
conn = psycopg2.connect(dbname="adsdb", user="adsdb")

In [126]:
# Load every extracted dataset file into its table in the "formatted" schema.
#
# `with conn` commits when the block finishes and rolls back if anything
# raises, so a failed COPY does not leave the shared connection stuck in an
# aborted transaction; `with conn.cursor()` closes the cursor afterwards.
#
# NOTE(review): tables are created with IF NOT EXISTS and then appended to,
# so re-running this cell against the same folders loads the same rows again.
with conn:
    with conn.cursor() as cur:
        cur.execute('''CREATE SCHEMA IF NOT EXISTS formatted''')

        # Sorted so the load order (and the printed log) is the same on
        # every run; glob returns matches in arbitrary order.
        for file_path in sorted(glob.glob(os.path.join(extract_dir, "*", "*"))):
            if os.path.basename(file_path) == "metadata.json":
                continue

            # Folder names have the form "<name>-<sha>-<timestamp>". They are
            # split from the right because <name> itself may contain "-".
            folder_base = os.path.basename(os.path.dirname(file_path))
            name_sha, _, timestamp = folder_base.rpartition("-")
            name = name_sha.rpartition("-")[0]

            # An unknown dataset name raises KeyError, which rolls back the
            # whole load.
            table = table_equi[name]
            if table is None:
                # Dataset explicitly marked as not loaded.
                continue

            table_type, icd_rev = table
            # Only the part of the timestamp before the "." goes into the
            # table name.
            target_table = create_table(cur, table_type, icd_rev, timestamp.partition(".")[0])
            load_csv(cur, target_table, file_path)

            print(target_table)

formatted.CountryCodes_0_1642852775
formatted.Notes_0_1642852775
formatted.Population_0_1642852775
formatted.MortICD_7_1642852775
formatted.MortICD_8_1642852775
formatted.MortICD_9_1642852775
formatted.MortICD_10_1642852776
formatted.MortICD_10_1642852776
formatted.MortICD_10_1642852776
formatted.MortICD_10_1642852776
formatted.MortICD_10_1642852776


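## Obsolete

Manual single-file loads that use placeholder timestamps (2024/2025). They are superseded by the loop above, which loads the same files.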

In [52]:
# Obsolete manual load of one ICD-10 part file into a table whose name uses
# the placeholder timestamp 2024.
# The path is built from extract_dir rather than a hard-coded "./landing/..."
# prefix so it follows the local/Colab folder configuration.
# `with conn` commits on success and rolls back if the load raises.
with conn:
    with conn.cursor() as cur:
        table_name = create_table(cur, "MortICD", 10, 2024)
        load_csv(cur, table_name, os.path.join(
            extract_dir,
            'morticd10_part1-f695bf0a3dbff3e1d662c04e35df31322f581233fb6898a6472a43e470fd26f3-1642852776.002986',
            'Morticd10_part1',
        ))
        print(table_name)

In [55]:
# Obsolete manual load of the country-codes file into a table whose name
# uses the placeholder timestamp 2024.
# The path is built from extract_dir rather than a hard-coded "./landing/..."
# prefix so it follows the local/Colab folder configuration.
# `with conn` commits on success and rolls back if the load raises.
with conn:
    with conn.cursor() as cur:
        table_name = create_table(cur, "CountryCodes", 0, 2024)
        load_csv(cur, table_name, os.path.join(
            extract_dir,
            'mort_country_codes-8c410820356fc572845b5281b36f638e044a565f808c4e72efc8fb69b07df6b2-1642852775.502403',
            'country_codes',
        ))
        print(table_name)

In [64]:
# Obsolete manual load of the notes file into a table whose name uses the
# placeholder timestamp 2024.
# The path is built from extract_dir rather than a hard-coded "./landing/..."
# prefix so it follows the local/Colab folder configuration.
# `with conn` commits on success and rolls back if the load raises.
with conn:
    with conn.cursor() as cur:
        table_name = create_table(cur, "Notes", 0, 2024)
        load_csv(cur, table_name, os.path.join(
            extract_dir,
            'mort_notes-a67b4db9d9867e9076791d77c3ce9895eb19a7c6a789f367764676ad23eaec36-1642852775.507765',
            'notes',
        ))
        print(table_name)

In [72]:
# Obsolete manual load of the population file into a table whose name uses
# the placeholder timestamp 2025.
# The path is built from extract_dir rather than a hard-coded "./landing/..."
# prefix so it follows the local/Colab folder configuration.
# `with conn` commits on success and rolls back if the load raises.
with conn:
    with conn.cursor() as cur:
        table_name = create_table(cur, "Population", 0, 2025)
        load_csv(cur, table_name, os.path.join(
            extract_dir,
            'mort_pop-3b8f463ba095690338a8d9692ab4bb0457639eefff12c9df7486c1cdaf7ac833-1642852775.50846',
            'pop',
        ))
        print(table_name)