In [4]:
import psycopg2
import os

In [2]:
# Decide whether this notebook is running locally or on Google Colab.
# If the google.colab package can be imported, Google Drive is (re)mounted at
# /content/drive and the run is flagged as non-local.
try:
  from google.colab import drive
  drive.mount('/content/drive', force_remount=True)
  is_local = False
except ModuleNotFoundError:
  # google.colab is not importable here, so treat this as a local run.
  is_local = True

In [5]:
# Root of the landing zone: a relative folder for local runs, otherwise the
# project folder inside the mounted Google Drive.
if is_local:
    folder_landing = "./landing"
else:
    folder_landing = "/content/drive/MyDrive/ADSDB/landing"

# Sub-folders of the landing zone.
folder_temporal = os.path.join(folder_landing, "temporal")
folder_persistent = os.path.join(folder_landing, "persistent")

# Extracted files live under the persistent sub-folder.
extract_dir = os.path.join(folder_persistent, "extracted")

In [7]:
%ls landing/persistent/extracted

[0m[01;34mmort_availability-72696e4e26befe59c99ef76edf051c3d797350d4b7c7cb084f60f0b61f6280cc-1642852775.496732[0m[K/
[01;34mmort_country_codes-8c410820356fc572845b5281b36f638e044a565f808c4e72efc8fb69b07df6b2-1642852775.502403[0m[K/
[01;34mmort_documentation71f9e29d-7e3f-41e6-aafc-c4c1775c7aa3-eaff73ddddc2ea57057f3fc20038f0a42ea16c0442b8e28e93e8df47de50542b-1642852775.503271[0m[K/
[01;34mmorticd07-22b463f703974c247c37a33133713348800a4cc68163eb37ff7ac139406237b5-1642852775.518347[0m[K/
[01;34mmorticd08-93662076d32b3ca378d3f9bd7170ca5c2ec81dc43958c6500ec928fe72886e6d-1642852775.615834[0m[K/
[01;34mmorticd09-71c96f66c67f0ae8fb0da942e9ba1a0b050b6d60450c58997c49fe6ba16f9b6f-1642852775.732444[0m[K/
[01;34mmorticd10_part1-f695bf0a3dbff3e1d662c04e35df31322f581233fb6898a6472a43e470fd26f3-1642852776.002986[0m[K/
[01;34mmorticd10_part2-ffb1ddb02ca487f0e074977c486fe89dc61d1180041b53e5be5872f7cf61e0da-1642852776.261795[0m[K/
[01;34mmorticd10_part3-1c6ac83e205760a121b515cad8

In [68]:
# Column definitions for each supported table type.
# Every value is a fragment of SQL (a comma-separated list of quoted column
# names with their types) that create_table() places between the parentheses
# of a CREATE TABLE statement, so the text must remain valid SQL as written.
table_spec = {
    "MortICD": '''
    "Country" integer,
    "Admin1" VARCHAR (3),
    "SubDiv" VARCHAR (3),
    "Year" integer,
    "List" VARCHAR (3),
    "Cause" VARCHAR (4),
    "Sex" integer,
    "Frmat" VARCHAR (2),
    "IM_Frmat" VARCHAR (2),
    "Deaths1" integer,
    "Deaths2" integer,
    "Deaths3" integer,
    "Deaths4" integer,
    "Deaths5" integer,
    "Deaths6" integer,
    "Deaths7" integer,
    "Deaths8" integer,
    "Deaths9" integer,
    "Deaths10" integer,
    "Deaths11" integer,
    "Deaths12" integer,
    "Deaths13" integer,
    "Deaths14" integer,
    "Deaths15" integer,
    "Deaths16" integer,
    "Deaths17" integer,
    "Deaths18" integer,
    "Deaths19" integer,
    "Deaths20" integer,
    "Deaths21" integer,
    "Deaths22" integer,
    "Deaths23" integer,
    "Deaths24" integer,
    "Deaths25" integer,
    "Deaths26" integer,
    "IM_Deaths1" integer,
    "IM_Deaths2" integer,
    "IM_Deaths3" integer,
    "IM_Deaths4" integer
    ''',
    "CountryCodes": '''
    "country" integer,
    "name" VARCHAR(50)
    ''',
    "Notes": '''
    "country" integer,
    "year" integer,
    "note" VARCHAR (100)
    ''',
    "Population": '''
    "Country" integer,
    "Admin1" VARCHAR (3),
    "SubDiv" VARCHAR (3),
    "Year" integer,
    "Sex" integer,
    "Frmat" VARCHAR (2),
    "Pop1" numeric,
    "Pop2" numeric,
    "Pop3" numeric,
    "Pop4" numeric,
    "Pop5" numeric,
    "Pop6" numeric,
    "Pop7" numeric,
    "Pop8" numeric,
    "Pop9" numeric,
    "Pop10" numeric,
    "Pop11" numeric,
    "Pop12" numeric,
    "Pop13" numeric,
    "Pop14" numeric,
    "Pop15" numeric,
    "Pop16" numeric,
    "Pop17" numeric,
    "Pop18" numeric,
    "Pop19" numeric,
    "Pop20" numeric,
    "Pop21" numeric,
    "Pop22" numeric,
    "Pop23" numeric,
    "Pop24" numeric,
    "Pop25" numeric,
    "Pop26" numeric,
    "Lb" integer
    ''',
}

In [57]:
def create_table(cursor, table_type, icd_rev, timestamp):
    """Create a table in the ``formatted`` schema and return its name.

    The table is named ``formatted.<table_type>_<icd_rev>_<timestamp>`` and its
    columns are taken from ``table_spec[table_type]``. The statement is only
    executed on ``cursor``; committing is left to the caller.

    Args:
        cursor: object exposing ``execute(sql)`` (e.g. a psycopg2 cursor).
        table_type: key into ``table_spec`` selecting the column layout.
        icd_rev: value interpolated as the second component of the table name.
        timestamp: value interpolated as the last component of the table name.

    Returns:
        The schema-qualified table name exactly as written into the statement.

    Raises:
        KeyError: if ``table_type`` is not a key of ``table_spec``.
    """
    qualified_name = f"formatted.{table_type}_{icd_rev}_{timestamp}"
    column_defs = table_spec[table_type]

    # The name is interpolated unquoted, exactly as it is returned.
    ddl = f'''CREATE TABLE {qualified_name} (
        {column_defs}
    );
    '''
    cursor.execute(ddl)

    return qualified_name

def load_csv(cursor, table_name, filename):
    """Stream a comma-separated file into ``table_name`` via ``COPY ... FROM STDIN``.

    The file is opened in text mode and handed to ``cursor.copy_expert``. The
    COPY statement declares ``CSV HEADER`` with ``,`` as the delimiter, so the
    first line of the file is treated as a header row. Committing is left to
    the caller.

    Args:
        cursor: object exposing ``copy_expert(sql, file)`` (e.g. a psycopg2 cursor).
        table_name: target table, interpolated into the statement as given.
        filename: path of the CSV file to load.
    """
    copy_statement = f'''
            COPY {table_name}
            FROM STDIN
            DELIMITER ','
            CSV HEADER;
        '''
    with open(filename, 'r') as source:
        cursor.copy_expert(copy_statement, source)

In [70]:
# Open the PostgreSQL connection shared by the cells below. The DSN names only
# the database ("adsdb") and the user ("adsdb"); no host or password is given.
conn = psycopg2.connect("dbname=adsdb user=adsdb")

In [44]:
# Make sure the `formatted` schema exists before any table is created in it.
# `with conn` commits when the block succeeds and rolls back if it raises, so a
# failure here cannot leave the shared connection in an aborted transaction;
# the cursor is closed on exit.
with conn, conn.cursor() as cur:
    cur.execute('''CREATE SCHEMA IF NOT EXISTS formatted''')

In [52]:
# Create the MortICD table (name components: revision 10, suffix 2024).
# `with conn` commits on success and rolls back if CREATE TABLE raises (for
# example when the table already exists), so later cells can keep using the
# connection; the cursor is closed on exit.
with conn, conn.cursor() as cur:
    table_name = create_table(cur, "MortICD", 10, 2024)
print(table_name)

formatted.MortICD_10_2024


In [None]:
# Load the Morticd10_part1 extract into the table `table_name` currently refers
# to (run the MortICD table-creation cell immediately before this one).
# The path is built from `extract_dir` so it follows the local/Colab switch
# instead of assuming ./landing relative to the working directory.
csv_path = os.path.join(
    extract_dir,
    'morticd10_part1-f695bf0a3dbff3e1d662c04e35df31322f581233fb6898a6472a43e470fd26f3-1642852776.002986',
    'Morticd10_part1',
)
# Commit on success, roll back on failure, and close the cursor afterwards.
with conn, conn.cursor() as cur:
    load_csv(cur, table_name, csv_path)

In [55]:
# Create the CountryCodes table (name components: revision 0, suffix 2024).
# `with conn` commits on success and rolls back if CREATE TABLE raises, so a
# failure does not leave the shared connection in an aborted transaction; the
# cursor is closed on exit.
with conn, conn.cursor() as cur:
    table_name = create_table(cur, "CountryCodes", 0, 2024)
print(table_name)

formatted.CountryCodes_0_2024


In [59]:
# Load the country_codes extract into the table `table_name` currently refers
# to (run the CountryCodes table-creation cell immediately before this one).
# The path is built from `extract_dir` so it follows the local/Colab switch
# instead of assuming ./landing relative to the working directory.
csv_path = os.path.join(
    extract_dir,
    'mort_country_codes-8c410820356fc572845b5281b36f638e044a565f808c4e72efc8fb69b07df6b2-1642852775.502403',
    'country_codes',
)
# Commit on success, roll back on failure, and close the cursor afterwards.
with conn, conn.cursor() as cur:
    load_csv(cur, table_name, csv_path)

In [63]:
# Create the Notes table (name components: revision 0, suffix 2024).
# `with conn` commits on success and rolls back if CREATE TABLE raises, so a
# failure does not leave the shared connection in an aborted transaction; the
# cursor is closed on exit.
with conn, conn.cursor() as cur:
    table_name = create_table(cur, "Notes", 0, 2024)
print(table_name)

formatted.Notes_0_2024


In [64]:
# Load the notes extract into the table `table_name` currently refers to (run
# the Notes table-creation cell immediately before this one).
# The path is built from `extract_dir` so it follows the local/Colab switch
# instead of assuming ./landing relative to the working directory.
csv_path = os.path.join(
    extract_dir,
    'mort_notes-a67b4db9d9867e9076791d77c3ce9895eb19a7c6a789f367764676ad23eaec36-1642852775.507765',
    'notes',
)
# Commit on success, roll back on failure, and close the cursor afterwards.
with conn, conn.cursor() as cur:
    load_csv(cur, table_name, csv_path)

In [71]:
# Create the Population table (name components: revision 0, suffix 2025).
# NOTE(review): the other tables in this notebook use the suffix 2024 - confirm
# that 2025 is intentional here.
# `with conn` commits on success and rolls back if CREATE TABLE raises, so a
# failure does not leave the shared connection in an aborted transaction; the
# cursor is closed on exit.
with conn, conn.cursor() as cur:
    table_name = create_table(cur, "Population", 0, 2025)
print(table_name)

formatted.Population_0_2025


In [72]:
# Load the pop extract into the table `table_name` currently refers to (run the
# Population table-creation cell immediately before this one).
# The path is built from `extract_dir` so it follows the local/Colab switch
# instead of assuming ./landing relative to the working directory.
csv_path = os.path.join(
    extract_dir,
    'mort_pop-3b8f463ba095690338a8d9692ab4bb0457639eefff12c9df7486c1cdaf7ac833-1642852775.50846',
    'pop',
)
# Commit on success, roll back on failure, and close the cursor afterwards.
with conn, conn.cursor() as cur:
    load_csv(cur, table_name, csv_path)