In [7]:
import os
import sys
from optparse import OptionParser, OptionGroup

import psycopg2
import psycopg2.extras

In [2]:
# Connect to the two existing databases: conn1 is the database rows are read
# from, conn2 is the ETL database that pipeline() writes into.
# The DSNs can be supplied through the environment so credentials do not have
# to live in the notebook; the original literals remain only as fallbacks so
# existing setups keep working unchanged.
conn1 = psycopg2.connect(
    os.environ.get("DB_PROJECT_DSN", "dbname=DB-project user=postgres password=admin")
)
conn2 = psycopg2.connect(
    os.environ.get("DB_PROJECT_ETL_DSN", "dbname=DB-Project-ETL user=postgres password=admin")
)

In [3]:
# Switch both connections to autocommit before any statement is issued.
# NOTE(review): with autocommit on, the explicit commit()/rollback() calls in
# pipeline() presumably have nothing pending to act on — confirm intent.
conn1.autocommit = True
conn2.autocommit = True

# Module-level cursors; the cells shown here open their own cursors instead,
# so these are not referenced by the functions below.
cur1, cur2 = conn1.cursor(), conn2.cursor()

# Pipeline

In [4]:
def extract_pk(cursor, table):
    """Query the primary-key column(s) of ``table`` from ``information_schema``.

    The statement is executed on ``cursor``; nothing is fetched here, so the
    caller reads the ``(column_name, data_type)`` rows from the cursor.

    Args:
        cursor: DB-API cursor (psycopg2) on the database to inspect.
        table: Table name to look up. It is sent as a bound parameter rather
            than formatted into the SQL text, so names containing quotes
            cannot break or alter the statement.

    Returns:
        None.
    """
    # Only the table name is filtered on; there is no schema filter, so a
    # same-named table in another schema would contribute rows as well.
    query = """
    SELECT c.column_name, c.data_type
    FROM information_schema.table_constraints tc
    JOIN information_schema.constraint_column_usage AS ccu USING (constraint_schema, constraint_name)
    JOIN information_schema.columns AS c ON c.table_schema = tc.constraint_schema
      AND tc.table_name = c.table_name AND ccu.column_name = c.column_name
    WHERE constraint_type = 'PRIMARY KEY' and tc.table_name = %s;
    """
    cursor.execute(query, (table,))

def extract_pk_values(cursor, table, pk):
    """Select the ``pk`` column of every row in ``table``.

    The statement is only executed; the caller fetches the rows from
    ``cursor``.

    Args:
        cursor: DB-API cursor to run the statement on.
        table: Table name, inserted into the SQL text verbatim.
        pk: Column name, inserted into the SQL text verbatim.

    Returns:
        None.
    """
    # Both names are formatted straight into the statement (no quoting or
    # escaping happens here), so they must come from a trusted source.
    template = """
    SELECT {pk}
    FROM {table}
    """
    statement = template.format(pk=pk, table=table)
    cursor.execute(statement)
    
def table_allrows(cursor, table):
    """Execute ``SELECT *`` over ``table`` on ``cursor``.

    Rows are not fetched here; the caller pulls them from the cursor.

    Args:
        cursor: DB-API cursor to run the statement on.
        table: Table name, inserted into the SQL text verbatim (so it must be
            trusted).

    Returns:
        None.
    """
    template = """
    SELECT *
    FROM {table};
    """
    statement = template.format(table=table)
    cursor.execute(statement)

def insert_warehouse(cursor, value, table, pk, pk_value):
    """Replace one row of ``table``: delete by key, then insert the new values.

    Both statements are sent in a single ``execute`` call.

    Args:
        cursor: DB-API cursor on the database being written to.
        value: Pre-rendered SQL ``VALUES`` tuple text, e.g. ``"(1, 'a')"``.
        table: Table name.
        pk: Name of the key column used in the ``DELETE`` filter.
        pk_value: Pre-rendered SQL literal for the key of the row to replace.

    Returns:
        None.
    """
    # Every argument is formatted into the SQL text as-is; quoting and
    # escaping are entirely the caller's responsibility.
    template = """
    DELETE FROM {table}
    WHERE {pk}={pk_value};
    INSERT INTO {table}
    VALUES {value}
    """
    statement = template.format(
        pk=pk,
        pk_value=pk_value,
        table=table,
        value=value,
    )
    cursor.execute(statement)
    
def delete_warehouse(cursor, table, pk, pk_values):
    """Delete every row of ``table`` whose ``pk`` is not listed in ``pk_values``.

    Args:
        cursor: DB-API cursor on the database being written to.
        table: Table name.
        pk: Name of the key column to filter on.
        pk_values: Pre-rendered, parenthesised SQL list of keys to keep,
            e.g. ``"(1, 2, 3)"``.

    Returns:
        None.
    """
    # All arguments are formatted into the SQL text unchanged, so the caller
    # must supply a well-formed, already-escaped list.
    template = """
    DELETE FROM {table}
    WHERE {pk} NOT IN {pk_values}
    """
    statement = template.format(pk_values=pk_values, pk=pk, table=table)
    cursor.execute(statement)
    
def _sql_literal(item):
    """Render one Python value as the text of a SQL literal."""
    if item is None:
        return "NULL"
    if isinstance(item, (bool, int, float)):
        return repr(item)
    # Everything else (str, and types such as dates or decimals) is written as
    # a quoted string with embedded single quotes doubled.
    return "'" + str(item).replace("'", "''") + "'"


def record_value(records):
    """Turn one fetched row into SQL text for the key and for ``VALUES``.

    Each element is rendered on its own, so ``None`` becomes ``NULL`` without
    touching the word "None" inside string data, and single quotes inside
    strings are escaped by doubling them.

    Args:
        records: A non-empty row (sequence of column values). The first
            element is treated as the row's key.

    Returns:
        A ``(pk_value, value)`` pair. ``pk_value`` is the first element,
        returned unchanged when it is a bool/int/float and as a quoted SQL
        literal otherwise. ``value`` is the whole row rendered as a
        parenthesised, comma-separated SQL tuple such as ``"(1, 'a', NULL)"``.

    Raises:
        IndexError: If ``records`` is empty.
    """
    first = records[0]
    if isinstance(first, (bool, int, float)):
        # Numeric keys are handed back as-is, as callers may rely on the type.
        pk_value = first
    else:
        pk_value = _sql_literal(first)
    value = "(" + ", ".join(_sql_literal(item) for item in records) + ")"
    return pk_value, value

def key_pk_values(keys):
    """Render the first column of each row in ``keys`` as a SQL value list.

    Args:
        keys: Iterable of rows (e.g. the result of ``fetchall()``); only
            element ``0`` of each row is used.

    Returns:
        A parenthesised, comma-separated string such as ``"(1, 2)"`` or
        ``"('a', 'b')"``. String keys are single-quoted with embedded single
        quotes doubled; other values are formatted with ``str.format``.
        Empty input yields ``"()"``.
    """
    rendered = []
    for key in keys:
        first = key[0]
        if isinstance(first, str):
            # Double embedded quotes so the literal stays well-formed.
            rendered.append("'" + first.replace("'", "''") + "'")
        else:
            rendered.append('{}'.format(first))
    # join() avoids trimming a trailing separator, which previously mangled
    # the result to ")" when there were no keys at all.
    # NOTE(review): "()" is balanced but PostgreSQL presumably still rejects an
    # empty IN list, so callers should special-case empty input — confirm.
    return '(' + ', '.join(rendered) + ')'

In [5]:
def pipeline(table):
    """Synchronise ``table`` from the ``conn1`` database into the ``conn2`` one.

    Phase 1 walks every row of ``table`` on ``conn1`` and, for each row,
    deletes the row with the same key on ``conn2`` and inserts the fresh
    values. Phase 2 deletes from ``conn2`` every row whose key is no longer
    present on ``conn1``.

    Uses the module-level connections ``conn1`` and ``conn2``.

    NOTE(review): only the first primary-key column reported by
    ``extract_pk`` is used, and ``record_value`` takes the first column of
    each row as the key value. This presumably assumes a single-column primary
    key stored in the table's first column — confirm for every table passed in.

    Args:
        table: Name of the table, expected to exist in both databases.

    Returns:
        None.
    """
    # Phase 1: insert new rows / refresh existing ones.
    with conn1.cursor() as source_cur:
        table_allrows(source_cur, table)
        row = source_cur.fetchone()
        # The key column name is looked up on the conn2 side.
        with conn2.cursor() as meta_cur:
            extract_pk(meta_cur, table)
            pk = meta_cur.fetchone()[0]
        while row is not None:
            pk_value, value = record_value(row)
            with conn2.cursor() as target_cur:
                try:
                    insert_warehouse(target_cur, value, table, pk, pk_value)
                    conn2.commit()
                except Exception:
                    # Undo the failed statement, then let the error surface.
                    conn2.rollback()
                    raise
            row = source_cur.fetchone()
    # Phase 2: drop rows that no longer exist on the conn1 side.
    with conn1.cursor() as source_cur:
        extract_pk_values(source_cur, table, pk)
        pk_values = key_pk_values(source_cur.fetchall())
        with conn2.cursor() as target_cur:
            delete_warehouse(target_cur, table, pk, pk_values)
            conn2.commit()

In [6]:
# Run the sync for the 'language_book' table (reads via conn1, writes via conn2).
pipeline('language_book')

# Dependencies

In [20]:
def writedeps(cursor, tbl):
    """Print one Graphviz DOT edge per foreign-key constraint declared on ``tbl``.

    Each edge has the form ``tbl -> referenced_table [label=constraint];``.

    Args:
        cursor: DB-API cursor (psycopg2) on the database to inspect.
        tbl: Name of the table whose foreign keys are listed. It is sent as a
            bound parameter rather than interpolated into the SQL text.

    Returns:
        None; the edges are written to standard output.
    """
    sql = """SELECT
        tc.constraint_name, tc.table_name, kcu.column_name,
        ccu.table_name AS foreign_table_name,
        ccu.column_name AS foreign_column_name
    FROM
        information_schema.table_constraints AS tc
    JOIN information_schema.key_column_usage AS kcu ON
        tc.constraint_name = kcu.constraint_name
    JOIN information_schema.constraint_column_usage AS ccu ON
        ccu.constraint_name = tc.constraint_name
    WHERE constraint_type = 'FOREIGN KEY' AND tc.table_name = %s"""
    cursor.execute(sql, (tbl,))
    # The joins match on constraint name only, so a multi-column foreign key
    # comes back as several rows; emit each (target, constraint) edge once.
    seen = set()
    for row in cursor.fetchall():
        constraint, _table, _column, foreign_table, _foreign_column = row
        edge = (foreign_table, constraint)
        if edge in seen:
            continue
        seen.add(edge)
        print('{} -> {} [label={}];'.format(tbl, foreign_table, constraint))


def get_tables(cursor):
    """Yield the name of every table in the ``public`` schema.

    This is a generator: the query runs when iteration starts, not when the
    function is called.

    Args:
        cursor: DB-API cursor on the database to inspect.

    Yields:
        The first column (``tablename``) of each ``pg_tables`` row.
    """
    cursor.execute("SELECT tablename FROM pg_tables WHERE schemaname='public'")
    # The full result is fetched before the first name is yielded, so the
    # consumer may run other statements on the same cursor while iterating.
    fetched = cursor.fetchall()
    yield from (entry[0] for entry in fetched)

    

In [21]:
# Write the foreign-key dependency graph of the conn1 database as Graphviz DOT.
print("Digraph F {\n")
print('ranksep=1.0; size="18.5, 15.5"; rankdir=LR;')
with conn1.cursor() as cursor1:
    # One cursor serves both calls: get_tables() has already fetched all
    # table names before writedeps() issues its own query.
    for i in get_tables(cursor1):
        writedeps(cursor1, i)
# Close the graph once every table's edges have been printed.
print("}")

Digraph F {

ranksep=1.0; size="18.5, 15.5"; rankdir=LR;
borrowed -> user_library [label=borrowed_user_id_fkey];
written_by -> writer [label=written_by_writer_id_fkey];
translated_by -> translator [label=translated_by_translator_id_fkey];
language_book -> languageb [label=language_book_languageb_fkey];
genre_book -> genre [label=genre_book_genre_fkey];
}


# DAG