In [1]:
### CREATE TABLE

from typing import NewType
# Requires Python 3.6 or newer

import psycopg2

# Distinct types (typing.NewType) used to annotate the cursor and connection
# parameters of drop_tables() and create_tables().
PostgresCursor = NewType("PostgresCursor", psycopg2.extensions.cursor)
PostgresConn = NewType("PostgresConn", psycopg2.extensions.connection)
# SQL statements: one DROP and one CREATE per table (org, actors, events).
table_drop_events = "DROP TABLE IF EXISTS events"
table_drop_actors = "DROP TABLE IF EXISTS actors"
table_drop_org = "DROP TABLE IF EXISTS org"

table_create_actors = """
    CREATE TABLE IF NOT EXISTS actors (
        id int,
        name text,
        org_id int,
        PRIMARY KEY(id),
        CONSTRAINT fk_org FOREIGN KEY(org_id) REFERENCES org(id)
    )
"""

table_create_events = """
    CREATE TABLE IF NOT EXISTS events (
        id text,
        type text,
        actor_id int,
        PRIMARY KEY(id),
        CONSTRAINT fk_actor FOREIGN KEY(actor_id) REFERENCES actors(id)
    )
"""

table_create_org = """
    CREATE TABLE IF NOT EXISTS org (
        id int,
        name text,
        PRIMARY KEY(id)
    )
"""

# Tables to create. Order matters: a table referenced by a foreign key
# (org, then actors) must exist before the table that references it.
create_table_queries = [
    table_create_org,
    table_create_actors,
    table_create_events,
]
# Tables to drop, in the reverse order: referencing tables first.
drop_table_queries = [
    table_drop_events,
    table_drop_actors,
    table_drop_org,
]

def drop_tables(cur: PostgresCursor, conn: PostgresConn) -> None:
    """
    Run every DROP statement listed in `drop_table_queries`.

    Each statement is executed on `cur` and committed on `conn`
    before the next one is issued.
    """
    for drop_sql in drop_table_queries:
        cur.execute(drop_sql)
        conn.commit()

def create_tables(cur: PostgresCursor, conn: PostgresConn) -> None:
    """
    Run every CREATE statement listed in `create_table_queries`.

    Each statement is executed on `cur` and committed on `conn`
    before the next one is issued.
    """
    for create_sql in create_table_queries:
        cur.execute(create_sql)
        conn.commit()

def main():
    """
    Rebuild the tables in the local PostgreSQL database.

    - Connects to the `postgres` database on 127.0.0.1 and opens a cursor.
    - Drops the existing tables via `drop_tables`.
    - Creates the tables via `create_tables`.
    - Closes the cursor and the connection, even if a step raises.
    """
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from the environment before sharing this notebook.
    conn = psycopg2.connect(
        "host=127.0.0.1 dbname=postgres user=postgres password=postgres"
    )
    try:
        # The cursor context manager closes the cursor when the block exits.
        with conn.cursor() as cur:
            drop_tables(cur, conn)
            create_tables(cur, conn)
    finally:
        # Release the connection even when a DROP/CREATE statement fails.
        conn.close()

# Run main() only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

In [2]:
### ETL

import pandas as pd

import psycopg2

def _insert_json_records(cur, conn, json_path, insert_sql, column_casts):
    """Insert every record of one JSON file using `insert_sql`.

    Args:
        cur: Open database cursor used to execute the inserts.
        conn: Connection owning `cur`; committed once after the file is loaded.
        json_path: Path of the JSON file to read with `pandas.read_json`.
        insert_sql: Parameterized INSERT statement (`%s` placeholders).
        column_casts: Sequence of `(column_name, cast)` pairs, in placeholder
            order; `cast` is the built-in type each value is converted to.
    """
    df = pd.read_json(json_path)
    print(df.head(20))
    if df.empty:
        # Nothing to load (e.g. the file holds an empty array).
        return

    names = [name for name, _ in column_casts]
    casts = [cast for _, cast in column_casts]
    # Positional iteration: does not depend on the frame's index labels.
    for record in df[names].itertuples(index=False, name=None):
        # Cast to built-in int/str; psycopg2 has no adapter for NumPy
        # scalar types by default.
        params = tuple(cast(value) for cast, value in zip(casts, record))
        cur.execute(insert_sql, params)
    # One commit per file instead of one per row.
    conn.commit()


def process(cur, conn):
    """Load the org, actors and events JSON files into their tables.

    Files are read from the `data/` directory relative to the current
    working directory. They are loaded in the order org -> actors -> events,
    so rows referenced by another table are inserted first. Rows whose `id`
    already exists are skipped (`ON CONFLICT (id) DO NOTHING`), so the load
    can be re-run.

    Args:
        cur: Open database cursor on `conn`; all inserts run on it.
        conn: Database connection; committed after each file.
    """
    _insert_json_records(
        cur,
        conn,
        'data/org.json',
        "INSERT INTO org (id, name) VALUES(%s, %s) ON CONFLICT (id) DO NOTHING;",
        (("id", int), ("name", str)),
    )
    _insert_json_records(
        cur,
        conn,
        'data/actors.json',
        "INSERT INTO actors (id, name,org_id) VALUES(%s, %s, %s) ON CONFLICT (id) DO NOTHING;",
        (("id", int), ("name", str), ("org_id", int)),
    )
    _insert_json_records(
        cur,
        conn,
        'data/events.json',
        "INSERT INTO events (id, type,actor_id) VALUES(%s, %s, %s) ON CONFLICT (id) DO NOTHING;",
        (("id", int), ("type", str), ("actor_id", int)),
    )
        
        


def main():
    """
    Run the ETL load against the local PostgreSQL database.

    Connects to the `postgres` database on 127.0.0.1, opens a cursor,
    passes both to `process`, and closes the cursor and the connection
    even if the load raises.
    """
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from the environment before sharing this notebook.
    conn = psycopg2.connect(
        "host=127.0.0.1 dbname=postgres user=postgres password=postgres"
    )
    try:
        # The cursor context manager closes the cursor when the block exits.
        with conn.cursor() as cur:
            process(cur, conn)
    finally:
        # Release the connection even when an insert fails.
        conn.close()


# Run main() only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

   id      name
0   1  CompanyA
1   2  CompanyB
   id    name  org_id
0   1  ActorA       1
1   2  ActorB       1
2   3  ActorC       2
   id      type  actor_id
0   1     Music         1
1   2  TalkShow         2
