# Loading (made by Dengsong Wang)

In [1]:
import psycopg2
import pandas as pd
import os

In [3]:
# Read every transformed table from ../transform in a single pass; the i-th
# name on the left is bound to the DataFrame parsed from the i-th path below.
(
    fact_table,
    date_dimension,
    location_dimension,
    real_estate_dimension,
    regional_weather_summaries,
    weather_station_summary,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../transform/fact_table.csv",
        "../transform/date_dimension.csv",
        "../transform/location_dimension.csv",
        "../transform/real_estate.csv",
        "../transform/regional_weather_summaries.csv",
        "../transform/weather_station_summary.csv",
    )
]

In [12]:
# To run this against your own database, change dbname, user, host and port below;
# the password is read from the DB_PASS environment variable.
def create_conn(dbname="4142_Database", user="postgres", password=None,
                host="localhost", port="5432"):
    """Open a new psycopg2 connection to the PostgreSQL database.

    Called with no arguments this connects exactly as before; every setting
    can now be overridden per call instead of editing the function body.

    Args:
        dbname: Name of the database to connect to.
        user: Database user name.
        password: Password to use. When ``None`` (the default) the value of
            the ``DB_PASS`` environment variable is used, which may itself
            be unset.
        host: Server host name.
        port: Server port.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller
        is responsible for closing it.
    """
    if password is None:
        # Keep the secret out of the source: read it from the environment.
        password = os.getenv("DB_PASS")
    return psycopg2.connect(
        dbname=dbname,
        user=user,
        password=password,
        host=host,
        port=port,
    )

# Connection for the table-creation step that follows. No module-level cursor
# is opened here: create_tables() opens and closes its own, so an extra one
# would only be left dangling.
conn = create_conn()

# Create the tables by running the SQL script (init_conn.session.sql by default)
def create_tables(connection, query_file_path="init_conn.session.sql"):
    """Execute the SQL script stored at ``query_file_path`` and commit it.

    The whole file is read into one string and sent to the database in a
    single ``cursor.execute`` call.

    Args:
        connection: An open DB-API connection (psycopg2 in this notebook).
            It is left open; closing it is the caller's job.
        query_file_path: Path of the SQL file to run.

    Raises:
        OSError: If the SQL file cannot be opened or read.
        Exception: Any error raised by the driver while executing or
            committing. The transaction is rolled back before the error is
            re-raised, so the connection is not left in a failed state.
    """
    # Read the script first so that a missing file never leaves a cursor open.
    with open(query_file_path, 'r') as file_o:
        query = file_o.read()

    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
    except Exception:
        # Undo the partial work so the connection can still be used or closed cleanly.
        connection.rollback()
        raise
    finally:
        # Runs on success and on failure alike.
        cursor.close()
    
try:
    create_tables(conn)
finally:
    # Release the connection even when the SQL script fails.
    conn.close()

In [43]:
# load the df into the table
from sqlalchemy import create_engine

def get_engine(connection):
    """Build a SQLAlchemy engine that targets the same database as ``connection``.

    The engine is a separate object with its own connections; it only copies
    the user, password, host, port and database name from ``connection.info``.

    Args:
        connection: A psycopg2 connection exposing ``.info``.

    Returns:
        The engine returned by ``create_engine`` for a
        ``postgresql+psycopg2://`` URL.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote

    info = connection.info
    # Percent-encode the credentials: characters such as '@', ':', '/' or '%'
    # in a user name or password would otherwise corrupt the URL.
    credentials = quote(info.user, safe="")
    if info.password:
        # Skip the password part when none is available, rather than
        # embedding the text "None" in the URL.
        credentials += ":" + quote(info.password, safe="")
    return create_engine(
        f"postgresql+psycopg2://{credentials}@{info.host}:{info.port}/{info.dbname}"
    )

def load_dataframe(dataframe, table_name, connection):
    """Write ``dataframe`` into the table ``public.<table_name>``.

    Args:
        dataframe: The pandas DataFrame to store. Its index is not written.
        table_name: Name of the destination table in the ``public`` schema.
        connection: psycopg2 connection whose settings are used to build the
            SQLAlchemy engine that performs the write. The connection itself
            is neither used for the write nor closed here.

    Note:
        ``if_exists='replace'`` drops an existing table of the same name
        before inserting.
        NOTE(review): if the SQL init script defines this table with keys or
        constraints, presumably they are lost on replace; confirm that is
        intended.
    """
    engine = get_engine(connection)
    try:
        dataframe.to_sql(table_name, engine, if_exists='replace', index=False, schema='public', method='multi')
    finally:
        # A fresh engine is built on every call, so release its pooled
        # connections instead of leaving them open.
        engine.dispose()

# Load the dimension tables first and the fact table last.
# NOTE(review): weather_station_summary is read from CSV earlier in this file
# but is not loaded here; confirm that is intentional.
conn = create_conn()
try:
    load_dataframe(location_dimension, 'location_dimension', conn)
    load_dataframe(date_dimension, 'date_dimension', conn)
    load_dataframe(real_estate_dimension, 'real_estate_dimension', conn)
    load_dataframe(regional_weather_summaries, 'regional_weather_summary', conn)
    load_dataframe(fact_table, 'facttable', conn)
finally:
    # Close the connection even if one of the loads fails part-way through.
    conn.close()

In [46]:
# validate the df is loaded successfully
def show_table(table,connection):
    """Return all rows of ``table`` as a DataFrame, then close ``connection``.

    Args:
        table: Table name. It is placed inside double quotes in the query,
            so it is matched exactly as written.
        connection: Open database connection handed to
            ``pd.read_sql_query``. It is always closed before this function
            returns or raises, so it cannot be reused afterwards.

    Returns:
        A DataFrame holding the result of ``SELECT *`` on the table.

    Raises:
        Exception: Whatever ``pd.read_sql_query`` raises, for example when
            the table does not exist. The connection is still closed.
    """
    # Double any embedded double quote so the name remains a single quoted
    # identifier instead of terminating the quotes early.
    quoted_name = table.replace('"', '""')
    query = f"SELECT * FROM \"{quoted_name}\""
    # NOTE(review): pandas documents SQLAlchemy connectables and sqlite3
    # connections as the supported inputs; a raw psycopg2 connection works
    # but may trigger a UserWarning.
    try:
        return pd.read_sql_query(query, connection)
    finally:
        # Closing here is part of this function's contract.
        connection.close()

conn = create_conn()
try:
    table_df = show_table('facttable',conn)
finally:
    # show_table closes the connection when it succeeds; closing again here
    # guarantees it is also released if the query raises.
    conn.close()

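# Cell output (truncated). This appears to be the source line quoted by a
# warning raised inside show_table:
#   dataframe = pd.read_sql_query(query, connection)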
