In [1]:
import pandas as pd
from secrets_config import source_db_user, source_db_password, source_db_server_name, source_db_database_name
import jinja2 as j2 

# import libraries for sql 
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

In [2]:
# connection URL for the source PostgreSQL database (pg8000 driver, port 5432)
source_connection_url = URL.create(
    "postgresql+pg8000",
    username=source_db_user,
    password=source_db_password,
    host=source_db_server_name,
    port=5432,
    database=source_db_database_name,
)

# engine handed to the extract step to read from the source database
source_engine = create_engine(source_connection_url)

In [3]:
import os 
import logging 


# create function to extract data from a source database table using a `select` query from a SQL file
def extract_from_database(table_name, engine, path="extract_queries") -> "pd.DataFrame | None":
    """
    Extracts data using the SQL query stored in `<path>/<table_name>.sql`.

    The file contents are rendered as a Jinja template (`source_table` and
    `engine` are available to the template) and the rendered SQL is executed
    with `pd.read_sql`.

    - `table_name`: the name of the table (the SQL file name without .sql)
    - `engine`: connection used to run the query; also exposed to the template
    - `path`: the path to the extract queries directory containing the sql files. defaults to `extract_queries`

    Returns the query result as a dataframe, or `None` (after logging an error)
    when there is no SQL file for `table_name` under `path`.
    """
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s][%(asctime)s]: %(message)s")

    sql_file = os.path.join(path, f"{table_name}.sql")
    # isfile (instead of listing the directory) also covers a missing `path`
    # directory and a directory that merely ends in `.sql`, so every
    # "no usable query file" case takes the same logged route.
    if not os.path.isfile(sql_file):
        logging.error(f"Could not find table: {table_name}")
        return None

    logging.info(f"Extracting table: {table_name}")

    # read sql contents into a variable
    with open(sql_file) as f:
        raw_sql = f.read()

    # parse sql using jinja
    parsed_sql = j2.Template(raw_sql).render(source_table=table_name, engine=engine)
    # execute parsed sql
    df = pd.read_sql(sql=parsed_sql, con=engine)

    logging.info(f"Successfully extracted table: {table_name}, rows extracted: {len(df)}")
    return df

In [4]:
# replace the contents of a target database table with a dataframe
def upsert_to_database(df: pd.DataFrame, table_name: str, engine) -> bool:
    """
    Write a dataframe to a database table, replacing the table if it exists.

    - `df`: pandas dataframe to write
    - `table_name`: name of the target table
    - `engine`: connection engine to database

    Always returns `True`; any error raised by the write propagates to the caller.
    """
    log_format = "[%(levelname)s][%(asctime)s]: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)

    logging.info(f"Writing to table: {table_name}")
    # if_exists="replace" overwrites the whole table; the dataframe index is not written
    df.to_sql(name=table_name, con=engine, index=False, if_exists="replace")
    logging.info(f"Successful write to table: {table_name}, rows inserted/updated: {len(df)}")

    return True

In [5]:
from secrets_config import target_db_user, target_db_password, target_db_server_name, target_db_database_name
# connection URL for the target PostgreSQL database (pg8000 driver, port 5432)
target_connection_url = URL.create(
    "postgresql+pg8000",
    username=target_db_user,
    password=target_db_password,
    host=target_db_server_name,
    port=5432,
    database=target_db_database_name,
)

# engine handed to the load step to write to the target database
target_engine = create_engine(target_connection_url)

In [6]:
# create an extract and load pipeline
def extract_load_pipeline(source_engine, target_engine, path="extract_queries"):
    """
    Extract and load one table per `.sql` file found directly under `path`.

    - `source_engine`: connection used to run each extract query
    - `target_engine`: connection the extracted dataframes are written to
    - `path`: directory containing the `<table_name>.sql` query files. defaults to `extract_queries`

    Entries in `path` that do not have a `.sql` extension are ignored.
    """
    for file in os.listdir(path):
        # splitext only strips the extension, so a `.sql` occurring elsewhere
        # in the file name is left intact
        table_name, extension = os.path.splitext(file)
        if extension != ".sql":
            # not a query file (checkpoint folder, README, ...)
            continue

        df = extract_from_database(table_name=table_name, engine=source_engine, path=path)
        if df is None:
            # nothing was extracted (the extract step logs the reason); there is nothing to load
            continue

        upsert_to_database(df=df, table_name=table_name, engine=target_engine)

In [7]:
# extract from the source database and load into the target for each sql file found in `extract_queries`
extract_load_pipeline(source_engine, target_engine, path="extract_queries")

[INFO][2022-07-23 22:33:06,448]: Extracting table: customer
[INFO][2022-07-23 22:33:06,569]: Successfully extracted table: customer, rows extracted: 599
[INFO][2022-07-23 22:33:06,570]: Writing to table: customer
[INFO][2022-07-23 22:33:06,996]: Successful write to table: customer, rows inserted/updated: 599
[INFO][2022-07-23 22:33:06,997]: Extracting table: film_category
[INFO][2022-07-23 22:33:07,036]: Successfully extracted table: film_category, rows extracted: 1000
[INFO][2022-07-23 22:33:07,037]: Writing to table: film_category
[INFO][2022-07-23 22:33:07,526]: Successful write to table: film_category, rows inserted/updated: 1000
[INFO][2022-07-23 22:33:07,527]: Extracting table: film
[INFO][2022-07-23 22:33:07,593]: Successfully extracted table: film, rows extracted: 1000
[INFO][2022-07-23 22:33:07,594]: Writing to table: film
[INFO][2022-07-23 22:33:08,281]: Successful write to table: film, rows inserted/updated: 1000
[INFO][2022-07-23 22:33:08,282]: Extracting table: staff
[INFO