In [1]:
# etl_load.ipynb
import os
import sqlite3
from contextlib import closing

import pandas as pd

# Set correct working directory.
# Every path used later in this cell ('data/transformed/...', 'loaded/...') is
# relative, so the notebook has to run from the project root. The root defaults
# to the original author's folder, but can be overridden with the
# ETL_PROJECT_DIR environment variable so the notebook runs on other machines
# without editing the code.
project_folder = os.environ.get('ETL_PROJECT_DIR', r'c:\Users\Admin\Desktop\ETL_Midterm_Queen_897')
os.chdir(project_folder)  # raises FileNotFoundError if the folder does not exist
print("Current working directory:", os.getcwd())

# Verify input files.
# Each path is defined once so the existence check and the read cannot drift
# apart, and the error names exactly which file(s) are missing.
full_csv_path = 'data/transformed/transformed_full.csv'
incremental_csv_path = 'data/transformed/transformed_incremental.csv'
missing_inputs = [
    csv_path
    for csv_path in (full_csv_path, incremental_csv_path)
    if not os.path.exists(csv_path)
]
if missing_inputs:
    raise FileNotFoundError(
        "Ensure transformed_full.csv and transformed_incremental.csv are in the data/transformed/ folder"
        f" (missing: {', '.join(missing_inputs)})"
    )

# Load transformed data
full_df = pd.read_csv(full_csv_path)
incremental_df = pd.read_csv(incremental_csv_path)

# Create loaded directory (no-op when it already exists)
os.makedirs('loaded', exist_ok=True)

# Connect to SQLite databases.
# closing() guarantees both connections are closed even if a load or a
# verification query raises. A sqlite3 connection used directly as a context
# manager only commits / rolls back the transaction; it does not close itself.
with closing(sqlite3.connect('loaded/full_data.db')) as full_conn, \
        closing(sqlite3.connect('loaded/incremental_data.db')) as incremental_conn:
    # Load data into SQLite. if_exists='replace' drops an existing table before
    # inserting, so re-running the cell does not duplicate rows.
    full_df.to_sql('full_data', full_conn, if_exists='replace', index=False)
    incremental_df.to_sql('incremental_data', incremental_conn, if_exists='replace', index=False)

    # Verify data with SQL query: read a small sample back from each database
    print("Full Data - First 5 Rows:")
    full_query = pd.read_sql_query("SELECT * FROM full_data LIMIT 5", full_conn)
    print(full_query)

    print("\nIncremental Data - First 5 Rows:")
    incremental_query = pd.read_sql_query("SELECT * FROM incremental_data LIMIT 5", incremental_conn)
    print(incremental_query)

# Connections are closed at this point (on success and on failure alike)
print("\nData loaded into SQLite databases in loaded/ folder.")

Current working directory: c:\Users\Admin\Desktop\ETL_Midterm_Queen_897
Full Data - First 5 Rows:
   order_id customer_name product  quantity  unit_price  order_date   region  \
0         1         Diana  Tablet       2.0  500.000000  2024-01-20    South   
1         2           Eve  Laptop       2.0  607.142857  2024-04-29    North   
2         3       Charlie  Laptop       2.0  250.000000  2024-01-08  Unknown   
3         4           Eve  Laptop       2.0  750.000000  2024-01-07     West   
4         5           Eve  Tablet       3.0  607.142857  2024-03-07    South   

   total_price  
0  1000.000000  
1  1214.285714  
2   500.000000  
3  1500.000000  
4  1821.428571  

Incremental Data - First 5 Rows:
   order_id customer_name product  quantity  unit_price  order_date   region  \
0       101         Alice  Laptop       1.5       900.0  2024-05-09  Central   
1       102       Unknown  Laptop       1.0       300.0  2024-05-07  Central   
2       103       Unknown  Laptop       1.0  