# ETL Load Notebook

#### Libraries

In [19]:
import pandas as pd
import sqlite3
from sqlalchemy import create_engine
import os

#### Loading Transformed Data

In [None]:
# Read the full and the incremental transformed CSV files into DataFrames.
transformed_full, transformed_incremental = (
    pd.read_csv(csv_path)
    for csv_path in (
        'transformed/transformed_full.csv',
        'transformed/transformed_incremental.csv',
    )
)
# Preview the first rows of the full dataset as a quick sanity check.
print(transformed_full.head())

   order_id customer_name product  quantity  unit_price  order_date region  \
0         1         Diana  Tablet       2.0       500.0  2024-01-20  SOUTH   
1         2           Eve  Laptop       2.0       250.0  2024-04-29  NORTH   
2         3       Charlie  Laptop       2.0       250.0  2024-01-08   WEST   
3         4           Eve  Laptop       2.0       750.0  2024-01-07   WEST   
4         5           Eve  Tablet       3.0       500.0  2024-03-07  SOUTH   

   total_price     region_group  customer_order_frequency customer_tier  
0       1000.0  Southern_Region                        24           VIP  
1        500.0  Northern_Region                        17           VIP  
2        500.0   Western_Region                        11           VIP  
3       1500.0   Western_Region                        17           VIP  
4       1500.0  Southern_Region                        17           VIP  


#### Create Database storage

In [None]:
# Make sure the 'loaded' output directory exists; exist_ok=True keeps this
# cell safe to re-run when the directory is already there.
os.makedirs(name='loaded', exist_ok=True)

#### Creating SQL database for full data

In [None]:
# SQLAlchemy engine pointing at the SQLite file for the full dataset.
engine_full = create_engine('sqlite:///loaded/full_data.db')
# Write the full DataFrame to table 'full_data', replacing the table if it
# already exists; the DataFrame index is not written as a column.
# Kept as the last expression so the cell echoes to_sql's return value.
transformed_full.to_sql(
    name='full_data',
    con=engine_full,
    if_exists='replace',
    index=False,
)

99

#### Creating SQL database for incremental data

In [None]:
# SQLAlchemy engine pointing at the SQLite file for the incremental dataset.
engine_incremental = create_engine('sqlite:///loaded/incremental_data.db')
# Write the incremental DataFrame to its own table. The table was previously
# named 'full_data' (copy-paste from the full-load cell), which mislabelled
# the incremental rows; it is now 'incremental_data' to match the database
# file and the data it holds.
# NOTE(review): anything that queries 'full_data' inside incremental_data.db
# must be pointed at 'incremental_data' instead.
# if_exists='replace' recreates the table on every run and index=False keeps
# the DataFrame index out of the table. Kept as the last expression so the
# cell echoes to_sql's return value.
transformed_incremental.to_sql(
    'incremental_data',
    engine_incremental,
    if_exists='replace',
    index=False,
)

9

## Querying the database

#### Verifying connection

In [None]:
# Open a direct sqlite3 connection to the full-dataset database file.
# Note: sqlite3.connect creates the file if it is missing, so a successful
# connect alone does not prove the load happened; the query in the
# following cell is the actual check.
conn_full = sqlite3.connect(database='loaded/full_data.db')

#### Verification Query

In [None]:
# Verification step (from the exam requirements): read back the first five
# rows of the 'full_data' table, print them, and save them as a CSV sample.
verification_query = "SELECT * FROM full_data LIMIT 5"
try:
    # Run the query on the connection opened in the previous cell and load
    # the rows into a DataFrame.
    verification_result = pd.read_sql_query(verification_query, conn_full)
finally:
    # Always release the SQLite handle, even if the query raises; the
    # DataFrame is already fully loaded, so nothing below needs the connection.
    conn_full.close()

print("First 5 records from full database:")
print(verification_result)
# Save the verification sample to CSV (part of the exam requirements).
verification_result.to_csv('loaded/verification_sample.csv', index=False)

First 5 records from full database:
   order_id customer_name product  quantity  unit_price  order_date region  \
0         1         Diana  Tablet       2.0       500.0  2024-01-20  SOUTH   
1         2           Eve  Laptop       2.0       250.0  2024-04-29  NORTH   
2         3       Charlie  Laptop       2.0       250.0  2024-01-08   WEST   
3         4           Eve  Laptop       2.0       750.0  2024-01-07   WEST   
4         5           Eve  Tablet       3.0       500.0  2024-03-07  SOUTH   

   total_price     region_group  customer_order_frequency customer_tier  
0       1000.0  Southern_Region                        24           VIP  
1        500.0  Northern_Region                        17           VIP  
2        500.0   Western_Region                        11           VIP  
3       1500.0   Western_Region                        17           VIP  
4       1500.0  Southern_Region                        17           VIP  
