In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import psycopg2
warnings.filterwarnings('ignore')

from sqlalchemy import create_engine
from dotenv import load_dotenv

## Load SQL Dataset

In [2]:
# Load data from sql
def load_data(query: str) -> pd.DataFrame:
    # Load environment variables from .env file
    env_path = os.path.join("..", ".env")
    load_dotenv(dotenv_path=env_path)

    # Retrieve database connection parameters
    DB_USER = os.getenv("DB_USER")
    DB_PASSWORD = os.getenv("DB_PASSWORD")
    DB_HOST = os.getenv("DB_HOST")
    DB_PORT = os.getenv("DB_PORT")
    DB_NAME = os.getenv("DB_NAME")

    # Use psycopg2 directly
    conn = psycopg2.connect(
        host=DB_HOST,
        port=DB_PORT,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD
    )
    
    try:
        df = pd.read_sql_query(query, conn)
        return df
    finally:
        conn.close()

In [22]:
# List of SQL Queries

query_1 = "SELECT * FROM raw.supply_chain_data;"
query_2 = "SELECT * FROM core.products;"
query_3 = "SELECT * FROM core.suppliers;"
query_4 = "SELECT * FROM core.inventory_status;"
query_5 = "SELECT * FROM core.sales_orders;"
query_6 = "SELECT * FROM core.production_metrics;"
query_7 = "SELECT * FROM staging.stg_supply_chain_data;"
query_8 = "SELECT * FROM logistics.shipments;"
query_9 = "SELECT * FROM inspection.quality_inspections;"
query_10 = "SELECT * FROM logistics.container_registry;"
query_11 = "SELECT * FROM logistics.shipments;"
query_12 = "SELECT * FROM inspection.feature_mart;"
query_13 = "SELECT * FROM inspection.cv_detections;"
query_14 = "SELECT * FROM ml.inspection_training;"
query_15 = "SELECT * FROM ml.inspection_training m LEFT JOIN logistics.shipments s ON m.shipment_id = s.shipment_id;"

In [4]:
for sql in [query_1, query_2, query_3, query_4, query_5, query_6, query_7, query_8, query_9, query_10, query_11, query_12, query_13, query_14]:
    df = load_data(sql)
    print(f"Data from query:\n{df.head()}\n")

Data from query:
                               order_id  availability     costs  \
0  6af613b6-569c-5c22-9c37-2ed93f31d3af            55  188.6847   
1  b04965e6-a9bb-591f-8f8a-1adcb2c8dc39            55  187.4925   
2  4b166dbe-d99d-5091-abdd-95b83330ed3a            55  188.9681   
3  98123fde-012f-5ff3-8b50-881449dac91a            55  190.6116   
4  6ed955c6-506a-5343-9be4-2c0afae02eef            55  187.3124   

  customer_demographics  defect_rates inspection_results  lead_time  \
0            Non-binary      0.224447            Pending         29   
1            Non-binary      0.226086            Pending         29   
2            Non-binary      0.231951            Pending         29   
3            Non-binary      0.233616            Pending         29   
4            Non-binary      0.221073            Pending         29   

   lead_times location  manufacturing_costs  ...  product_type  \
0           8   Mumbai              43.4694  ...      haircare   
1           8   Mumba

In [5]:
metadata = pd.read_csv("../data/image_metadata.csv")
metadata.head()

Unnamed: 0,detection_id,image_name,shipment_id,container_id,class_id,confidence,bbox_x,bbox_y,bbox_w,bbox_h,bbox_area,detected_at,model_version
0,,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.989,0.535185,0.273698,0.035185,0.197396,0.006945,2026-01-26 18:37:47.802469,yolo_v8
1,,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.987,0.95463,0.26849,0.02963,0.226562,0.006713,2026-01-26 18:37:47.802495,yolo_v8
2,,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,1,0.876,0.145833,0.521875,0.084259,0.022917,0.001931,2026-01-26 18:37:47.802498,yolo_v8
3,,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.937,0.561111,0.27474,0.053704,0.226562,0.012167,2026-01-26 18:37:47.802771,yolo_v8
4,,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.751,0.7875,0.351042,0.032407,0.159375,0.005165,2026-01-26 18:37:47.802774,yolo_v8


In [None]:
# Primary: SELECT * FROM inspection.cv_detections;
load_data(query_13).head()

Unnamed: 0,detection_id,image_name,shipment_id,container_id,class_id,confidence,bbox_x_center,bbox_y_center,bbox_width,bbox_height,bbox_area,detected_at,model_version
0,c14e129b-62fa-48c7-9d7c-98acedb7f1bf,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.989,0.535185,0.273698,0.035185,0.197396,0.006945,2026-01-26 18:37:47.802469,yolo_v8
1,b73784e2-1036-4d72-9b7c-e3662afbaeb4,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.987,0.95463,0.26849,0.02963,0.226562,0.006713,2026-01-26 18:37:47.802495,yolo_v8
2,6dd811b3-6437-4d41-b73e-b513de0d3bee,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,1,0.876,0.145833,0.521875,0.084259,0.022917,0.001931,2026-01-26 18:37:47.802498,yolo_v8
3,f4345e22-343c-41ab-9277-bea38daf2001,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.937,0.561111,0.27474,0.053704,0.226562,0.012167,2026-01-26 18:37:47.802771,yolo_v8
4,5eb3a128-9d3a-4d11-81da-2f806b55d278,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.751,0.7875,0.351042,0.032407,0.159375,0.005165,2026-01-26 18:37:47.802774,yolo_v8


In [None]:
# manual inspector results: SELECT * FROM inspection.quality_inspections;
load_data(query_9).head()

Unnamed: 0,inspection_id,product_id,inspection_result,defect_rate,lead_time,inspected_at
0,c0a687cd-aad6-438c-acc5-87affaeb4ee9,6af613b6-569c-5c22-9c37-2ed93f31d3af,Pending,0.224447,29,2026-01-29
1,86cd4fa5-983c-4338-8f94-0e30a0e6403b,b04965e6-a9bb-591f-8f8a-1adcb2c8dc39,Pending,0.226086,29,2026-01-29
2,ca8b347c-2b68-48a8-aabf-663cc25b9862,4b166dbe-d99d-5091-abdd-95b83330ed3a,Pending,0.231951,29,2026-01-29
3,78716785-883e-4047-b0f3-e9e56e250c31,98123fde-012f-5ff3-8b50-881449dac91a,Pending,0.233616,29,2026-01-29
4,d5f4ef67-a556-47cc-8672-1349f1805754,6ed955c6-506a-5343-9be4-2c0afae02eef,Pending,0.221073,29,2026-01-29


In [None]:
# Business Impact Layer: SELECT * FROM logistics.shipments;
load_data(query_11).head()

Unnamed: 0,shipment_id,product_id,supplier_id,shipping_carrier,transportation_mode,route,shipping_cost,total_cost,shipping_time,shipped_at
0,00008f68-92bf-4a78-ae7d-fbd7777e6118,571c764d-3494-590e-a92a-3d1fae982307,481d2a47-1926-4300-a761-3eaee832726e,Carrier B,Road,Route B,4.39,849.65,5,2026-01-24
1,000134e7-0c94-48f5-ad2e-a65802791497,75c17c44-28e4-5394-924e-1fe88480eef4,5851b929-d1da-42ae-8e9a-03fb1a00dc5e,Carrier C,Sea,Route B,8.12,760.83,10,2026-01-24
2,0002802a-34be-440d-9830-b929cf450699,8b2cc05f-a1ad-5f3d-a3bf-3302b5cc9adb,78f7bced-cc7f-42ca-97ca-454196333805,Carrier A,Rail,Route B,7.42,996.64,2,2026-01-24
3,0008b8e3-5204-45ea-9ebb-962288fd1e24,6799c474-653d-559d-8638-c8e45c7e6c2e,78f7bced-cc7f-42ca-97ca-454196333805,Carrier B,Rail,Route B,4.35,230.18,10,2026-01-24
4,000b13e6-df05-48df-a32e-32ef2b7668c6,6fcc2a1b-dc88-50a5-bb99-da1c9fe6f75a,f0840541-582b-4cd7-8e4b-675d14885b1d,Carrier A,Road,Route C,4.79,472.6,1,2026-01-24


In [None]:
# ML Layer: SELECT * FROM ml.inspection_training;
load_data(query_14).head()

Unnamed: 0,shipment_id,total_detections,avg_confidence,total_damage_area,dent_count,defect_rate,is_high_risk
0,76ca8527-35b6-4b6e-82a5-ac1516853d14,4,0.8425,0.011481,0,0.226086,0
1,bf046423-d395-4109-8431-e03a6ac08bf5,2,0.887,0.012859,0,0.232399,1
2,160ad73a-faa7-4cd5-a68d-7c351a0c30a1,2,0.856,0.005352,0,0.224839,0
3,f221fddb-9725-456b-a674-d6c3315328b2,2,0.866,0.01061,0,0.221834,0
4,6a5c35a6-8d70-4b53-982e-9ece522a34c2,3,0.908,0.008472,0,0.227898,0


In [20]:
load_data(query_14).head() # SELECT * FROM ml.inspection_training;

Unnamed: 0,shipment_id,total_detections,avg_confidence,total_damage_area,dent_count,defect_rate,is_high_risk
0,76ca8527-35b6-4b6e-82a5-ac1516853d14,4,0.8425,0.011481,0,0.226086,0
1,bf046423-d395-4109-8431-e03a6ac08bf5,2,0.887,0.012859,0,0.232399,1
2,160ad73a-faa7-4cd5-a68d-7c351a0c30a1,2,0.856,0.005352,0,0.224839,0
3,f221fddb-9725-456b-a674-d6c3315328b2,2,0.866,0.01061,0,0.221834,0
4,6a5c35a6-8d70-4b53-982e-9ece522a34c2,3,0.908,0.008472,0,0.227898,0


In [23]:
load_data(query_15).head() # SELECT * FROM ml.inspection_training m LEFT JOIN logistics.shipments s ON m.shipment_id = s.shipment_id;

Unnamed: 0,shipment_id,total_detections,avg_confidence,total_damage_area,dent_count,defect_rate,is_high_risk,shipment_id.1,product_id,supplier_id,shipping_carrier,transportation_mode,route,shipping_cost,total_cost,shipping_time,shipped_at
0,76ca8527-35b6-4b6e-82a5-ac1516853d14,4,0.8425,0.011481,0,0.226086,0,76ca8527-35b6-4b6e-82a5-ac1516853d14,b04965e6-a9bb-591f-8f8a-1adcb2c8dc39,a3257930-e46a-4994-ae1c-28e5a794489e,Carrier B,Road,Route B,3.22,187.49,3,2026-01-24
1,bf046423-d395-4109-8431-e03a6ac08bf5,2,0.887,0.012859,0,0.232399,1,bf046423-d395-4109-8431-e03a6ac08bf5,7fef88f7-411d-5669-b42d-bf5fc7f9b58b,bccc2020-682a-4478-9518-a1e220548c72,Carrier B,Road,Route B,2.71,186.88,2,2026-01-24
2,160ad73a-faa7-4cd5-a68d-7c351a0c30a1,2,0.856,0.005352,0,0.224839,0,160ad73a-faa7-4cd5-a68d-7c351a0c30a1,23986425-d3a5-5e13-8bab-299745777a8d,27415fea-7f99-4659-89b0-5ff57dc2ef2b,Carrier B,Road,Route B,2.69,186.7,2,2026-01-24
3,f221fddb-9725-456b-a674-d6c3315328b2,2,0.866,0.01061,0,0.221834,0,f221fddb-9725-456b-a674-d6c3315328b2,ce1ae2d5-3454-5952-97ff-36ff935bcfe9,bccc2020-682a-4478-9518-a1e220548c72,Carrier B,Road,Route B,2.77,188.46,6,2026-01-24
4,6a5c35a6-8d70-4b53-982e-9ece522a34c2,3,0.908,0.008472,0,0.227898,0,6a5c35a6-8d70-4b53-982e-9ece522a34c2,8f8173d9-2f8d-5636-a693-24d9f79ba651,bdf8677d-73aa-408a-b2cd-78caba9743e4,Carrier B,Road,Route B,3.31,191.23,2,2026-01-24
