In [61]:
import os
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

In [77]:
# Load environment variables from the .env file located one directory above
# the current working directory.
load_dotenv(os.path.join(os.getcwd(), '..', '.env'))

# Database connection parameters from environment variables
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')

# Fail fast when a required setting is absent. os.getenv returns None for an
# unset variable, and interpolating that into a URL would silently produce the
# literal text "None" and only fail later, at connect time.
_required = {'DB_USER': DB_USER, 'DB_HOST': DB_HOST, 'DB_NAME': DB_NAME}
_missing = [name for name, value in _required.items() if value is None]
if _missing:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing)}"
    )

# Create a database connection.
# URL.create escapes each component, so credentials containing characters
# such as '@', ':', '/' or '#' cannot corrupt the connection URL the way plain
# string interpolation can. An unset/empty port or password is simply omitted.
engine = create_engine(
    URL.create(
        drivername="postgresql+psycopg2",
        username=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=int(DB_PORT) if DB_PORT else None,
        database=DB_NAME,
    )
)

# Queries that read each source table in full
query_1 = "SELECT * FROM inspection.cv_detections;"
query_2 = "SELECT * FROM ml.inspection_training;"

In [78]:
# Load the cv_detections query result into a pandas DataFrame. The connection
# is opened in a context manager so it is released as soon as the read ends.
with engine.connect() as connection:
    df_cv_detections = pd.read_sql(text(query_1), connection)

# The status line previously contained mis-decoded bytes (UTF-8 read as
# cp1252); the intended chart emoji and multiplication sign are restored.
print(f"📊 Data loaded: {df_cv_detections.shape[0]} rows × {df_cv_detections.shape[1]} columns")
print("\nFirst few rows:")
df_cv_detections.head()

ðŸ“Š Data loaded: 2134 rows Ã— 13 columns

First few rows:


Unnamed: 0,detection_id,image_name,shipment_id,container_id,class_id,confidence,bbox_x_center,bbox_y_center,bbox_width,bbox_height,bbox_area,detected_at,model_version
0,c14e129b-62fa-48c7-9d7c-98acedb7f1bf,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.989,0.535185,0.273698,0.035185,0.197396,0.006945,2026-01-26 18:37:47.802469,yolo_v8
1,b73784e2-1036-4d72-9b7c-e3662afbaeb4,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.987,0.95463,0.26849,0.02963,0.226562,0.006713,2026-01-26 18:37:47.802495,yolo_v8
2,6dd811b3-6437-4d41-b73e-b513de0d3bee,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,1,0.876,0.145833,0.521875,0.084259,0.022917,0.001931,2026-01-26 18:37:47.802498,yolo_v8
3,f4345e22-343c-41ab-9277-bea38daf2001,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.937,0.561111,0.27474,0.053704,0.226562,0.012167,2026-01-26 18:37:47.802771,yolo_v8
4,5eb3a128-9d3a-4d11-81da-2f806b55d278,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.751,0.7875,0.351042,0.032407,0.159375,0.005165,2026-01-26 18:37:47.802774,yolo_v8


In [81]:
# Read the ml.inspection_training query result into a DataFrame, then show it
# as the cell output.
with engine.connect() as connection:
    df_ml_inspection = pd.read_sql(sql=text(query_2), con=connection)

df_ml_inspection

Unnamed: 0,shipment_id,total_detections,avg_confidence,total_damage_area,dent_count,defect_rate,is_high_risk
0,76ca8527-35b6-4b6e-82a5-ac1516853d14,4,0.842500,0.011481,0,0.226086,0
1,bf046423-d395-4109-8431-e03a6ac08bf5,2,0.887000,0.012859,0,0.232399,1
2,160ad73a-faa7-4cd5-a68d-7c351a0c30a1,2,0.856000,0.005352,0,0.224839,0
3,f221fddb-9725-456b-a674-d6c3315328b2,2,0.866000,0.010610,0,0.221834,0
4,6a5c35a6-8d70-4b53-982e-9ece522a34c2,3,0.908000,0.008472,0,0.227898,0
...,...,...,...,...,...,...,...
921,e57e955d-2c03-4772-9ac4-9fe7979fd1a4,3,0.810667,0.018552,0,0.360559,1
922,5044cfb8-b78b-4ddf-a682-57c6b4d5dc77,2,0.836000,0.019396,0,0.360559,1
923,b7197f2d-1763-4eb0-89cb-5769d7d9b12d,2,0.943000,0.011474,0,0.360559,1
924,a384a479-8aab-4d5a-b0ff-e4df80a4558a,3,0.892667,0.022298,0,0.341844,1


In [82]:
# Show the detections DataFrame as the cell output (the notebook renders long
# frames truncated, with an ellipsis row in the middle).
df_cv_detections

Unnamed: 0,detection_id,image_name,shipment_id,container_id,class_id,confidence,bbox_x_center,bbox_y_center,bbox_width,bbox_height,bbox_area,detected_at,model_version
0,c14e129b-62fa-48c7-9d7c-98acedb7f1bf,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.989,0.535185,0.273698,0.035185,0.197396,0.006945,2026-01-26 18:37:47.802469,yolo_v8
1,b73784e2-1036-4d72-9b7c-e3662afbaeb4,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.987,0.954630,0.268490,0.029630,0.226562,0.006713,2026-01-26 18:37:47.802495,yolo_v8
2,6dd811b3-6437-4d41-b73e-b513de0d3bee,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,1,0.876,0.145833,0.521875,0.084259,0.022917,0.001931,2026-01-26 18:37:47.802498,yolo_v8
3,f4345e22-343c-41ab-9277-bea38daf2001,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.937,0.561111,0.274740,0.053704,0.226562,0.012167,2026-01-26 18:37:47.802771,yolo_v8
4,5eb3a128-9d3a-4d11-81da-2f806b55d278,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.751,0.787500,0.351042,0.032407,0.159375,0.005165,2026-01-26 18:37:47.802774,yolo_v8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2129,ed1a8b41-b7e4-4059-83fc-29a00af7f640,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,0,0.942,0.539352,0.279167,0.076852,0.263542,0.020254,2026-01-26 18:37:48.008433,yolo_v8
2130,75995af4-b448-4da4-8b50-064a8a3e477f,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,1,0.967,0.574537,0.639323,0.136111,0.027604,0.003757,2026-01-26 18:37:48.008435,yolo_v8
2131,9da4df46-4581-4aef-91e0-ca549dc7b052,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,1,0.895,0.957407,0.613281,0.085185,0.024479,0.002085,2026-01-26 18:37:48.008437,yolo_v8
2132,ed118de8-f832-4b26-bf49-99087503aa5c,116_20220509T012527135Z_s00.mp4_30100.jpg,a6b46823-99fc-4594-b36e-6e60fdd30959,CNT-4a734f95-d038-4567-8b90-d137c940b3a5,0,0.814,0.831019,0.196615,0.065741,0.242188,0.015922,2026-01-26 18:37:48.008570,yolo_v8


# Feature engineering

In [83]:
# Partition the detection columns by dtype: object-typed columns on one side,
# int / float64 columns on the other, then report both sets of column names.
obj_cols = df_cv_detections.select_dtypes(include="object").columns
num_cols = df_cv_detections.select_dtypes(
    include=["int", "float64"],
).columns

print(f"data of object columns: {obj_cols}")
print(f"data of numerical columns: {num_cols}")

data of object columns: Index(['detection_id', 'image_name', 'shipment_id', 'container_id', 'class_id',
       'model_version'],
      dtype='object')
data of numerical columns: Index(['confidence', 'bbox_x_center', 'bbox_y_center', 'bbox_width',
       'bbox_height', 'bbox_area'],
      dtype='object')


# Container damage detection to prevent bad shipments
--------
# Comprehensive SQL analytics for reliable data:
* **Insert container images for damage bounding-box detection**
* **Statistical analytics to ensure the shipments are reliable**

In [84]:
# Re-display the detections DataFrame before the following cells filter it by class_id.
df_cv_detections

Unnamed: 0,detection_id,image_name,shipment_id,container_id,class_id,confidence,bbox_x_center,bbox_y_center,bbox_width,bbox_height,bbox_area,detected_at,model_version
0,c14e129b-62fa-48c7-9d7c-98acedb7f1bf,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.989,0.535185,0.273698,0.035185,0.197396,0.006945,2026-01-26 18:37:47.802469,yolo_v8
1,b73784e2-1036-4d72-9b7c-e3662afbaeb4,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.987,0.954630,0.268490,0.029630,0.226562,0.006713,2026-01-26 18:37:47.802495,yolo_v8
2,6dd811b3-6437-4d41-b73e-b513de0d3bee,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,1,0.876,0.145833,0.521875,0.084259,0.022917,0.001931,2026-01-26 18:37:47.802498,yolo_v8
3,f4345e22-343c-41ab-9277-bea38daf2001,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.937,0.561111,0.274740,0.053704,0.226562,0.012167,2026-01-26 18:37:47.802771,yolo_v8
4,5eb3a128-9d3a-4d11-81da-2f806b55d278,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.751,0.787500,0.351042,0.032407,0.159375,0.005165,2026-01-26 18:37:47.802774,yolo_v8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2129,ed1a8b41-b7e4-4059-83fc-29a00af7f640,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,0,0.942,0.539352,0.279167,0.076852,0.263542,0.020254,2026-01-26 18:37:48.008433,yolo_v8
2130,75995af4-b448-4da4-8b50-064a8a3e477f,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,1,0.967,0.574537,0.639323,0.136111,0.027604,0.003757,2026-01-26 18:37:48.008435,yolo_v8
2131,9da4df46-4581-4aef-91e0-ca549dc7b052,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,1,0.895,0.957407,0.613281,0.085185,0.024479,0.002085,2026-01-26 18:37:48.008437,yolo_v8
2132,ed118de8-f832-4b26-bf49-99087503aa5c,116_20220509T012527135Z_s00.mp4_30100.jpg,a6b46823-99fc-4594-b36e-6e60fdd30959,CNT-4a734f95-d038-4567-8b90-d137c940b3a5,0,0.814,0.831019,0.196615,0.065741,0.242188,0.015922,2026-01-26 18:37:48.008570,yolo_v8


In [85]:
# Rows whose class_id equals the string "0" (class_id is an object-typed
# column, so the comparison is against text rather than an integer).
df_cv_detections.loc[df_cv_detections["class_id"].eq("0")]

Unnamed: 0,detection_id,image_name,shipment_id,container_id,class_id,confidence,bbox_x_center,bbox_y_center,bbox_width,bbox_height,bbox_area,detected_at,model_version
0,c14e129b-62fa-48c7-9d7c-98acedb7f1bf,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.989,0.535185,0.273698,0.035185,0.197396,0.006945,2026-01-26 18:37:47.802469,yolo_v8
1,b73784e2-1036-4d72-9b7c-e3662afbaeb4,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,0,0.987,0.954630,0.268490,0.029630,0.226562,0.006713,2026-01-26 18:37:47.802495,yolo_v8
3,f4345e22-343c-41ab-9277-bea38daf2001,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.937,0.561111,0.274740,0.053704,0.226562,0.012167,2026-01-26 18:37:47.802771,yolo_v8
4,5eb3a128-9d3a-4d11-81da-2f806b55d278,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,0,0.751,0.787500,0.351042,0.032407,0.159375,0.005165,2026-01-26 18:37:47.802774,yolo_v8
6,eb717f97-b413-4684-8abc-3efd81483c56,109_20220211T041216722Z_s00.mp4_75800.jpg,45d62050-340e-422c-89a2-ab77ecd1f945,CNT-5ee962d3-0e4f-4afc-8a1f-1f273ab17970,0,0.839,0.735648,0.313021,0.049074,0.162500,0.007975,2026-01-26 18:37:47.802973,yolo_v8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2124,ee89fed7-9852-4ada-a97e-6e64ff9225b9,14_20220315T113440505Z_s00.mp4_98600.jpg,e57e955d-2c03-4772-9ac4-9fe7979fd1a4,CNT-ba130532-600a-4e7a-8b13-a3b5d13ec9dc,0,0.757,0.850926,0.326562,0.040741,0.195833,0.007978,2026-01-26 18:37:48.008213,yolo_v8
2125,f99f1541-845f-4d75-aabf-715081c65f2a,14_20220315T113440505Z_s00.mp4_98600.jpg,e57e955d-2c03-4772-9ac4-9fe7979fd1a4,CNT-ba130532-600a-4e7a-8b13-a3b5d13ec9dc,0,0.858,0.103704,0.365885,0.053704,0.160938,0.008643,2026-01-26 18:37:48.008223,yolo_v8
2127,57f202a4-b518-4b61-ab63-0ddf28f9a61b,10_20220427T113818842Z_s00.mp4_67500.jpg,aba56c46-b2ae-434d-9190-94857cefdb8b,CNT-3dc1025b-3591-4fb7-94b6-2e9e651ac999,0,0.936,0.920833,0.404687,0.021296,0.116667,0.002485,2026-01-26 18:37:48.008303,yolo_v8
2129,ed1a8b41-b7e4-4059-83fc-29a00af7f640,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,0,0.942,0.539352,0.279167,0.076852,0.263542,0.020254,2026-01-26 18:37:48.008433,yolo_v8


In [86]:
# Rows whose class_id equals the string "1" (class_id is an object-typed
# column, so the comparison is against text rather than an integer).
df_cv_detections.loc[df_cv_detections["class_id"].eq("1")]

Unnamed: 0,detection_id,image_name,shipment_id,container_id,class_id,confidence,bbox_x_center,bbox_y_center,bbox_width,bbox_height,bbox_area,detected_at,model_version
2,6dd811b3-6437-4d41-b73e-b513de0d3bee,14_20220428T012931844Z_s00.mp4_108400.jpg,2ad1dfa0-0791-4e07-b12c-9f5b268b6f66,CNT-59083db3-904b-4876-992a-3aee14f226d7,1,0.876,0.145833,0.521875,0.084259,0.022917,0.001931,2026-01-26 18:37:47.802498,yolo_v8
5,4a2c60c6-32bd-4d8f-81d6-7d7984e6c86a,114_20220425T114900821Z_s00.mp4_35100.jpg,df75a457-a3f2-42b3-98fb-650fea877b70,CNT-8cebf7ae-ecfd-4484-abce-2a60220a5352,1,0.989,0.200000,0.577865,0.138889,0.024479,0.003400,2026-01-26 18:37:47.802777,yolo_v8
7,43f4883b-8c88-42f7-9de0-ae8ac2437950,109_20220211T041216722Z_s00.mp4_75800.jpg,45d62050-340e-422c-89a2-ab77ecd1f945,CNT-5ee962d3-0e4f-4afc-8a1f-1f273ab17970,1,0.782,0.668981,0.521094,0.089815,0.018229,0.001637,2026-01-26 18:37:47.802977,yolo_v8
9,c3fe563c-ae05-4aff-a27f-4f5274248a84,116_20220322T011330692Z_s00.mp4_23500.jpg,648f4444-df86-42ca-994a-a162adefab53,CNT-70b1dae1-1ddc-45c6-b8aa-f2b752da922b,1,0.924,0.252315,0.667708,0.176852,0.026042,0.004606,2026-01-26 18:37:47.803165,yolo_v8
11,0ab73fbd-584f-4251-87f6-15c3788ea959,10_20220509T035631797Z_s00.mp4___650.jpg,e98b5b3a-67f9-4e0d-8eb7-c68558c7a047,CNT-ceda2210-82f0-4f1e-8320-0d83ba6ba049,1,0.753,0.260648,0.817448,0.056481,0.009896,0.000559,2026-01-26 18:37:47.803351,yolo_v8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2126,be6c4e23-ee2b-4f97-abfa-65b8cc8accce,14_20220315T113440505Z_s00.mp4_98600.jpg,e57e955d-2c03-4772-9ac4-9fe7979fd1a4,CNT-ba130532-600a-4e7a-8b13-a3b5d13ec9dc,1,0.817,0.532870,0.575521,0.084259,0.022917,0.001931,2026-01-26 18:37:48.008246,yolo_v8
2128,c6758ef8-a828-45e7-b553-c0c3ef8a1b63,10_20220427T113818842Z_s00.mp4_67500.jpg,aba56c46-b2ae-434d-9190-94857cefdb8b,CNT-3dc1025b-3591-4fb7-94b6-2e9e651ac999,1,0.849,0.700000,0.538802,0.059259,0.014063,0.000833,2026-01-26 18:37:48.008305,yolo_v8
2130,75995af4-b448-4da4-8b50-064a8a3e477f,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,1,0.967,0.574537,0.639323,0.136111,0.027604,0.003757,2026-01-26 18:37:48.008435,yolo_v8
2131,9da4df46-4581-4aef-91e0-ca549dc7b052,168_20220613T113307214Z_s00.mp4_150.jpg,c943c053-4c47-4d2f-8ac4-4cb22a220754,CNT-b1fa8997-a4ee-4e53-9987-a99764454b67,1,0.895,0.957407,0.613281,0.085185,0.024479,0.002085,2026-01-26 18:37:48.008437,yolo_v8
