In [47]:
import pandas as pd
import joblib
import os
import warnings
warnings.filterwarnings("ignore")

# Load component dataset

In [48]:
df_features = pd.read_parquet("df_features.parquet")
df_images = pd.read_parquet("df_images.parquet")

# Load all models
models = {}
for filename in os.listdir("models"):
    if filename.endswith(".pkl"):
        model_name = filename.replace(".pkl", "")
        models[model_name] = joblib.load(f"models/{filename}")
        print(f"loaded {model_name}")

loaded XGBoost
loaded LogisticRegression
loaded DecisionTree


In [49]:
models.keys()

dict_keys(['XGBoost', 'LogisticRegression', 'DecisionTree'])

In [50]:
# Define features and target

FEATURES = [
    "total_detections",
    "avg_confidence",
    "total_damage_area",
    "dent_count",
    "defect_rate"
]

TARGET = "is_high_risk"

# Featuring images data

### Define images dataset for decision dataset where is

In [51]:
image_name = "162_20220617T122356165Z_s00.mp4___750.jpg"

shipment_id = df_images.loc[df_images["image_name"] == image_name, "shipment_id"].iloc[0]
row = df_features[df_features["shipment_id"] == shipment_id].iloc[0]

In [52]:
row

shipment_id          0e469d1d-71e9-4df9-b15d-f468ea942534
total_detections                                        1
avg_confidence                                      0.969
total_damage_area                                0.007049
dent_count                                              1
defect_rate                                      0.007049
is_high_risk                                            0
risk_score                                       0.006831
reliability_flag                                        1
Name: 45, dtype: object

# Machine learning Multi-Model Risk Prediction

In [53]:
X = row[FEATURES].values.reshape(1, -1)

model_results = {}

for model_name, model in models.items():
    prob = model.predict_proba(X)[0, 1]

    model_results[model_name] = {
        "risk_probability": round(float(prob), 3),
        "decision": "HIGH_RISK" if prob >= 0.5 else "LOW_RISK"
    }

model_results

{'XGBoost': {'risk_probability': 0.098, 'decision': 'LOW_RISK'},
 'LogisticRegression': {'risk_probability': 0.123, 'decision': 'LOW_RISK'},
 'DecisionTree': {'risk_probability': 0.0, 'decision': 'LOW_RISK'}}

# YOLO Evidence (Real or Mock)

In [54]:
# Use mock for temporary example dataset
yolo_evidence = {
    "damage_detected": True,
    "confidence": 0.90,
    "bbox_count": int(row.total_detections),
    "classes": ["dent"] if row.dent_count > 0 else []
}

#  RAG Reasoning (Controlled & Safe)

In [55]:
import numpy as np

avg_risk = np.mean([
    v["risk_probability"] for v in model_results.values()
])

if avg_risk >= 0.7 and yolo_evidence["confidence"] >= 0.7:
    rag_reasoning = (
        "Based on multiple model predictions indicating high risk "
        "and high-confidence visual damage detection, the container "
        "is likely compromised and should not proceed with shipping "
        "until further inspection or repair is completed."
    )
elif avg_risk >= 0.5 and yolo_evidence["confidence"] < 0.5:
    rag_reasoning = (
        "Model predictions and visual inspection indicate low risk. "
        "No significant damage is detected, and the container may "
        "proceed with shipping under standard monitoring procedures."
    )
else:
    rag_reasoning = (
        "The assessment shows mixed indicators. While some damage "
        "signals are present, further manual inspection is recommended "
        "before making a shipping decision."
    )

# Final JSON Output (Multi-Model)

#### * **Input data images for prediction result**
#### * **Models comparison for best choice prediction**
#### * **RAG reasoning for shiping reasonibility**

In [56]:
final_result = {
    "image": image_name,
    "shipment_id": shipment_id,

    "models": model_results,

    "model_consensus": {
        "average_risk_probability": round(float(avg_risk), 3),
        "final_decision": (
            "HIGH_RISK" if avg_risk >= 0.5 else "LOW_RISK"
        )
    },
    "detection yolo": yolo_evidence,

    "rag_assesment": {
        "summary": rag_reasoning,
        "recommendation": (
            "DO NOT SHIP" if avg_risk >= 0.5 else "SAFE TO SHIP"
        )
    }
}

final_result

{'image': '162_20220617T122356165Z_s00.mp4___750.jpg',
 'shipment_id': '0e469d1d-71e9-4df9-b15d-f468ea942534',
 'models': {'XGBoost': {'risk_probability': 0.098, 'decision': 'LOW_RISK'},
  'LogisticRegression': {'risk_probability': 0.123, 'decision': 'LOW_RISK'},
  'DecisionTree': {'risk_probability': 0.0, 'decision': 'LOW_RISK'}},
 'model_consensus': {'average_risk_probability': 0.074,
  'final_decision': 'LOW_RISK'},
 'detection yolo': {'damage_detected': True,
  'confidence': 0.9,
  'bbox_count': 1,
  'classes': ['dent']},
 'rag_assesment': {'summary': 'The assessment shows mixed indicators. While some damage signals are present, further manual inspection is recommended before making a shipping decision.',
  'recommendation': 'SAFE TO SHIP'}}