In [2]:
# -----------------------------
# Train Random Forest (All Features)
# -----------------------------

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle
import os

# Step 1: read the raw logistics CSV into a DataFrame and report its dimensions.
df = pd.read_csv("../data/smart_logistics_dataset.csv")
print("✅ Dataset loaded! Shape:", df.shape)

# Step 2: pull the label column out; every remaining column is used as a predictor.
target_col = "Logistics_Delay"
y = df[target_col]
X = df.drop(columns=[target_col])

# Step 3: one-hot encode the predictors. drop_first=True omits the first level
# of each encoded column so the indicator columns are not fully redundant.
# NOTE(review): the recorded run reports 1024 encoded columns for a 1000-row
# frame, which suggests near-unique columns (IDs, timestamps?) are being
# expanded into one indicator per value — confirm whether they belong here.
X_encoded = pd.get_dummies(X, drop_first=True)
n_encoded_features = X_encoded.shape[1]
print("Encoded features:", n_encoded_features)

# Step 4: hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition reproducible across runs.
split_parts = train_test_split(X_encoded, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
print("Data split complete. Training samples:", len(X_train))

# Step 5: fit the forest. n_jobs=-1 asks scikit-learn to use every available
# core; max_depth=None places no explicit cap on tree depth.
rf_params = dict(
    n_estimators=200,
    max_depth=None,
    random_state=42,
    n_jobs=-1,
)
rf_model = RandomForestClassifier(**rf_params).fit(X_train, y_train)

# Step 6: score the fitted model on the held-out rows.
# NOTE(review): the recorded run shows 100.0 % accuracy; a perfect score may
# mean one of the predictors leaks the target — verify before trusting it.
y_pred = rf_model.predict(X_test)
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
report_text = classification_report(y_test, y_pred)
print("\n✅ Model Accuracy:", accuracy_pct, "%")
print("\nClassification Report:\n", report_text)

# Step 7: persist the model together with the exact encoded column list, so the
# same column layout can be rebuilt wherever the model is loaded.
os.makedirs("../models", exist_ok=True)

artifacts = (
    ("../models/rf_model.pkl", rf_model),
    ("../models/feature_columns.pkl", X_encoded.columns.tolist()),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, "wb") as f:
        pickle.dump(artifact, f)

print("\n✅ Model and feature columns saved successfully!")


✅ Dataset loaded! Shape: (1000, 16)
Encoded features: 1024
Data split complete. Training samples: 800

✅ Model Accuracy: 100.0 %

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        91
           1       1.00      1.00      1.00       109

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


✅ Model and feature columns saved successfully!


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

# Fit a 100-tree forest (all other hyperparameters left at their defaults) on
# the train split. This cell relies on X_train / X_test / y_train / y_test
# already existing in the kernel from the previous cell.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score on the held-out rows: overall accuracy, confusion matrix, and the
# per-class precision / recall / F1 table.
y_pred = rf_model.predict(X_test)
accuracy_pct = round(accuracy_score(y_test, y_pred)*100, 2)
conf_mat = confusion_matrix(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
print("Accuracy:", accuracy_pct, "%")
print("Confusion Matrix:\n", conf_mat)
print("Classification Report:\n", report_text)

# Persist the fitted model.
# NOTE(review): this is the same path the previous cell wrote its 200-tree
# model to, so that file is replaced by this 100-tree model — confirm that the
# overwrite is intentional.
model_path = '../models/rf_model.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(rf_model, f)


Accuracy: 100.0 %
Confusion Matrix:
 [[ 91   0]
 [  0 109]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        91
           1       1.00      1.00      1.00       109

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

