In [3]:
import pandas as pd
from pathlib import Path


# file paths (change names/paths if different)
fp_delivery = "delivery_performance.csv"
fp_orders =  "orders.csv"
fp_routes =  "routes_distance.csv"


# read CSVs
df_delivery = pd.read_csv(fp_delivery)
df_orders = pd.read_csv(fp_orders)
df_routes = pd.read_csv(fp_routes)

print("Shapes:", df_delivery.shape, df_orders.shape, df_routes.shape)

# merge step-by-step (inner join to keep only orders present across all files)
mask = df_delivery.merge(df_orders, on="Order_ID", how="inner")
df_merged = mask.merge(df_routes, on="Order_ID", how="inner")

print("Merged shape:", df_merged.shape)
df_merged.head(3)


Shapes: (150, 8) (200, 9) (150, 7)
Merged shape: (150, 22)


Unnamed: 0,Order_ID,Carrier,Promised_Delivery_Days,Actual_Delivery_Days,Delivery_Status,Quality_Issue,Customer_Rating,Delivery_Cost_INR,Order_Date,Customer_Segment,...,Order_Value_INR,Origin,Destination,Special_Handling,Route,Distance_KM,Fuel_Consumption_L,Toll_Charges_INR,Traffic_Delay_Minutes,Weather_Impact
0,ORD000001,SpeedyLogistics,1,2,Slightly-Delayed,Perfect,3,387.86,2025-10-09,Individual,...,238.73,Kolkata,Hyderabad,,Kolkata-Hyderabad,152.59,23.02,122.08,21,
1,ORD000002,SpeedyLogistics,2,3,Slightly-Delayed,Minor_Damage,1,430.19,2025-09-29,SMB,...,17.01,Hyderabad,Kolkata,,Hyderabad-Kolkata,362.05,43.98,289.64,33,
2,ORD000003,SpeedyLogistics,10,15,Severely-Delayed,Minor_Damage,3,1039.19,2025-09-15,SMB,...,3024.95,Mumbai,Pune,,Mumbai-Pune,519.74,65.75,415.79,2,


In [15]:
df_merged.to_csv('final_OFI_df.csv', index=False)

In [4]:
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Order_ID                150 non-null    object 
 1   Carrier                 150 non-null    object 
 2   Promised_Delivery_Days  150 non-null    int64  
 3   Actual_Delivery_Days    150 non-null    int64  
 4   Delivery_Status         150 non-null    object 
 5   Quality_Issue           150 non-null    object 
 6   Customer_Rating         150 non-null    int64  
 7   Delivery_Cost_INR       150 non-null    float64
 8   Order_Date              150 non-null    object 
 9   Customer_Segment        150 non-null    object 
 10  Priority                150 non-null    object 
 11  Product_Category        150 non-null    object 
 12  Order_Value_INR         150 non-null    float64
 13  Origin                  150 non-null    object 
 14  Destination             150 non-null    ob

In [9]:
df = df_merged.copy()
df['delay_days'] = df['Actual_Delivery_Days'] - df['Promised_Delivery_Days']
df['delay_days'].isnull().sum()

np.int64(0)

In [10]:
df["is_delayed"] = df["delay_days"].apply(lambda x: 1 if pd.notna(x) and x > 0 else 0)

In [11]:
from sklearn.model_selection import train_test_split
import joblib
RANDOM_SEED = 42

# keep only rows with label available for supervised training
df_train = df[df["Actual_Delivery_Days"].notna()].copy()
print("Rows with known actual delivery:", len(df_train))

# For simplicity pick a subset of features for now
features = ["Carrier", "Promised_Delivery_Days", "Distance_KM", "Delivery_Cost_INR", "Traffic_Delay_Minutes", "Priority"]
target = "is_delayed"

# simple split
X = df_train[features]
y = df_train[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED)
print("Train/Test sizes:", X_train.shape, X_test.shape)


Rows with known actual delivery: 150
Train/Test sizes: (120, 6) (30, 6)


In [33]:
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

drop_cols = ["Route", "Origin", "Destination", "Customer_Segment", "Order_Date", "Special_Handling"]
# if df not defined in this cell, ensure you run the merge step before this cell
try:
    df = df.drop(columns=drop_cols, errors="ignore")
    print("Dropped columns (if present):", drop_cols)
except NameError:
    print("Warning: df not found in scope. Make sure you run the data-merge cell before this one.")

numeric_feats = [
    "Promised_Delivery_Days",
    "Order_Value_INR",
    "Delivery_Cost_INR",
    "Distance_KM",
    "Fuel_Consumption_L",
    "Toll_Charges_INR"
    # Excluded Traffic_Delay_Minutes (to avoid leakage)
]

categorical_ohe_feats = [
    "Carrier",
    "Product_Category"
    # Removed Quality_Issue and Delivery_Status
]

# Ordinal categorical features: Priority and Weather_Impact
categorical_ordinal_feats = ["Priority", "Weather_Impact"]

# --------- Transformers ----------------------------------------------------
# Numeric transformer: median impute + standard scale
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

# One-hot transformer for nominal categorical variables
cat_ohe_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="constant", fill_value="Unknown")),
    ("ohe", OneHotEncoder(handle_unknown="ignore"))
])

# Ordinal transformers
# Priority ordering: Economy < Standard < Express
priority_categories = [["Economy", "Standard", "Express", "Unknown"]]

# Weather ordering: Unknown (blank) -> fog -> light_Rain -> Heavy_Rain
# (assumes fog has lower impact than rain; adjust if you prefer a different order)
weather_categories = [["Unknown", "Fog", "Light_Rain", "Heavy_Rain"]]

priority_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="constant", fill_value="Unknown")),
    ("ordinal", OrdinalEncoder(categories=priority_categories))
])

weather_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="constant", fill_value="Unknown")),
    ("ordinal", OrdinalEncoder(categories=weather_categories))
])

# Combine ordinal transformers into a small wrapper via ColumnTransformer for the two features
# (we'll reference them separately in the main ColumnTransformer)
ordinal_transformers = [
    ("priority_ord", priority_transformer, ["Priority"]),
    ("weather_ord", weather_transformer, ["Weather_Impact"])
]

# --------- Final ColumnTransformer ----------------------------------------
preprocessor = ColumnTransformer(transformers=[
    ("num", numeric_transformer, numeric_feats),
    ("cat_ohe", cat_ohe_transformer, categorical_ohe_feats),
    # add ordinal transformers individually
    ("priority_ord", priority_transformer, ["Priority"]),
    ("weather_ord", weather_transformer, ["Weather_Impact"])
], remainder="drop")

print("Preprocessor created.")
print("Numeric features:", numeric_feats)
print("One-hot features:", categorical_ohe_feats)
print("Ordinal features: Priority ->", priority_categories[0], "; Weather ->", weather_categories[0])

Dropped columns (if present): ['Route', 'Origin', 'Destination', 'Customer_Segment', 'Order_Date', 'Special_Handling']
Preprocessor created.
Numeric features: ['Promised_Delivery_Days', 'Order_Value_INR', 'Delivery_Cost_INR', 'Distance_KM', 'Fuel_Consumption_L', 'Toll_Charges_INR']
One-hot features: ['Carrier', 'Product_Category']
Ordinal features: Priority -> ['Economy', 'Standard', 'Express', 'Unknown'] ; Weather -> ['Unknown', 'Fog', 'Light_Rain', 'Heavy_Rain']


In [34]:
df["Weather_Impact"].unique()

array([nan, 'Light_Rain', 'Fog', 'Heavy_Rain'], dtype=object)

In [35]:
from sklearn.model_selection import train_test_split

# Drop columns user asked to remove if present
drop_cols = ["Route", "Origin", "Destination", "Customer_Segment", "Order_Date", "Special_Handling"]
df = df.drop(columns=[c for c in drop_cols if c in df.columns], errors="ignore")

# Exclude ID and target columns from features
exclude_cols = {"Order_ID", "delay_days", "is_delayed", "Actual_Delivery_Days"}
all_cols = df.columns.tolist()
feature_candidates = [c for c in all_cols if c not in exclude_cols]

# For safety, explicitly ensure our preprocessing columns exist (numeric_feats + categorical lists from previous preprocessor)
# numeric_feats, categorical_ohe_feats etc. should exist from previous cell; if not, define a conservative subset:
try:
    feature_list = numeric_feats + categorical_ohe_feats + ["Priority", "Weather_Impact"]
except NameError:
    # fallback: auto-detect reasonable features
    numeric_features_auto = df.select_dtypes(include=[np.number]).columns.tolist()
    feature_list = [c for c in numeric_features_auto if c not in exclude_cols]
    # add a few categorical candidates if present
    for c in ["Carrier", "Priority", "Product_Category", "Delivery_Status", "Quality_Issue", "Weather_Impact"]:
        if c in df.columns and c not in feature_list:
            feature_list.append(c)

# Final feature list: keep only present columns
feature_list = [c for c in feature_list if c in df.columns]
print("Using features:", feature_list)

# Create trainable subset (rows with known Actual_Delivery_Days)
df_trainable = df[df["Actual_Delivery_Days"].notna()].copy()
X = df_trainable[feature_list]
y = df_trainable["is_delayed"]

RANDOM_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED)
print("Train/test sizes:", X_train.shape, X_test.shape)


Using features: ['Promised_Delivery_Days', 'Order_Value_INR', 'Delivery_Cost_INR', 'Distance_KM', 'Fuel_Consumption_L', 'Toll_Charges_INR', 'Carrier', 'Product_Category', 'Priority', 'Weather_Impact']
Train/test sizes: (120, 10) (30, 10)


In [36]:
# Cell D: fit preprocessor (if not fitted) and train RandomForest in a Pipeline
import joblib
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, accuracy_score


# Build pipeline with RandomForest
rf_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(n_estimators=200, random_state=RANDOM_SEED, n_jobs=-1))
])

print("Training RandomForest pipeline...")
rf_pipeline.fit(X_train, y_train)

# Evaluate on test set
y_pred = rf_pipeline.predict(X_test)
y_proba = rf_pipeline.predict_proba(X_test)[:, 1]
print("RandomForest Classification report:")
print(classification_report(y_test, y_pred, digits=4))
print(accuracy_score(y_test, y_pred))
try:
    roc = roc_auc_score(y_test, y_proba)
    print("ROC-AUC:", roc)
except Exception as e:
    print("ROC-AUC not available:", e)

# # Save the pipeline
# rf_model_fp = processed_dir / "rf_pipeline.joblib"
# joblib.dump(rf_pipeline, rf_model_fp)
# print("Saved RandomForest pipeline to:", rf_model_fp)


Fitting preprocessor on training data...
Training RandomForest pipeline...
RandomForest Classification report:
              precision    recall  f1-score   support

           0     0.6667    0.6250    0.6452        16
           1     0.6000    0.6429    0.6207        14

    accuracy                         0.6333        30
   macro avg     0.6333    0.6339    0.6329        30
weighted avg     0.6356    0.6333    0.6337        30

0.6333333333333333
ROC-AUC: 0.6026785714285714


In [37]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [38]:
# Cell E: Train CatBoost on transformed arrays (preprocessor -> raw arrays)
from catboost import CatBoostClassifier

# transform to numpy arrays (preprocessor outputs dense arrays due to sparse=False)
X_train_trans = preprocessor.transform(X_train)
X_test_trans = preprocessor.transform(X_test)

# Train CatBoost on transformed arrays
cb = CatBoostClassifier(iterations=500, learning_rate=0.05, depth=6, random_seed=RANDOM_SEED, verbose=0)
print("Training CatBoost on preprocessed arrays (this may take a while)...")
cb.fit(X_train_trans, y_train)

# Evaluate
y_pred_cb = cb.predict(X_test_trans)
y_proba_cb = cb.predict_proba(X_test_trans)[:, 1]
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred_cb, average="binary", zero_division=0)
acc = accuracy_score(y_test, y_pred_cb)
roc_cb = roc_auc_score(y_test, y_proba_cb)
print(f"CatBoost — acc: {acc:.4f}, prec: {prec:.4f}, rec: {rec:.4f}, f1: {f1:.4f}, roc_auc: {roc_cb:.4f}")

# Save CatBoost model (joblib for fallback)
# cb_model_fp = processed_dir / "catboost_model.cbm"
# cb.save_model(str(cb_model_fp))
# print("Saved CatBoost model to:", cb_model_fp)


Training CatBoost on preprocessed arrays (this may take a while)...
CatBoost — acc: 0.7000, prec: 0.6667, rec: 0.7143, f1: 0.6897, roc_auc: 0.6875


In [47]:
!pip install -q dagshub 'mlflow>=2,<3'

In [50]:
import dagshub
import mlflow

# Initialize MLflow tracking with DagsHub
dagshub.init(
    repo_owner="AbdurRahman22224",          # your DagsHub username
    repo_name="Predictive_Delivery_Optimizer",  # your repo name on DagsHub
    mlflow=True
)

mlflow.set_tracking_uri("https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow")

In [51]:
print("Current tracking URI:", mlflow.get_tracking_uri())


Current tracking URI: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow


In [52]:
# Cell 1: directories & save preprocessor
import os
from pathlib import Path
import joblib

# Base directories (adjust DATA_DIR if you mounted Drive)
BASE_DIR = Path("/content")  # change to Path("/content/drive/MyDrive/nexgen_data") if you want Drive
PROCESSED_DIR = BASE_DIR / "nexgen_processed"
MODELS_DIR = BASE_DIR / "nexgen_models"
MLRUNS_DIR = BASE_DIR / "mlruns"  # local mlruns fallback

for d in [PROCESSED_DIR, MODELS_DIR, MLRUNS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

print("Directories ready:")
print("Processed:", PROCESSED_DIR)
print("Models:", MODELS_DIR)
print("MLruns:", MLRUNS_DIR)

# Save preprocessor (if defined)
try:
    preprocessor  # referencing variable from previous cells
    preprocessor_fp = PROCESSED_DIR / "preprocessor.joblib"
    joblib.dump(preprocessor, preprocessor_fp)
    print("Saved preprocessor to:", preprocessor_fp)
except NameError:
    print("Warning: `preprocessor` not found in scope. Make sure to run the preprocessing cell first.")

Directories ready:
Processed: /content/nexgen_processed
Models: /content/nexgen_models
MLruns: /content/mlruns
Saved preprocessor to: /content/nexgen_processed/preprocessor.joblib


In [56]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.5.0


In [58]:
import optuna
import mlflow
import mlflow.sklearn
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report
from sklearn.svm import SVC
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
import joblib
import os

mlflow.set_experiment("Predictive_Delivery_Optimizer_HP_Tuning")

def objective(trial):
    with mlflow.start_run(nested=True):
        model_name = trial.suggest_categorical("model", ["RF", "CatBoost", "SVM"])

        # Log model name immediately
        mlflow.log_param("model", model_name)

        if model_name == "RF":
            n_estimators = trial.suggest_int("n_estimators", 50, 300)
            max_depth = trial.suggest_int("max_depth", 3, 15)
            min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
            model = RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                min_samples_split=min_samples_split,
                random_state=42,
                n_jobs=-1
            )
            # Log RF specific hyperparameters
            mlflow.log_params({
                "n_estimators": n_estimators,
                "max_depth": max_depth,
                "min_samples_split": min_samples_split
            })

        elif model_name == "CatBoost":
            depth = trial.suggest_int("depth", 4, 10)
            learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
            iterations = trial.suggest_int("iterations", 100, 400)
            model = CatBoostClassifier(
                depth=depth,
                learning_rate=learning_rate,
                iterations=iterations,
                verbose=0,
                random_seed=42
            )
            # Log CatBoost specific hyperparameters
            mlflow.log_params({
                "depth": depth,
                "learning_rate": learning_rate,
                "iterations": iterations
            })


        elif model_name == "SVM":
            kernel = trial.suggest_categorical("kernel", ["linear", "rbf", "poly"])
            C = trial.suggest_float("C", 0.1, 10.0)
            gamma = "scale" if kernel == "linear" else trial.suggest_categorical("gamma", ["scale", "auto"])
            model = SVC(kernel=kernel, C=C, gamma=gamma, probability=True, random_state=42)
            # Log SVM specific hyperparameters
            mlflow.log_params({
                "kernel": kernel,
                "C": C,
                "gamma": gamma
            })


        # Train
        model.fit(X_train_trans, y_train)

        # Predict
        y_pred = model.predict(X_test_trans)
        y_pred_proba = model.predict_proba(X_test_trans)[:, 1] if hasattr(model, "predict_proba") else None

        # Metrics
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        roc_auc = roc_auc_score(y_test, y_pred_proba) if y_pred_proba is not None else 0

        # Log metrics to MLflow
        mlflow.log_metric("Accuracy", acc)
        mlflow.log_metric("F1_Score", f1)
        mlflow.log_metric("ROC_AUC", roc_auc)

        # Save the model locally instead of logging to MLflow artifact store directly
        model_dir = f"models/{trial.number}"
        os.makedirs(model_dir, exist_ok=True)
        model_path = os.path.join(model_dir, f"{model_name}.joblib")
        joblib.dump(model, model_path)
        # Log the path to the saved model as a parameter
        mlflow.log_param("model_path", model_path)


        # Return the metric for optimization
        return f1


# Optimize
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

print("Best Trial:")
print(study.best_trial.params)

# Save the best model (Optuna trial object)
best_model_name = study.best_trial.params["model"]
os.makedirs("models", exist_ok=True)
best_trial_path = f"models/best_trial_{best_model_name}.joblib"
joblib.dump(study.best_trial, best_trial_path)
print(f"✅ Best trial object saved at: {best_trial_path}")


[I 2025-10-27 06:13:04,899] A new study created in memory with name: no-name-ce1962f2-27be-4fd5-a4a4-8cb70d022f71
[I 2025-10-27 06:13:06,184] Trial 0 finished with value: 0.4314253647586981 and parameters: {'model': 'SVM', 'kernel': 'linear', 'C': 4.494902966647239}. Best is trial 0 with value: 0.4314253647586981.


🏃 View run sneaky-ape-693 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/759d5bd5e66c4dfab62ba47615aeb472
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1
🏃 View run fortunate-moose-476 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/c49938ce6a5f4cfeb2d993488eeb881c
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:13:12,897] Trial 1 finished with value: 0.6666666666666666 and parameters: {'model': 'CatBoost', 'depth': 10, 'learning_rate': 0.014256902948256489, 'iterations': 137}. Best is trial 1 with value: 0.6666666666666666.


🏃 View run popular-midge-961 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/9a958adf970c485086462bb9aeac7f40
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:13:21,907] Trial 2 finished with value: 0.6321097515758249 and parameters: {'model': 'RF', 'n_estimators': 167, 'max_depth': 12, 'min_samples_split': 4}. Best is trial 1 with value: 0.6666666666666666.


🏃 View run burly-dog-227 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/95ce0e48f352436baf117bc3c0aceef2
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:13:30,891] Trial 3 finished with value: 0.4618401206636501 and parameters: {'model': 'SVM', 'kernel': 'linear', 'C': 6.523317653565173}. Best is trial 1 with value: 0.6666666666666666.


🏃 View run skittish-mink-741 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/5547504d891645c695d2ffdcab7cb776
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:13:39,894] Trial 4 finished with value: 0.7003337041156841 and parameters: {'model': 'CatBoost', 'depth': 7, 'learning_rate': 0.15423000550132251, 'iterations': 217}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run honorable-horse-906 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/aa31d71b65b14e25bc40677bba58e648
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:13:48,896] Trial 5 finished with value: 0.6337411939191694 and parameters: {'model': 'CatBoost', 'depth': 10, 'learning_rate': 0.25018122378157837, 'iterations': 207}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run selective-gnat-8 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/37d6ca3d9a49442db0bbdd5128ca6668
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:13:57,888] Trial 6 finished with value: 0.7003337041156841 and parameters: {'model': 'CatBoost', 'depth': 8, 'learning_rate': 0.20998973991935302, 'iterations': 352}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run angry-penguin-154 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/cec8b8abe9f741d6ab3492d1b7a75e81
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:14:06,891] Trial 7 finished with value: 0.6320987654320988 and parameters: {'model': 'CatBoost', 'depth': 10, 'learning_rate': 0.06084560286444943, 'iterations': 156}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run masked-sow-62 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/db9b16e84f684711b0b848ca09298e5b
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:14:15,901] Trial 8 finished with value: 0.5592380952380952 and parameters: {'model': 'SVM', 'kernel': 'rbf', 'C': 4.053483339885692, 'gamma': 'auto'}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run righteous-lynx-391 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/203f2da2409146ca92cf47d643491e80
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:14:24,889] Trial 9 finished with value: 0.5333333333333333 and parameters: {'model': 'RF', 'n_estimators': 165, 'max_depth': 15, 'min_samples_split': 10}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run vaunted-eel-535 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/645d8ba78c884e68941c281f7cc91aea
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:14:33,894] Trial 10 finished with value: 0.6989898989898989 and parameters: {'model': 'CatBoost', 'depth': 4, 'learning_rate': 0.1310554531849288, 'iterations': 297}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run melodic-cod-474 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/4ffb0629b7b7486dbc8a50843d9be3a0
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:14:42,902] Trial 11 finished with value: 0.7003337041156841 and parameters: {'model': 'CatBoost', 'depth': 7, 'learning_rate': 0.20835560890747798, 'iterations': 386}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run salty-lynx-520 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/6508233f91864d408a86fe27062a4b34
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:14:51,899] Trial 12 finished with value: 0.6666666666666666 and parameters: {'model': 'CatBoost', 'depth': 7, 'learning_rate': 0.16035872006791024, 'iterations': 300}. Best is trial 4 with value: 0.7003337041156841.


🏃 View run upbeat-eel-199 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/b19a1b112e59479484b23135e09cd91d
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:15:00,885] Trial 13 finished with value: 0.7333333333333333 and parameters: {'model': 'CatBoost', 'depth': 8, 'learning_rate': 0.29839567716810317, 'iterations': 400}. Best is trial 13 with value: 0.7333333333333333.


🏃 View run adventurous-fish-619 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/656c9cab3a1b4c7a8185e1b4d781b478
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:15:09,914] Trial 14 finished with value: 0.6666666666666666 and parameters: {'model': 'CatBoost', 'depth': 5, 'learning_rate': 0.2952240345266324, 'iterations': 229}. Best is trial 13 with value: 0.7333333333333333.


🏃 View run melodic-carp-512 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/23ed5da4912f4837bfdb19e490894978
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:15:18,897] Trial 15 finished with value: 0.5888888888888888 and parameters: {'model': 'RF', 'n_estimators': 292, 'max_depth': 3, 'min_samples_split': 9}. Best is trial 13 with value: 0.7333333333333333.


🏃 View run inquisitive-lamb-490 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/28bc28ed6f36447b99333c9bfbe751fc
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:15:27,904] Trial 16 finished with value: 0.6666666666666666 and parameters: {'model': 'CatBoost', 'depth': 8, 'learning_rate': 0.12438151760926232, 'iterations': 278}. Best is trial 13 with value: 0.7333333333333333.


🏃 View run invincible-auk-855 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/6bfae001abcd41ecbbed34c8d06f0839
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:15:36,905] Trial 17 finished with value: 0.7003337041156841 and parameters: {'model': 'CatBoost', 'depth': 6, 'learning_rate': 0.19118077748851783, 'iterations': 195}. Best is trial 13 with value: 0.7333333333333333.


🏃 View run bald-pug-926 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/fefc0b9fa3884513b044b0fe27570d4c
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:15:45,899] Trial 18 finished with value: 0.5333333333333333 and parameters: {'model': 'SVM', 'kernel': 'poly', 'C': 9.929471697790362, 'gamma': 'scale'}. Best is trial 13 with value: 0.7333333333333333.


🏃 View run exultant-rat-573 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/12c8eb8343d447d08b7c8127c61c8af0
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1


[I 2025-10-27 06:15:54,905] Trial 19 finished with value: 0.49831649831649827 and parameters: {'model': 'RF', 'n_estimators': 51, 'max_depth': 6, 'min_samples_split': 2}. Best is trial 13 with value: 0.7333333333333333.


Best Trial:
{'model': 'CatBoost', 'depth': 8, 'learning_rate': 0.29839567716810317, 'iterations': 400}
✅ Best trial object saved at: models/best_trial_CatBoost.joblib


In [63]:
study.trials_dataframe().head(5)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_C,params_depth,params_gamma,params_iterations,params_kernel,params_learning_rate,params_max_depth,params_min_samples_split,params_model,params_n_estimators,state
0,0,0.431425,2025-10-27 06:13:04.900629,2025-10-27 06:13:06.184662,0 days 00:00:01.284033,4.494903,,,,linear,,,,SVM,,COMPLETE
1,1,0.666667,2025-10-27 06:13:06.185690,2025-10-27 06:13:12.897334,0 days 00:00:06.711644,,10.0,,137.0,,0.014257,,,CatBoost,,COMPLETE
2,2,0.63211,2025-10-27 06:13:12.898438,2025-10-27 06:13:21.907509,0 days 00:00:09.009071,,,,,,,12.0,4.0,RF,167.0,COMPLETE
3,3,0.46184,2025-10-27 06:13:21.908899,2025-10-27 06:13:30.890988,0 days 00:00:08.982089,6.523318,,,,linear,,,,SVM,,COMPLETE
4,4,0.700334,2025-10-27 06:13:30.891979,2025-10-27 06:13:39.893903,0 days 00:00:09.001924,,7.0,,217.0,,0.15423,,,CatBoost,,COMPLETE


In [64]:
# model frequency
opt_df = pd.DataFrame(
    {
        "model": study.trials_dataframe()['params_model'].value_counts().index,
        "freq": study.trials_dataframe()['params_model'].value_counts().values,
        "F1": study.trials_dataframe().groupby("params_model")['value'].mean().sort_values(ascending=False).values
    }
)
print(opt_df)
best_trial = study.best_trial
print("Best Model:", best_trial.params["model"])
print()
print("Best F1-Score:", best_trial.value)



      model  freq        F1
0  CatBoost    12  0.680514
1       SVM     4  0.563162
2        RF     4  0.496459
Best Model: CatBoost
Best F1-Score: 0.7333333333333333


In [67]:
opt_df.to_csv("model_frequency_summary.csv", index=False)
with mlflow.start_run():

    # Log the DataFrame CSV as an artifact
    mlflow.log_artifact("/content/model_frequency_summary.csv", artifact_path="tables")

🏃 View run intrigued-ant-414 at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1/runs/5d0eb6a9462e4016ba649e26455b08c5
🧪 View experiment at: https://dagshub.com/AbdurRahman22224/Predictive_Delivery_Optimizer.mlflow/#/experiments/1
