# Improved Delivery Time Prediction Model
## تحليل بيانات الاستلام والتوصيل وتوقع وقت التوصيل المحسن

**الهدف:** توقع ETA (Estimated Time of Arrival) بدقة عالية
ETA = وقت التوصيل - وقت الاستلام

**التحسينات:**
- تنظيف البيانات بشكل أفضل
- إزالة الـ outliers
- تحليل استكشافي شامل
- هندسة خصائص متقدمة
- تحسين النماذج

### Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import (
    train_test_split,
    cross_val_score,
    GridSearchCV,
    RandomizedSearchCV,
)
from sklearn.linear_model import Ridge, ElasticNet, LassoCV
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    IsolationForest,
)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, RobustScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_regression
import warnings
from scipy import stats
from datetime import datetime, timedelta
from sklearn.neighbors import KNeighborsRegressor
import catboost as cb
import pickle

# Silence every warning category for the rest of the session (notebook-style
# convenience; note this also hides deprecation warnings from pandas/seaborn).
warnings.filterwarnings("ignore")

# Set plotting style: try the "seaborn-v0_8" style name first and fall back to
# the plain "seaborn" name when it is not available. Only the "style not found"
# error (OSError) triggers the fallback, so unrelated failures and
# KeyboardInterrupt are no longer swallowed by a bare ``except``.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
sns.set_palette("husl")
plt.rcParams["figure.figsize"] = (12, 8)
plt.rcParams["font.size"] = 10

## 1. DATA LOADING AND EXPLORATION

In [None]:
def load_data(cities=None):
    """Load and concatenate the pickup and delivery CSV files of every city.

    For each city code the files
    ``Pickup Five Cities Datasets/pickup_<city>.csv`` and
    ``Delivery Five Cities Datasets/delivery_<city>.csv`` are read relative to
    the current working directory and tagged with a ``source_city`` column.

    A city contributes rows only when BOTH of its files load. Previously a
    city whose pickup file loaded but whose delivery file failed was left
    half-loaded, and every failure was silently discarded. Failures are still
    tolerated (best effort) but are now reported through ``logging``.

    Args:
        cities: Optional iterable of city codes. Defaults to the five codes
            ``["cq", "sh", "hz", "jl", "yt"]``.

    Returns:
        Tuple ``(df_pickup, df_delivery)`` of concatenated DataFrames with a
        fresh integer index.

    Raises:
        ValueError: If no city could be loaded.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import section.
    import logging

    logger = logging.getLogger(__name__)
    if cities is None:
        cities = ["cq", "sh", "hz", "jl", "yt"]

    pickup_dfs = []
    delivery_dfs = []

    for city in cities:
        try:
            pickup_df = pd.read_csv(f"Pickup Five Cities Datasets/pickup_{city}.csv")
            delivery_df = pd.read_csv(
                f"Delivery Five Cities Datasets/delivery_{city}.csv"
            )
        except Exception as exc:
            # Best effort: skip the city, but leave a trace of why.
            logger.warning("Skipping city %r: data could not be loaded (%s)", city, exc)
            continue

        # Only reached when both files were read successfully.
        pickup_df["source_city"] = city
        delivery_df["source_city"] = city
        pickup_dfs.append(pickup_df)
        delivery_dfs.append(delivery_df)

    if not pickup_dfs or not delivery_dfs:
        raise ValueError("لم يتم تحميل أي بيانات بنجاح")

    # Combine data
    df_pickup = pd.concat(pickup_dfs, ignore_index=True)
    df_delivery = pd.concat(delivery_dfs, ignore_index=True)

    return df_pickup, df_delivery

def comprehensive_data_exploration(df_pickup, df_delivery):
    """Placeholder exploration step: hand both frames back untouched.

    The body performs no inspection of its own; in a notebook the usual
    ``df.info()``, ``df.head()`` and ``df.describe()`` calls are run directly
    in cells instead.

    Returns:
        The tuple ``(df_pickup, df_delivery)`` exactly as received.
    """
    untouched_frames = (df_pickup, df_delivery)
    return untouched_frames

## 2. ADVANCED DATA PREPARATION

In [None]:
def advanced_data_preparation(df_pickup, df_delivery):
    """Parse timestamps, merge pickups with deliveries and derive a cleaned ETA.

    Steps:
      1. Parse ``pickup_time`` / ``accept_time`` (pickup frame) and
         ``delivery_time`` / ``accept_time`` (delivery frame), plus the optional
         ``time_window_start`` / ``time_window_end`` pickup columns.
      2. Drop rows whose mandatory timestamps could not be parsed.
      3. Inner-join both frames on ``order_id``; overlapping columns get the
         ``_pickup`` / ``_delivery`` suffixes.
      4. Compute ``ETA_seconds``, ``ETA_minutes`` and ``ETA_hours`` as
         ``delivery_time - pickup_time``.
      5. Keep ETAs between one minute and 168 hours, then remove values
         outside the 1.5 * IQR fences.

    Args:
        df_pickup: Raw pickup records (not modified).
        df_delivery: Raw delivery records (not modified).

    Returns:
        The merged, filtered DataFrame (original merge index preserved).

    Raises:
        ValueError: If the inner join yields no rows.
    """
    pickup_clean = df_pickup.copy()
    delivery_clean = df_delivery.copy()

    def parse_datetime_flexible(date_series, formats=None):
        """Parse *date_series* by trying each format on the still-unparsed values.

        The earlier implementation stopped at the first format that matched
        at least one value, so a column mixing several formats lost every row
        written in another format. Here each later format is applied only to
        the entries that are still ``NaT``.
        """
        if formats is None:
            formats = [
                "%m-%d %H:%M:%S",
                "%Y-%m-%d %H:%M:%S",
                "%d-%m-%Y %H:%M:%S",
                "%m/%d %H:%M:%S",
                "%Y/%m/%d %H:%M:%S",
            ]

        parsed_series = None
        for fmt in formats:
            if parsed_series is None:
                missing = None
                pending = date_series
            else:
                missing = parsed_series.isna().to_numpy()
                if not missing.any():
                    break
                pending = date_series[missing]

            try:
                attempt = pd.to_datetime(pending, format=fmt, errors="coerce")
            except (ValueError, TypeError):
                continue

            if parsed_series is None:
                parsed_series = attempt.copy()
            else:
                # Positional fill, so a non-unique index cannot misalign values.
                parsed_series[missing] = attempt.to_numpy()

        if parsed_series is None:
            # No format could even be attempted: report everything as missing.
            parsed_series = pd.Series(
                pd.NaT, index=date_series.index, dtype="datetime64[ns]"
            )
        return parsed_series

    pickup_clean["pickup_time"] = parse_datetime_flexible(pickup_clean["pickup_time"])
    pickup_clean["accept_time"] = parse_datetime_flexible(pickup_clean["accept_time"])
    delivery_clean["delivery_time"] = parse_datetime_flexible(delivery_clean["delivery_time"])
    delivery_clean["accept_time"] = parse_datetime_flexible(delivery_clean["accept_time"])

    # Each time-window column is optional and checked on its own, so a frame
    # carrying only ``time_window_start`` no longer raises KeyError.
    for window_col in ("time_window_start", "time_window_end"):
        if window_col in pickup_clean.columns:
            pickup_clean[window_col] = parse_datetime_flexible(pickup_clean[window_col])

    pickup_clean = pickup_clean.dropna(subset=["pickup_time", "accept_time"])
    delivery_clean = delivery_clean.dropna(subset=["delivery_time", "accept_time"])

    df_merged = pd.merge(
        pickup_clean,
        delivery_clean,
        on="order_id",
        how="inner",
        suffixes=("_pickup", "_delivery"),
    )

    # ``source_city`` exists in both inputs when they come from the loader, so
    # the merge renames it; restore a single un-suffixed column.
    if "source_city" not in df_merged.columns:
        if "source_city_pickup" in df_merged.columns:
            df_merged["source_city"] = df_merged["source_city_pickup"]
        elif "source_city_delivery" in df_merged.columns:
            df_merged["source_city"] = df_merged["source_city_delivery"]

    if len(df_merged) == 0:
        raise ValueError("فشل في دمج البيانات - لا توجد مطابقات")

    df_merged["ETA_seconds"] = (df_merged["delivery_time"] - df_merged["pickup_time"]).dt.total_seconds()
    df_merged["ETA_hours"] = df_merged["ETA_seconds"] / 3600
    df_merged["ETA_minutes"] = df_merged["ETA_seconds"] / 60

    # Plausibility window: at least one minute, at most one week (168 h).
    # The lower bound already implies ETA > 0.
    df_merged = df_merged[df_merged["ETA_hours"].between(1 / 60, 168)]

    # Tukey fences (1.5 * IQR) on the remaining ETAs.
    Q1 = df_merged["ETA_hours"].quantile(0.25)
    Q3 = df_merged["ETA_hours"].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df_merged = df_merged[df_merged["ETA_hours"].between(lower_bound, upper_bound)]

    # Return an independent frame so later column assignments by callers do
    # not operate on a filtered view.
    return df_merged.copy()

## 3. COMPREHENSIVE EDA

In [None]:
def comprehensive_eda(df_merged):
    """Draw a 3x3 grid of exploratory plots for the merged pickup/delivery data.

    Panels (subplot slot in parentheses): ETA histogram with mean/median
    markers (1), mean ETA per city (2), ETA by pickup hour (3), ETA by day of
    week (4), ETA box plot per city (5), correlation heatmap of all numeric
    columns (6) and a histogram of the accept-to-pickup delay (9). Slots 7 and
    8 are left empty. The figure is displayed with ``plt.show()``.

    Reads the columns ``ETA_hours``, ``source_city``, ``pickup_time`` and
    ``accept_time_pickup``.

    Side effect: the columns ``pickup_hour``, ``pickup_dayofweek`` (day NAMES,
    not numbers) and ``accept_to_pickup_hours`` are written onto the very
    frame that was passed in; no copy is made.

    Args:
        df_merged: Merged frame containing the columns listed above.

    Returns:
        The same ``df_merged`` object, including the added columns.
    """
    fig = plt.figure(figsize=(20, 16))

    # 1. ETA Distribution
    plt.subplot(3, 3, 1)
    sns.histplot(df_merged['ETA_hours'], bins=50, kde=True, color='skyblue')
    # Vertical markers for mean (red) and median (green); legend labels are in hours.
    plt.axvline(df_merged['ETA_hours'].mean(), color='red', linestyle='--', label=f'المتوسط: {df_merged["ETA_hours"].mean():.2f}h')
    plt.axvline(df_merged['ETA_hours'].median(), color='green', linestyle='--', label=f'الوسيط: {df_merged["ETA_hours"].median():.2f}h')
    plt.title('توزيع ETA (بالساعات)')
    plt.xlabel('ETA (ساعات)')
    plt.ylabel('التكرار')
    plt.legend()

    # 2. ETA by City
    plt.subplot(3, 3, 2)
    # NOTE(review): newer seaborn releases appear to deprecate `ci` in favour of
    # `errorbar` — confirm against the installed seaborn version.
    sns.barplot(x='source_city', y='ETA_hours', data=df_merged, estimator=np.mean, ci='sd', capsize=.2, color='lightcoral')
    plt.title('متوسط ETA حسب المدينة')
    plt.xlabel('المدينة')
    plt.ylabel('متوسط ETA (ساعات)')
    plt.xticks(rotation=45)

    # 3. ETA by Hour
    plt.subplot(3, 3, 3)
    # Adds a column to the caller's frame (see docstring).
    df_merged["pickup_hour"] = df_merged["pickup_time"].dt.hour
    sns.lineplot(x='pickup_hour', y='ETA_hours', data=df_merged, marker='o')
    plt.title('متوسط ETA حسب ساعة الاستلام')
    plt.xlabel('الساعة')
    plt.ylabel('متوسط ETA (ساعات)')
    plt.grid(True, alpha=0.3)

    # 4. ETA by Day of Week
    plt.subplot(3, 3, 4)
    # Stored as English day names here so that `order=day_order` can sort the bars.
    df_merged["pickup_dayofweek"] = df_merged["pickup_time"].dt.day_name()
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    sns.barplot(x='pickup_dayofweek', y='ETA_hours', data=df_merged, order=day_order, color='lightgreen')
    plt.title('متوسط ETA حسب اليوم')
    plt.xlabel('اليوم')
    plt.ylabel('متوسط ETA (ساعات)')
    plt.xticks(rotation=45)

    # 5. Box plot for ETA distribution by city
    plt.subplot(3, 3, 5)
    sns.boxplot(x='source_city', y='ETA_hours', data=df_merged, ax=plt.gca())
    plt.title('توزيع ETA حسب المدينة (Box Plot)')
    # Sets the figure-level super-title to an empty string.
    plt.suptitle('')

    # 6. Correlation heatmap
    plt.subplot(3, 3, 6)
    # Every numeric column present at this point is included (ETA columns,
    # pickup_hour, and any numeric columns carried over from the raw data).
    numerical_cols = df_merged.select_dtypes(include=np.number).columns
    correlation_matrix = df_merged[numerical_cols].corr()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, fmt='.2f', cbar_kws={'shrink': 0.8})
    plt.title('مصفوفة الارتباط')
    
    # 9. Accept to Pickup time analysis (drawn in slot 9; slots 7 and 8 stay empty)
    plt.subplot(3, 3, 9)
    df_merged["accept_to_pickup_hours"] = (df_merged["pickup_time"] - df_merged["accept_time_pickup"]).dt.total_seconds() / 3600
    # Only delays between 0 and 24 hours are plotted; the stored column keeps all values.
    accept_to_pickup_clean = df_merged[(df_merged["accept_to_pickup_hours"] >= 0) & (df_merged["accept_to_pickup_hours"] <= 24)]
    if len(accept_to_pickup_clean) > 0:
        sns.histplot(accept_to_pickup_clean["accept_to_pickup_hours"], bins=30, kde=True)
        plt.title('توزيع الوقت من القبول للاستلام')
        plt.xlabel('الوقت (ساعات)')
        plt.ylabel('التكرار')

    plt.tight_layout()
    plt.show()

    return df_merged

## 4. ADVANCED FEATURE ENGINEERING

In [None]:
def advanced_feature_engineering(df_merged):
    """Derive model features from the merged pickup/delivery frame.

    Works on a copy of *df_merged* and adds:
      * calendar features from ``pickup_time`` (hour, minute, day of week as
        0-6, day, month, quarter, weekend flag, business-hours flag);
      * cyclical hour encoding (``hour_sin`` / ``hour_cos``);
      * ``accept_to_pickup_hours`` clipped to [0, 48] when
        ``accept_time_pickup`` is present;
      * ``distance_km`` (haversine) when all four coordinate columns exist;
      * ``<col>_encoded`` label encodings for object columns with more than
        one distinct value;
      * per-courier ETA mean/std when ``courier_id_pickup`` is present.

    Args:
        df_merged: Frame with at least a datetime ``pickup_time`` column.

    Returns:
        Tuple ``(df_features, feature_columns, label_encoders)`` where
        ``feature_columns`` lists only those selected features that actually
        exist in ``df_features`` and ``label_encoders`` maps each encoded
        source column to its fitted ``LabelEncoder``.
    """
    df_features = df_merged.copy()
    pickup_time = df_features["pickup_time"]

    # Time-based features
    df_features["pickup_hour"] = pickup_time.dt.hour
    df_features["pickup_minute"] = pickup_time.dt.minute
    df_features["pickup_dayofweek"] = pickup_time.dt.dayofweek
    df_features["pickup_day"] = pickup_time.dt.day
    df_features["pickup_month"] = pickup_time.dt.month
    df_features["pickup_quarter"] = pickup_time.dt.quarter

    # Weekend (Saturday=5, Sunday=6) and business hours (09:00-17:59)
    df_features["is_weekend"] = df_features["pickup_dayofweek"].isin([5, 6]).astype(int)
    df_features["is_business_hours"] = df_features["pickup_hour"].between(9, 17).astype(int)

    # Cyclical encoding so that hour 23 and hour 0 end up close together
    hour_angle = 2 * np.pi * df_features["pickup_hour"] / 24
    df_features["hour_sin"] = np.sin(hour_angle)
    df_features["hour_cos"] = np.cos(hour_angle)

    # Accept to pickup time. Treated as optional like the other derived
    # features: when the source column is missing the feature is skipped
    # (and filtered out of feature_columns below) instead of raising KeyError.
    if "accept_time_pickup" in df_features.columns:
        wait_hours = (pickup_time - df_features["accept_time_pickup"]).dt.total_seconds() / 3600
        df_features["accept_to_pickup_hours"] = wait_hours.clip(0, 48)

    # Distance features
    coordinate_columns = ["lng_pickup", "lat_pickup", "lng_delivery", "lat_delivery"]
    if all(col in df_features.columns for col in coordinate_columns):
        def haversine_distance(lat1, lon1, lat2, lon2):
            """Great-circle distance in km between points given in degrees."""
            R = 6371  # mean Earth radius in km
            lat1_rad, lon1_rad, lat2_rad, lon2_rad = map(np.radians, [lat1, lon1, lat2, lon2])
            dlat = lat2_rad - lat1_rad
            dlon = lon2_rad - lon1_rad
            a = np.sin(dlat / 2) ** 2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2) ** 2
            # Rounding can push `a` marginally above 1 for near-antipodal
            # points, which would make arcsin return NaN; clamp to [0, 1].
            a = np.clip(a, 0.0, 1.0)
            c = 2 * np.arcsin(np.sqrt(a))
            return R * c

        df_features["distance_km"] = haversine_distance(
            df_features["lat_pickup"], df_features["lng_pickup"], df_features["lat_delivery"], df_features["lng_delivery"]
        )

    # Categorical encoding (constant columns carry no information and are skipped)
    label_encoders = {}
    for col in df_features.select_dtypes(include=['object']).columns:
        if df_features[col].nunique() > 1:
            le = LabelEncoder()
            df_features[f"{col}_encoded"] = le.fit_transform(df_features[col].astype(str))
            label_encoders[col] = le

    # Aggregated features
    # NOTE(review): these statistics are computed from the target (ETA_hours)
    # over ALL rows, i.e. before any train/test split, so they leak target
    # information into the test set. Fixing that needs the split to happen
    # before this step, which is outside this function's interface.
    if "courier_id_pickup" in df_features.columns:
        courier_stats = df_features.groupby("courier_id_pickup")["ETA_hours"].agg(["mean", "std"]).add_prefix("courier_eta_")
        df_features = df_features.join(courier_stats, on="courier_id_pickup")

    # Select relevant features; optional ones are appended only when available
    feature_columns = [
        'pickup_hour', 'pickup_dayofweek', 'is_weekend', 'accept_to_pickup_hours',
        'hour_sin', 'hour_cos'
    ]
    if 'distance_km' in df_features.columns:
        feature_columns.append('distance_km')
    if 'source_city_encoded' in df_features.columns:
        feature_columns.append('source_city_encoded')
    if 'courier_eta_mean' in df_features.columns:
        feature_columns.extend(['courier_eta_mean', 'courier_eta_std'])

    feature_columns = [col for col in feature_columns if col in df_features.columns]

    return df_features, feature_columns, label_encoders

## 5. ADVANCED OUTLIER DETECTION

In [None]:
def advanced_outlier_detection(df_features, feature_columns):
    """Remove outliers with a z-score filter on the target and an Isolation Forest.

    1. Rows whose ``ETA_hours`` lies 3 or more (population) standard
       deviations from the mean are dropped, as are rows with a non-finite
       ETA. If the ETA has no spread at all the z-score is undefined, so the
       filter is skipped rather than discarding every row.
    2. When more than one numeric feature is available, an
       ``IsolationForest`` (contamination 5 %, fixed seed) is fitted on those
       features (NaNs filled with column medians) and rows it labels ``-1``
       are dropped.

    Args:
        df_features: Frame containing ``ETA_hours`` and the feature columns
            (not modified).
        feature_columns: Candidate feature names; names missing from the frame
            or referring to non-numeric / entirely-NaN columns are ignored.

    Returns:
        A filtered copy of *df_features*.
    """
    df_clean = df_features.copy()

    # 1. Z-score for ETA
    eta = df_clean["ETA_hours"].to_numpy(dtype=float)
    finite = np.isfinite(eta)
    keep = finite.copy()
    if finite.any():
        eta_mean = eta[finite].mean()
        eta_std = eta[finite].std()  # ddof=0, the same convention as scipy.stats.zscore
        if eta_std > 0:
            keep[finite] = np.abs(eta[finite] - eta_mean) / eta_std < 3
    df_clean = df_clean[keep]

    # 2. Isolation Forest for multivariate outliers
    # `is_numeric_dtype` also accepts int32/float32 columns (pandas datetime
    # accessors such as `.dt.hour` yield int32 on recent versions), which the
    # former `dtype in ['int64', 'float64']` test silently skipped.
    numerical_features = [
        col
        for col in feature_columns
        if col in df_clean.columns
        and pd.api.types.is_numeric_dtype(df_clean[col])
        and not df_clean[col].isna().all()
    ]
    if len(numerical_features) > 1 and len(df_clean) > 0:
        iso_data = df_clean[numerical_features].fillna(df_clean[numerical_features].median())
        iso_forest = IsolationForest(contamination=0.05, random_state=42)
        outlier_labels = iso_forest.fit_predict(iso_data)
        df_clean = df_clean[outlier_labels != -1]

    return df_clean

## 6. MODELING DATA PREPARATION

In [None]:
def prepare_advanced_modeling_data(df_features, feature_columns):
    """Impute, split and scale the feature matrix for model training.

    Missing feature values are filled (median for numeric columns, most
    frequent value otherwise), rows without ``ETA_hours`` are dropped,
    non-numeric features are removed, and the data is split 80/20 with
    stratification on ETA quintiles before being scaled with a
    ``RobustScaler`` fitted on the training part only.

    NOTE(review): imputation values are computed on the full data set before
    the split, so a small amount of test information reaches training.

    Args:
        df_features: Frame with the feature columns and ``ETA_hours``
            (not modified).
        feature_columns: Requested feature names; names absent from the frame
            are ignored.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test, scaler,
        available_features)``. The scaled matrices are DataFrames that keep
        the original column names and row index.
    """
    df_modeling = df_features.copy()

    # Resolve the usable features first so that a missing column cannot raise
    # KeyError inside the imputation loop.
    available_features = [col for col in feature_columns if col in df_modeling.columns]

    # Handle missing values
    for col in available_features:
        column = df_modeling[col]
        if not column.isna().any():
            continue
        if pd.api.types.is_numeric_dtype(column):
            fill_value = column.median()
        else:
            modes = column.mode()
            if modes.empty:
                # Entirely-NaN column: there is no "most frequent" value.
                continue
            fill_value = modes.iloc[0]
        # Assign the result back instead of `df[col].fillna(..., inplace=True)`:
        # that chained form acts on a temporary object and leaves the frame
        # unchanged once pandas Copy-on-Write is active.
        df_modeling[col] = column.fillna(fill_value)

    df_modeling = df_modeling.dropna(subset=["ETA_hours"])

    X = df_modeling[available_features]
    y = df_modeling["ETA_hours"]

    non_numeric_cols = X.select_dtypes(exclude=np.number).columns
    if not non_numeric_cols.empty:
        X = X.drop(columns=non_numeric_cols)
        available_features = [c for c in available_features if c not in non_numeric_cols]

    # Stratified split on ETA quintiles keeps the target distribution similar
    # in both parts.
    eta_bins = pd.qcut(y, q=5, labels=False, duplicates='drop')
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=eta_bins
        )
    except ValueError:
        # Stratification is impossible when a bin is too small (for example
        # on very small data sets); fall back to a plain random split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

    # Scaling (fitted on the training data only)
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert scaled arrays back to DataFrame to preserve column names
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

    return X_train_scaled, X_test_scaled, y_train, y_test, scaler, available_features

## 7. MODEL TRAINING & TUNING

In [None]:
def train_improved_models(X_train, X_test, y_train, y_test, feature_columns):
    """Fit the candidate regressors and collect their train/test metrics.

    Candidates: Ridge and Gradient Boosting (grid search), Random Forest
    (randomized search) and a CatBoost regressor with fixed settings. For
    search objects the refitted ``best_estimator_`` is the model that gets
    evaluated and stored.

    Args:
        X_train, X_test: Feature matrices.
        y_train, y_test: Matching targets.
        feature_columns: Feature names, used to label feature importances.

    Returns:
        Dict mapping model name to a dict with the fitted ``model``,
        MAE / RMSE / R² on train and test, ``best_params`` and a
        ``feature_importance`` DataFrame (``None`` when the estimator exposes
        no ``feature_importances_``).
    """
    ridge_search = GridSearchCV(
        Ridge(random_state=42),
        {'alpha': [0.1, 1.0, 10.0]},
        cv=5,
        scoring='r2',
    )
    forest_search = RandomizedSearchCV(
        RandomForestRegressor(random_state=42),
        {'n_estimators': [100, 150], 'max_depth': [10, 20]},
        n_iter=2,
        cv=3,
        scoring='r2',
        random_state=42,
    )
    boosting_search = GridSearchCV(
        GradientBoostingRegressor(random_state=42),
        {'n_estimators': [100], 'learning_rate': [0.1]},
        cv=3,
        scoring='r2',
    )
    catboost_model = cb.CatBoostRegressor(
        iterations=200,
        depth=6,
        learning_rate=0.1,
        loss_function='RMSE',
        verbose=False,
        random_state=42,
    )

    # Same training order as the insertion order of the former dict.
    candidates = [
        ("Ridge", ridge_search),
        ("RandomForest", forest_search),
        ("GradientBoosting", boosting_search),
        ("CatBoost", catboost_model),
    ]

    models_results = {}
    for name, estimator in candidates:
        estimator.fit(X_train, y_train)

        # Search wrappers expose the refitted winner; plain estimators are used as-is.
        fitted = getattr(estimator, 'best_estimator_', estimator)

        train_predictions = fitted.predict(X_train)
        test_predictions = fitted.predict(X_test)

        if hasattr(fitted, 'feature_importances_'):
            importance_table = pd.DataFrame(
                {'feature': feature_columns, 'importance': fitted.feature_importances_}
            ).sort_values('importance', ascending=False)
        else:
            importance_table = None

        if hasattr(estimator, 'best_params_'):
            chosen_params = estimator.best_params_
        else:
            chosen_params = estimator.get_params()

        models_results[name] = {
            'model': fitted,
            'train_mae': mean_absolute_error(y_train, train_predictions),
            'test_mae': mean_absolute_error(y_test, test_predictions),
            'train_rmse': np.sqrt(mean_squared_error(y_train, train_predictions)),
            'test_rmse': np.sqrt(mean_squared_error(y_test, test_predictions)),
            'train_r2': r2_score(y_train, train_predictions),
            'test_r2': r2_score(y_test, test_predictions),
            'best_params': chosen_params,
            'feature_importance': importance_table,
        }

    return models_results

## 8. ENSEMBLE METHODS

In [None]:
def create_ensemble_model(models_results, X_train, X_test, y_train, y_test):
    """Blend the three best models into an R²-weighted average.

    The (up to) three entries with the highest ``test_r2`` are combined; each
    model's weight is its test R² divided by the sum of the selected scores.

    Compared with the earlier version:
      * entries whose ``model`` has no ``predict`` method (for example an
        ``'Ensemble'`` entry left over from a previous run in the same
        notebook session) are ignored instead of crashing;
      * negative or non-finite R² scores contribute a weight of zero, and when
        no selected model has a positive score the weights fall back to a
        plain average. Previously this produced negative or NaN weights.

    NOTE(review): both model selection and weighting use TEST scores, so the
    ensemble's reported test metrics are optimistically biased. A validation
    split would be needed to remove that.

    Args:
        models_results: Dict of per-model result dicts, each containing at
            least ``model`` and ``test_r2``. Updated in place with an
            ``'Ensemble'`` entry.
        X_train, X_test: Feature matrices.
        y_train, y_test: Matching targets.

    Returns:
        Tuple ``(models_results, ensemble_train_pred, ensemble_test_pred)``.

    Raises:
        ValueError: If *models_results* contains no model that can predict.
    """
    candidates = [
        (name, res['test_r2'])
        for name, res in models_results.items()
        if hasattr(res.get('model'), 'predict')
    ]
    if not candidates:
        raise ValueError("create_ensemble_model needs at least one fitted model")

    top_models = sorted(candidates, key=lambda item: item[1], reverse=True)[:3]

    scores = np.array([score for _, score in top_models], dtype=float)
    usable_scores = np.where(np.isfinite(scores) & (scores > 0), scores, 0.0)
    total = usable_scores.sum()
    if total > 0:
        weights = usable_scores / total
    else:
        weights = np.full(len(top_models), 1.0 / len(top_models))

    component_model_names = [name for name, _ in top_models]
    ensemble_train_preds = [
        models_results[name]['model'].predict(X_train) for name in component_model_names
    ]
    ensemble_test_preds = [
        models_results[name]['model'].predict(X_test) for name in component_model_names
    ]

    ensemble_train_pred = np.average(ensemble_train_preds, axis=0, weights=weights)
    ensemble_test_pred = np.average(ensemble_test_preds, axis=0, weights=weights)

    ensemble_results = {
        # A label, not an estimator: there is no single fitted object here.
        'model': 'Weighted Ensemble',
        'train_mae': mean_absolute_error(y_train, ensemble_train_pred),
        'test_mae': mean_absolute_error(y_test, ensemble_test_pred),
        'train_rmse': np.sqrt(mean_squared_error(y_train, ensemble_train_pred)),
        'test_rmse': np.sqrt(mean_squared_error(y_test, ensemble_test_pred)),
        'train_r2': r2_score(y_train, ensemble_train_pred),
        'test_r2': r2_score(y_test, ensemble_test_pred),
        'weights': weights.tolist(),
        'component_models': component_model_names
    }
    models_results['Ensemble'] = ensemble_results
    return models_results, ensemble_train_pred, ensemble_test_pred

## 9. ADVANCED MODEL EVALUATION (Output Enabled)

In [None]:
def advanced_model_evaluation(
    models_results, y_test, best_test_pred
):
    """Compare all models, print a report and plot diagnostics.

    Builds one comparison row per entry of *models_results* (test MAE, test
    RMSE, test/train R² and their difference as an overfitting indicator),
    sorts the rows by test R² in descending order and treats the first row as
    the best model. The table and an error analysis are printed, and a 2x3
    figure is shown with: the R² comparison (slot 1), actual vs. predicted
    (slot 4), residuals (slot 5) and the best model's top-10 feature
    importances when available (slot 6). Slots 2 and 3 are not used.

    The scatter and residual plots, as well as the printed error analysis,
    are computed from *best_test_pred* exactly as passed in, while their
    title uses the best model's name. It is therefore the caller's job to
    pass the predictions that belong to the best-ranked model.

    Args:
        models_results: Dict of per-model result dicts containing at least
            ``test_mae``, ``test_rmse``, ``test_r2`` and ``train_r2``.
        y_test: True target values of the test set.
        best_test_pred: Test-set predictions to analyse.

    Returns:
        Tuple ``(comparison_df, best_model_name)``.
    """
    comparison_data = []
    for model_name, results in models_results.items():
        comparison_data.append(
            {
                "النموذج": model_name,
                "Test MAE": results["test_mae"],
                "Test RMSE": results["test_rmse"],
                "Test R²": results["test_r2"],
                "Train R²": results["train_r2"],
                # Gap between train and test R²; larger means more overfitting.
                "Overfitting": results["train_r2"] - results["test_r2"],
            }
        )

    comparison_df = pd.DataFrame(comparison_data).sort_values("Test R²", ascending=False)

    print("\n📊 مقارنة شاملة للنماذج:")
    print(comparison_df.round(4).to_string(index=False))

    # After the descending sort the first row is the model with the highest test R².
    best_model_name = comparison_df.iloc[0]["النموذج"]
    print(f"\n🏆 أفضل نموذج: {best_model_name}")
    print(f"   Test R²: {comparison_df.iloc[0]['Test R²']:.4f}")
    print(f"   Test MAE: {comparison_df.iloc[0]['Test MAE']:.4f} ساعة")
    
    fig = plt.figure(figsize=(20, 12))

    # 1. Model comparison chart (R²)
    plt.subplot(2, 3, 1)
    sns.barplot(x='Test R²', y='النموذج', data=comparison_df, palette='viridis')
    plt.title('مقارنة النماذج - R² Score')
    plt.xlabel('R² Score')

    # 2. Actual vs Predicted (drawn in subplot slot 4)
    plt.subplot(2, 3, 4)
    plt.scatter(y_test, best_test_pred, alpha=0.5, label='التنبؤات')
    # Diagonal reference line: points on it are perfect predictions.
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2, label='الخط المثالي')
    plt.xlabel("ETA الفعلي (ساعات)")
    plt.ylabel("ETA المتوقع (ساعات)")
    plt.title(f"الفعلي مقابل المتوقع - {best_model_name}")
    plt.legend()
    plt.grid(True)
    
    # 3. Residuals plot (subplot slot 5); residual = actual - predicted
    plt.subplot(2, 3, 5)
    residuals = y_test - best_test_pred
    sns.scatterplot(x=best_test_pred, y=residuals, alpha=0.5)
    plt.axhline(y=0, color='red', linestyle='--')
    plt.xlabel("ETA المتوقع (ساعات)")
    plt.ylabel("البواقي (ساعات)")
    plt.title("رسم البواقي")
    plt.grid(True)

    # 4. Feature importance (subplot slot 6); left empty when the best entry
    # has no importance table (e.g. Ridge or the ensemble entry).
    plt.subplot(2, 3, 6)
    if 'feature_importance' in models_results[best_model_name] and models_results[best_model_name]['feature_importance'] is not None:
        importance_df = models_results[best_model_name]['feature_importance'].head(10)
        sns.barplot(x='importance', y='feature', data=importance_df, palette='plasma')
        plt.title('أهم 10 خصائص')
        plt.xlabel('الأهمية')
        plt.ylabel('الخاصية')

    plt.tight_layout()
    plt.show()
    
    # Error analysis: MAE in hours and minutes, plus the share of predictions
    # whose absolute error is within 0.5, 1 and 2 hours.
    mae = mean_absolute_error(y_test, best_test_pred)
    print("\n🔍 تحليل الأخطاء:")
    print(f"   متوسط الخطأ المطلق: {mae:.2f} ساعة = {mae*60:.0f} دقيقة")
    for bound in [0.5, 1.0, 2.0]:
        within_bound = (np.abs(residuals) <= bound).mean() * 100
        print(f"   التنبؤات ضمن ±{bound} ساعة: {within_bound:.1f}%")
        
    return comparison_df, best_model_name

## 10. MAIN EXECUTION PIPELINE

In [None]:
def main_pipeline():
    """Run the full training workflow from raw CSV files to evaluated models.

    Stages: load data, prepare/merge, EDA plots, feature engineering, outlier
    removal, split and scaling, model training, ensembling and evaluation.

    The predictions handed to the evaluation step now belong to the model
    with the highest test R². Previously the ensemble predictions were always
    passed, so the "best model" plots and error analysis could describe a
    different model than the one named in their titles.

    Returns:
        Dict with ``model``, ``scaler``, ``feature_columns``,
        ``label_encoders``, ``comparison_results``, ``models_results`` and
        ``best_model_name``; or ``None`` when any stage fails (the traceback
        is printed). ``model`` is ``None`` when the ensemble ranks best,
        because no single estimator object represents it; its component names
        and weights are available under ``models_results['Ensemble']``.
    """
    try:
        df_pickup, df_delivery = load_data()
        df_merged = advanced_data_preparation(df_pickup, df_delivery)
        df_merged_eda = comprehensive_eda(df_merged)
        df_features, feature_columns, label_encoders = advanced_feature_engineering(df_merged_eda)
        df_clean = advanced_outlier_detection(df_features, feature_columns)
        X_train, X_test, y_train, y_test, scaler, available_features = prepare_advanced_modeling_data(df_clean, feature_columns)
        models_results = train_improved_models(X_train, X_test, y_train, y_test, available_features)
        models_results, ensemble_train_pred, ensemble_test_pred = create_ensemble_model(models_results, X_train, X_test, y_train, y_test)

        # The evaluation step ranks models by test R²; pick the same leader
        # here so that the predictions it analyses match the model it names.
        leading_name = max(models_results, key=lambda name: models_results[name]["test_r2"])
        if leading_name == "Ensemble":
            leading_test_pred = ensemble_test_pred
        else:
            leading_test_pred = models_results[leading_name]["model"].predict(X_test)

        # The evaluation function is called here, and it's the only one with prints
        comparison_df, best_model_name = advanced_model_evaluation(models_results, y_test, leading_test_pred)

        if best_model_name == 'Ensemble':
            # No callable ensemble object exists yet; see the docstring.
            best_model = None
        else:
            best_model = models_results[best_model_name]['model']

        return {
            "model": best_model,
            "scaler": scaler,
            "feature_columns": available_features,
            "label_encoders": label_encoders,
            "comparison_results": comparison_df,
            "models_results": models_results,
            "best_model_name": best_model_name,
        }

    except Exception:
        # Top-level boundary: report the failure and signal it with None so
        # that a notebook cell does not abort with a raw exception.
        import traceback
        traceback.print_exc()
        return None

## 11. PRODUCTION UTILITIES & EXECUTION

In [None]:
def save_model_for_production(results, filepath_prefix="delivery_prediction_model"):
    """Persist the trained artefacts next to a CSV of the model comparison.

    Writes ``<filepath_prefix>.pkl`` (a pickled dict with the keys ``model``,
    ``scaler``, ``feature_columns``, ``label_encoders`` and ``model_name``)
    and ``<filepath_prefix>_performance.csv`` (the comparison table without
    its index).

    Args:
        results: Result dict holding ``model``, ``scaler``,
            ``feature_columns``, ``label_encoders``, ``best_model_name`` and
            ``comparison_results``.
        filepath_prefix: Path prefix shared by both output files.
    """
    # Pickle key -> key in `results`; only the model name is renamed.
    source_key_by_component = {
        "model": "model",
        "scaler": "scaler",
        "feature_columns": "feature_columns",
        "label_encoders": "label_encoders",
        "model_name": "best_model_name",
    }
    model_components = {
        component: results[source_key]
        for component, source_key in source_key_by_component.items()
    }

    pickle_path = f"{filepath_prefix}.pkl"
    with open(pickle_path, "wb") as pickle_file:
        pickle.dump(model_components, pickle_file)

    performance_path = f"{filepath_prefix}_performance.csv"
    comparison_table = results["comparison_results"]
    comparison_table.to_csv(performance_path, index=False)
    

if __name__ == "__main__":
    # Nothing runs automatically when this file is executed as a script.
    # To run the full pipeline, see the evaluation output and persist the
    # result, execute the following (main_pipeline returns None on failure):
    # final_results = main_pipeline()
    # if final_results:
    #    save_model_for_production(final_results)
    pass # The execution is meant to be done cell by cell in the notebook