In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load the cleaned food-delivery dataset. The path uses forward slashes, which
# resolve on Windows, Linux and macOS alike; a backslash-separated literal
# only resolves on Windows.
df = pd.read_csv('../dataset/food_delivery_cleaned_fn.csv')

In [4]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,...,day_of_week,is_month_start,is_month_end,is_quarter_start,is_quarter_end,is_year_start,is_year_end,is_weekend,order_prepare_time,distance
0,37.0,4.9,22.745049,75.892471,22.765049,75.912471,Sunny,High,2,Snack,...,5,0,0,0,0,0,0,1,15.0,3
1,34.0,4.5,12.913041,77.683237,13.043041,77.813237,Stormy,Jam,2,Snack,...,4,0,0,0,0,0,0,0,5.0,20
2,23.0,4.4,12.914264,77.6784,12.924264,77.6884,Sandstorms,Low,0,Drinks,...,5,0,0,0,0,0,0,1,15.0,1
3,38.0,4.7,11.003669,76.976494,11.053669,77.026494,Sunny,Medium,0,Buffet,...,1,0,0,0,0,0,0,0,10.0,7
4,32.0,4.6,12.972793,80.249982,13.012793,80.289982,Cloudy,High,1,Snack,...,5,0,0,0,0,0,0,1,15.0,6


In [5]:
# Remove the four raw coordinate columns so they are not used as features.
df = df.drop(columns=[
    'Restaurant_latitude', 'Restaurant_longitude',
    'Delivery_location_latitude', 'Delivery_location_longitude',
])

# 'Unnamed: 0' appears to be the row index that was written into the CSV
# (df.head() shows it counting 0, 1, 2, ...). Left in place it would be selected
# as an int64 numeric feature and fed to every model, so drop it here.
# errors='ignore' keeps this step a no-op when the column is absent.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')


In [6]:
# Preview the first five rows again to confirm the remaining columns.
df.head(n=5)

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,...,day_of_week,is_month_start,is_month_end,is_quarter_start,is_quarter_end,is_year_start,is_year_end,is_weekend,order_prepare_time,distance
0,37.0,4.9,Sunny,High,2,Snack,motorcycle,0.0,No,Urban,...,5,0,0,0,0,0,0,1,15.0,3
1,34.0,4.5,Stormy,Jam,2,Snack,scooter,1.0,No,Metropolitian,...,4,0,0,0,0,0,0,0,5.0,20
2,23.0,4.4,Sandstorms,Low,0,Drinks,motorcycle,1.0,No,Urban,...,5,0,0,0,0,0,0,1,15.0,1
3,38.0,4.7,Sunny,Medium,0,Buffet,motorcycle,1.0,No,Metropolitian,...,1,0,0,0,0,0,0,0,10.0,7
4,32.0,4.6,Cloudy,High,1,Snack,scooter,1.0,No,Metropolitian,...,5,0,0,0,0,0,0,1,15.0,6


In [7]:
# Target is the 'Time_taken(min)' column; the features are every other column.
y = df['Time_taken(min)']
X = df.drop(columns=['Time_taken(min)'])


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the shuffled
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
    shuffle=True,
)


In [9]:
# Partition the training feature names by dtype: int64/float64 columns are
# treated as numeric, object/category columns as categorical.
numeric_view = X_train.select_dtypes(include=['int64', 'float64'])
categorical_view = X_train.select_dtypes(include=['object', 'category'])

num_cols = list(numeric_view.columns)
cat_cols = list(categorical_view.columns)


In [11]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Column-wise preprocessing shared by every pipeline below:
#   - 'num': standardize the numeric columns
#   - 'cat': one-hot encode the categorical columns as a dense array;
#            handle_unknown='ignore' avoids errors on categories unseen at fit time
numeric_step = ('num', StandardScaler(), num_cols)
categorical_step = (
    'cat',
    OneHotEncoder(handle_unknown='ignore', sparse_output=False),
    cat_cols,
)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])


In [12]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, mean_absolute_error

# Candidate regressors to compare, each with a fixed seed.
models = {
    'Ridge': Ridge(random_state=42),
    'RandomForest': RandomForestRegressor(n_jobs=-1, random_state=42),
    'XGBoost': XGBRegressor(n_jobs=-1, random_state=42, verbosity=0),
}

# sklearn maximizes scores, so the scorer returns the negated MAE
# (greater_is_better=False); the sign is flipped back when reporting.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# 5-fold cross-validation of each candidate inside the shared preprocessing
# pipeline, using the training split only.
results = {}
for name in models:
    estimator = models[name]
    pipe = Pipeline(steps=[('prep', preprocessor), ('model', estimator)])
    cv_scores = cross_val_score(
        pipe,
        X_train,
        y_train,
        scoring=mae_scorer,
        cv=5,
        n_jobs=-1,
    )
    mean_mae = -cv_scores.mean()
    results[name] = mean_mae
    print(f"{name:12s} CV MAE: {mean_mae:.2f}")

# The winner is the entry with the smallest mean MAE (first one wins on a tie).
best_name, best_mae = min(results.items(), key=lambda item: item[1])
print(f"\nBest performer: {best_name} (MAE = {results[best_name]:.2f})")


Ridge        CV MAE: 4.97
RandomForest CV MAE: 3.20
XGBoost      CV MAE: 3.20

Best performer: RandomForest (MAE = 3.20)


In [13]:
from sklearn.model_selection import RandomizedSearchCV

# Search space for the XGBoost step. Keys follow the '<step>__<param>'
# convention so each value is routed to the pipeline step named 'model'.
param_dist = dict(
    model__n_estimators=[100, 300, 500, 800],
    model__max_depth=[3, 5, 7, 9],
    model__learning_rate=[0.01, 0.05, 0.1, 0.2],
    model__subsample=[0.6, 0.8, 1.0],
    model__colsample_bytree=[0.6, 0.8, 1.0],
)

# Same preprocessing as before, paired with an untuned XGBoost regressor whose
# hyper-parameters the search will overwrite.
xgb_base = XGBRegressor(random_state=42, verbosity=0)
pipe = Pipeline(steps=[('prep', preprocessor), ('model', xgb_base)])

# Randomized search: 30 sampled combinations, each scored by 3-fold CV on
# negated MAE, run in parallel with a fixed seed.
search_options = dict(
    n_iter=30,
    cv=3,
    scoring=mae_scorer,
    n_jobs=-1,
    random_state=42,
    verbose=1,
)
rs = RandomizedSearchCV(pipe, param_distributions=param_dist, **search_options)
rs.fit(X_train, y_train)

# best_score_ is the negated MAE, so flip the sign for display.
print("Best parameters:\n", rs.best_params_)
print("Best CV MAE:   ", -rs.best_score_)


Fitting 3 folds for each of 30 candidates, totalling 90 fits
Best parameters:
 {'model__subsample': 1.0, 'model__n_estimators': 100, 'model__max_depth': 9, 'model__learning_rate': 0.05, 'model__colsample_bytree': 1.0}
Best CV MAE:    3.1236610412597656


In [14]:
# The search reports parameters under their pipeline names ('model__...');
# strip that marker so the values can be passed straight to XGBRegressor.
tuned_params = {}
for key, value in rs.best_params_.items():
    tuned_params[key.replace('model__', '')] = value

# Final pipeline: shared preprocessing followed by XGBoost with the tuned settings.
tuned_xgb = XGBRegressor(random_state=42, verbosity=0, **tuned_params)
final_pipe = Pipeline(steps=[
    ('prep', preprocessor),
    ('model', tuned_xgb),
])


In [15]:
from sklearn.metrics import mean_absolute_error, r2_score

# Train the tuned pipeline on the whole training split.
final_pipe.fit(X_train, y_train)

# Score the held-out test split, which was not used for model selection.
y_pred = final_pipe.predict(X_test)
test_mae, test_r2 = (
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print(f"Test  MAE: {test_mae:.2f}")
print(f"Test  R2:  {test_r2:.3f}")


Test  MAE: 3.13
Test  R2:  0.824


In [16]:
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, mean_absolute_error

# 1. Neural-network regressor settings, collected in one place.
mlp_settings = dict(
    hidden_layer_sizes=(100, 50),  # two hidden layers: 100 units, then 50
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=200,                  # upper bound on training epochs
    random_state=42,
    early_stopping=True,           # stop once the validation score stalls
    n_iter_no_change=10,           # epochs without improvement tolerated
)
nn = MLPRegressor(**mlp_settings)

# 2. Same pipeline shape as the earlier models, with the MLP as the final step.
pipe_nn = Pipeline(steps=[('prep', preprocessor), ('model', nn)])

# 3. 5-fold cross-validated MAE on the training split. The scorer is negated
#    (greater_is_better=False), so the sign is flipped back for reporting.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)
cv_scores_nn = cross_val_score(
    pipe_nn,
    X_train,
    y_train,
    scoring=mae_scorer,
    cv=5,
    n_jobs=-1,
)
mean_mae_nn = -cv_scores_nn.mean()
print(f"Neural Net CV MAE: {mean_mae_nn:.2f}")


Neural Net CV MAE: 4.15


In [17]:
from sklearn.model_selection import RandomizedSearchCV

# Search space for the MLP step; 'model__' routes each value to the pipeline
# step named 'model'.
param_dist_nn = dict(
    model__hidden_layer_sizes=[(50, 50), (100, 50), (100, 100, 50)],
    model__learning_rate_init=[0.0001, 0.001, 0.01],
    model__alpha=[1e-5, 1e-4, 1e-3],  # L2 penalty strength
    model__activation=['relu', 'tanh'],
    model__solver=['adam', 'sgd'],
)

# Randomized search: 20 sampled combinations, each scored by 3-fold CV on
# negated MAE, run in parallel with a fixed seed.
nn_search_options = dict(
    n_iter=20,
    cv=3,
    scoring=mae_scorer,
    n_jobs=-1,
    random_state=42,
    verbose=1,
)
rs_nn = RandomizedSearchCV(
    pipe_nn,
    param_distributions=param_dist_nn,
    **nn_search_options,
)
rs_nn.fit(X_train, y_train)

# best_score_ is the negated MAE, so flip the sign for display.
print("Best NN params:", rs_nn.best_params_)
print("Best NN CV MAE:", -rs_nn.best_score_)


Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best NN params: {'model__solver': 'sgd', 'model__learning_rate_init': 0.001, 'model__hidden_layer_sizes': (100, 100, 50), 'model__alpha': 1e-05, 'model__activation': 'tanh'}
Best NN CV MAE: 3.9999502018332787
