In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the pre-scaled dataset from the project's data folder.
# A forward-slash relative path is used because it resolves on Windows,
# Linux and macOS alike; a backslash separator only works on Windows.
data = pd.read_csv("data/scaled_data.csv")

In [3]:
# Remove the 'Unnamed: 0' column (presumably a row index saved by an earlier
# to_csv call -- TODO confirm). Reassigning instead of mutating in place, and
# ignoring an already-missing column, keeps this cell safe to re-run.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')
data.head()

Unnamed: 0,Agent_Age,Agent_Rating,Weather,Traffic,Vehicle,Area,Delivery_Time,Category,Distance_km,Order_Year,Order_Month,Pickup_Time_minutes
0,1.29227,0.843511,4,0,0,3,120,2,-0.079267,2022,3,-1.097211
1,0.771493,-0.431093,3,1,1,0,165,4,-0.021837,2022,3,0.411266
2,-1.138023,-0.749744,2,2,0,3,130,14,-0.084201,2022,3,-1.657058
3,1.465863,0.206209,4,3,0,0,105,3,-0.063323,2022,4,0.100239
4,0.424309,-0.112442,0,0,1,0,150,15,-0.068611,2022,3,-0.72398


In [4]:
# Feature Selection
# The target is pulled out first; the predictors are every remaining column
# except the target itself and 'Order_Year'.
target_column = 'Delivery_Time'
excluded_columns = [target_column, 'Order_Year']

y = data[target_column]
X = data.drop(columns=excluded_columns)

In [5]:
# Train and Test Split

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Gradient Boosting

In [6]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define the Gradient Boosting model.
# random_state is fixed because scikit-learn randomly permutes the features at
# every split, so equally good splits may otherwise be resolved differently
# from run to run and the metrics below would not be reproducible.
model = GradientBoostingRegressor(random_state=42)

# Define the hyperparameters grid (2 x 2 x 2 = 8 candidate combinations)
param_grid = {
    'n_estimators': [300, 500],  # Number of boosting stages
    'learning_rate': [0.01, 0.1],  # Step size
    'max_depth': [3, 4],  # Maximum depth of the tree
}

# Set up GridSearchCV: 3-fold cross-validation on the training set only.
# The scorer is the negated MSE, so a larger (closer to zero) score is better.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,  # use all available CPU cores
)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Get the best parameters and best score
print("Best Hyperparameters:", grid_search.best_params_)
print("Best Score (Negative MSE):", grid_search.best_score_)

# Evaluate the best model on the held-out test set, which the search never saw
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the model using different metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

Best Hyperparameters: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 500}
Best Score (Negative MSE): -537.5828174831066
Mean Absolute Error (MAE): 17.935201109375093
Mean Squared Error (MSE): 522.3282484474632
R² Score: 0.8013825854885417
