In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [7]:
# Location of the raw delivery dataset.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere until it points at a local copy of the CSV.
csv_path = "C:/Users/Rakshitha/Downloads/Larger_Package_Delivery_Route_Optimization.csv"

# Read the whole file into a DataFrame and display it as the cell output.
df = pd.read_csv(filepath_or_buffer=csv_path)
df


Unnamed: 0,PackageID,Origin,Destination,Distance_km,TimeWindow_Start,TimeWindow_End,Priority,DeliveryStatus
0,PKG0001,Warehouse A,Location 14,10.3,09:00,12:00,Medium,Pending
1,PKG0002,Warehouse B,Location 5,7.0,07:30,12:30,Medium,Delivered
2,PKG0003,Warehouse C,Location 8,6.7,10:00,13:30,Medium,Delivered
3,PKG0004,Warehouse C,Location 17,49.8,10:30,12:30,High,Pending
4,PKG0005,Warehouse B,Location 14,10.2,06:30,16:00,Low,Delivered
...,...,...,...,...,...,...,...,...
195,PKG0196,Warehouse B,Location 30,39.5,10:00,12:30,Low,Delivered
196,PKG0197,Warehouse A,Location 23,39.4,08:30,11:30,Low,Pending
197,PKG0198,Warehouse B,Location 10,18.0,09:00,17:00,Low,Delivered
198,PKG0199,Warehouse B,Location 21,10.4,10:30,13:30,Low,Pending


In [8]:
# Build a SYNTHETIC regression target. "DeliveryTime" is not read from the CSV; it is
# generated here as 2 * Distance_km + a per-priority offset + Gaussian noise
# (mean 0, standard deviation 5). Any metric computed later therefore measures how
# well the model recovers this formula.
priority_to_time = {"High": 15, "Medium": 10, "Low": 5}

# Seeded generator so the target (and every downstream metric) is reproducible on
# Restart & Run All; previously the noise came from the unseeded global RNG and
# changed on every execution. Values will differ from outputs recorded before the fix.
rng = np.random.default_rng(42)

priority_offset = df["Priority"].map(priority_to_time)
# .map() yields NaN for labels missing from the mapping (including when this cell is
# re-run after "Priority" has been label-encoded); fail here with a clear message
# instead of propagating NaN into the target.
if priority_offset.isna().any():
    unknown = sorted(df.loc[priority_offset.isna(), "Priority"].astype(str).unique())
    raise ValueError(f"Unmapped Priority values: {unknown}")

df["DeliveryTime"] = df["Distance_km"] * 2 + priority_offset + rng.normal(0, 5, len(df))

In [9]:
# Replace each categorical text column with integer codes. The fitted encoder for
# every column is kept in `label_encoders` so the codes can be mapped back later.
categorical_columns = ["Origin", "Destination", "Priority"]

# Fit one encoder per column on the original values...
label_encoders = {name: LabelEncoder().fit(df[name]) for name in categorical_columns}

# ...then overwrite each column in place with its encoded form.
for name, encoder in label_encoders.items():
    df[name] = encoder.transform(df[name])


In [10]:
def _clock_to_hours(value):
    """Convert an "H:MM" / "HH:MM" clock string to fractional hours ("07:30" -> 7.5).

    The text is split on ":" rather than sliced at fixed positions, so hours
    without a leading zero (e.g. "9:30") are parsed correctly.

    Values that are already numeric are returned unchanged, which makes the cell
    safe to re-run after the columns have been converted.

    Raises:
        ValueError: if the text is not exactly two ":"-separated integers.
    """
    if isinstance(value, (int, float, np.number)):
        return value
    hours, minutes = str(value).strip().split(":")
    return int(hours) + int(minutes) / 60


# Convert both ends of the delivery time window to numeric hours, in place.
for column in ("TimeWindow_Start", "TimeWindow_End"):
    df[column] = df[column].apply(_clock_to_hours)


In [11]:
# Columns that must not be used as predictors: the package identifier, the delivery
# status, and the target itself.
non_feature_columns = ["PackageID", "DeliveryStatus", "DeliveryTime"]

# pandas names header-less CSV columns "Unnamed: N". In this dataset "Unnamed: 0"
# appears to be the saved row index (0, 1, 2, ...); it was previously left in X and
# fed to the model as a feature, so it is dropped here as well.
non_feature_columns += [name for name in df.columns if str(name).startswith("Unnamed:")]

X = df.drop(columns=non_feature_columns)
y = df["DeliveryTime"]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
# Standardise the features. The scaler's statistics are learned from the training
# split only, and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [13]:
# Random-forest regressor with library-default hyper-parameters; only the seed is
# pinned so that repeated fits give the same forest.
forest_params = {"random_state": 42}
model = RandomForestRegressor(**forest_params)


In [14]:
# Train the forest on the scaled training features and the training targets.
# fit() returns the estimator, which is shown as the cell output.
model.fit(X=X_train, y=y_train)


In [15]:
# Predict the target for every held-out row; the bare name on the last line
# displays the full prediction array as the cell output.
y_pred = model.predict(X=X_test)
y_pred

array([ 54.31594333,  76.08966458,  44.36448683,  33.93477223,
       102.74067339,  56.57672589,  56.69424985,  53.5994803 ,
        34.64925176,  73.41580611,  27.17879495,  34.38306377,
        73.56449897,  77.21181452,  80.03931528,  29.42836524,
        29.04898766,  85.8702079 ,  60.8305303 ,  58.22283885,
        72.37196335,  90.57617584,  83.80064944,  54.74360868,
        59.54588874,  86.55372558,  83.66438473,  19.01028921,
        79.97691249,  45.35888322, 103.60635059, 108.33908379,
        86.64828151, 104.53524981,  77.07980906,  55.58121045,
        61.92623913,  60.74670577,  28.20322918,  76.13142277])

In [16]:
# Score the held-out predictions with three regression metrics, in this order:
# mean absolute error, mean squared error, coefficient of determination.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# One line per metric.
print(
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"R² Score: {r2}",
    sep="\n",
)

Mean Absolute Error (MAE): 4.807881505396568
Mean Squared Error (MSE): 44.23712108112544
R² Score: 0.9352958106832977
