In [248]:
import pandas as pd

In [249]:
# Load the dataset; the path is relative to the notebook's working directory.
# NOTE(review): the file name suggests the data is already cleaned — confirm upstream.
df = pd.read_csv('data/clean_data.csv')

In [250]:
# Summarize column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44564 entries, 0 to 44563
Data columns (total 17 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Delivery_person_Age             44564 non-null  float64
 1   Delivery_person_Ratings         44564 non-null  float64
 2   Weather_conditions              44564 non-null  object 
 3   Road_traffic_density            44564 non-null  object 
 4   Vehicle_condition               44564 non-null  int64  
 5   Type_of_order                   44564 non-null  object 
 6   Type_of_vehicle                 44564 non-null  object 
 7   multiple_deliveries             44564 non-null  float64
 8   Festival                        44564 non-null  object 
 9   City                            44564 non-null  object 
 10  Time_taken (min)                44564 non-null  int64  
 11  Week_days                       44564 non-null  object 
 12  Time_Orderd_Hours               

In [251]:
# Feature matrix: every column except the regression target.
X = df.drop(columns=['Time_taken (min)'])

In [252]:
# Display the feature frame (target column removed).
X

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Week_days,Time_Orderd_Hours,Time_Orderd_Minutes,Time_Order_picked_Hours,Time_Order_picked_Minutes,Distance_Resturant_to_Location
0,36.0,4.2,Fog,Jam,2,Snack,motorcycle,3.0,No,Metropolitian,Saturday,21,55,22,10,10.280582
1,21.0,4.7,Stormy,High,1,Meal,motorcycle,1.0,No,Metropolitian,Sunday,14,55,15,5,6.242319
2,23.0,4.7,Sandstorms,Medium,1,Drinks,scooter,1.0,No,Metropolitian,Friday,17,30,17,40,13.787860
3,34.0,4.3,Sandstorms,Low,0,Buffet,motorcycle,0.0,No,Metropolitian,Sunday,9,20,9,30,2.930258
4,24.0,4.7,Fog,Jam,1,Snack,scooter,1.0,No,Metropolitian,Monday,19,50,20,5,19.396618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44559,30.0,4.8,Windy,High,1,Meal,motorcycle,0.0,No,Metropolitian,Thursday,11,35,11,45,1.489846
44560,21.0,4.6,Windy,Jam,0,Buffet,motorcycle,1.0,No,Metropolitian,Wednesday,19,55,20,10,11.007735
44561,30.0,4.9,Cloudy,Low,1,Drinks,scooter,0.0,No,Metropolitian,Friday,23,50,0,0,4.657195
44562,20.0,4.7,Cloudy,High,0,Snack,motorcycle,1.0,No,Metropolitian,Monday,13,35,13,40,6.232393


In [253]:
# Regression target: the 'Time_taken (min)' column.
y = df['Time_taken (min)']

In [254]:
# Display the target series.
y

0        46
1        23
2        21
3        20
4        41
         ..
44559    32
44560    36
44561    16
44562    26
44563    36
Name: Time_taken (min), Length: 44564, dtype: int64

In [255]:
# Categorical features are the object-dtype columns of X.
cat_cols = X.select_dtypes(include=['object']).columns

In [256]:
# Numeric features are every column of X that is not object-dtype.
num_cols = X.select_dtypes(exclude=['object']).columns

In [257]:
# Show which columns were selected as categorical (object dtype).
cat_cols

Index(['Weather_conditions', 'Road_traffic_density', 'Type_of_order',
       'Type_of_vehicle', 'Festival', 'City', 'Week_days'],
      dtype='object')

In [258]:
# Category order used by the ordinal encoder for each categorical column.
# The position of a label in its list is the integer code it is encoded as.
Weather_conditions_categories = [
    'Fog', 'Stormy', 'Sandstorms', 'Windy', 'Cloudy', 'Sunny',
]
Road_traffic_density_categories = ['Jam', 'High', 'Medium', 'Low']
Type_of_order_categories = ['Snack', 'Meal', 'Drinks', 'Buffet']
Type_of_vehicle_categories = ['motorcycle', 'scooter', 'electric_scooter']
Festival_categories = ['No', 'Yes']
City_categories = ['Metropolitian', 'Urban', 'Semi-Urban']
Week_days_categories = [
    'Saturday', 'Sunday', 'Friday', 'Monday',
    'Tuesday', 'Wednesday', 'Thursday',
]

In [259]:
# List the distinct weekday labels present in the data, for comparison with
# Week_days_categories.
df['Week_days'].unique()

array(['Saturday', 'Sunday', 'Friday', 'Monday', 'Tuesday', 'Wednesday',
       'Thursday'], dtype=object)

In [260]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [261]:
# Numeric features: median-impute missing values, then standardize.
num_steps = [
    ("imputer", SimpleImputer(strategy='median')),
    ("scaler", StandardScaler()),
]
num_pipeline = Pipeline(steps=num_steps)

In [262]:
# Explicit category order for each ordinally-encoded column, keyed by column
# name. OrdinalEncoder matches its `categories` entries to input columns by
# position, so the list is built in `cat_cols` order rather than hand-ordered.
categories_by_column = {
    'Weather_conditions': Weather_conditions_categories,
    'Road_traffic_density': Road_traffic_density_categories,
    'Type_of_order': Type_of_order_categories,
    'Type_of_vehicle': Type_of_vehicle_categories,
    'Festival': Festival_categories,
    'City': City_categories,
    'Week_days': Week_days_categories,
}

# A KeyError here means `cat_cols` contains a column with no category list.
ordinal_categories = [categories_by_column[col] for col in cat_cols]

# Categorical features: fill gaps with the most frequent label, encode each
# label as its position in the category list, then standardize the codes.
cat_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('ordinalencoder', OrdinalEncoder(categories=ordinal_categories)),
        ('scaler', StandardScaler())
    ]
)

In [263]:
# Route the numeric and categorical columns through their own pipelines.
preprocessor = ColumnTransformer(
    transformers=[
        ('num_pipeline', num_pipeline, num_cols),
        ('cat_pipeline', cat_pipeline, cat_cols),
    ]
)

In [264]:
from sklearn.model_selection import train_test_split

In [265]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [266]:
# Fit the preprocessor on the training split only, then apply the fitted
# transform to the test split.
train_array = preprocessor.fit_transform(X_train)
test_array = preprocessor.transform(X_test)

# Wrap the transformed arrays back into DataFrames with the generated names.
# X_train / X_test are rebound to the transformed frames from here on.
feature_names = preprocessor.get_feature_names_out()
X_train = pd.DataFrame(train_array, columns=feature_names)
X_test = pd.DataFrame(test_array, columns=feature_names)

In [267]:
# Inspect the transformed training features.
X_train

Unnamed: 0,num_pipeline__Delivery_person_Age,num_pipeline__Delivery_person_Ratings,num_pipeline__Vehicle_condition,num_pipeline__multiple_deliveries,num_pipeline__Time_Orderd_Hours,num_pipeline__Time_Orderd_Minutes,num_pipeline__Time_Order_picked_Hours,num_pipeline__Time_Order_picked_Minutes,num_pipeline__Distance_Resturant_to_Location,cat_pipeline__Weather_conditions,cat_pipeline__Road_traffic_density,cat_pipeline__Type_of_order,cat_pipeline__Type_of_vehicle,cat_pipeline__Festival,cat_pipeline__City,cat_pipeline__Week_days
0,-0.802865,-1.729512,-0.002163,-1.323731,0.127274,0.549163,0.154622,1.140123,1.231220,-1.447440,0.302936,0.454767,0.790130,-0.138607,-0.536459,0.487949
1,1.653156,-0.120772,-1.229126,-1.323731,0.127274,-0.077447,0.154622,0.848545,-0.331386,0.307332,0.302936,-0.438727,-0.772101,-0.138607,1.795199,-1.521369
2,-0.276575,0.200976,-0.002163,-1.323731,1.163123,-0.704058,1.108332,-0.317770,-0.387460,-0.277592,1.106283,-0.438727,-0.772101,-0.138607,1.795199,0.487949
3,-1.153725,0.200976,1.224799,-1.323731,-1.944422,0.235858,-1.752797,1.140123,-1.459405,-0.277592,1.106283,-1.332221,2.352361,-0.138607,1.795199,0.487949
4,0.074285,0.200976,1.224799,-1.323731,-1.944422,-0.704058,-1.752797,0.265388,-1.179239,1.477179,1.106283,-0.438727,0.790130,-0.138607,-0.536459,-1.019039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31189,0.951436,-1.086016,-1.229126,0.441734,0.334444,-1.330668,0.345364,-0.609348,0.735838,0.307332,-1.303759,0.454767,-0.772101,-0.138607,-0.536459,-1.019039
31190,-0.978295,0.844472,-1.229126,0.441734,-1.322913,-0.077447,-1.180571,0.556966,-1.459822,0.892255,-0.500412,0.454767,-0.772101,-0.138607,-0.536459,-0.516710
31191,-1.329155,0.844472,-0.002163,-1.323731,-0.908574,0.235858,-0.799087,0.556966,-0.657833,1.477179,-0.500412,-0.438727,0.790130,-0.138607,1.795199,1.492608
31192,-0.276575,1.166220,1.224799,-1.323731,1.163123,0.862468,1.108332,1.431702,-0.921806,1.477179,1.106283,1.348261,-0.772101,-0.138607,1.795199,-0.516710


In [268]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [277]:
# Baseline: fit a plain linear regression on the preprocessed training features.
regression = LinearRegression()
regression.fit(X_train, y_train)

In [278]:
# Score the baseline on the held-out test split (score() returns R^2 for regressors).
regression.score(X_test,y_test)

0.5436580697318331

In [271]:
# Re-display the transformed training features.
X_train

Unnamed: 0,num_pipeline__Delivery_person_Age,num_pipeline__Delivery_person_Ratings,num_pipeline__Vehicle_condition,num_pipeline__multiple_deliveries,num_pipeline__Time_Orderd_Hours,num_pipeline__Time_Orderd_Minutes,num_pipeline__Time_Order_picked_Hours,num_pipeline__Time_Order_picked_Minutes,num_pipeline__Distance_Resturant_to_Location,cat_pipeline__Weather_conditions,cat_pipeline__Road_traffic_density,cat_pipeline__Type_of_order,cat_pipeline__Type_of_vehicle,cat_pipeline__Festival,cat_pipeline__City,cat_pipeline__Week_days
0,-0.802865,-1.729512,-0.002163,-1.323731,0.127274,0.549163,0.154622,1.140123,1.231220,-1.447440,0.302936,0.454767,0.790130,-0.138607,-0.536459,0.487949
1,1.653156,-0.120772,-1.229126,-1.323731,0.127274,-0.077447,0.154622,0.848545,-0.331386,0.307332,0.302936,-0.438727,-0.772101,-0.138607,1.795199,-1.521369
2,-0.276575,0.200976,-0.002163,-1.323731,1.163123,-0.704058,1.108332,-0.317770,-0.387460,-0.277592,1.106283,-0.438727,-0.772101,-0.138607,1.795199,0.487949
3,-1.153725,0.200976,1.224799,-1.323731,-1.944422,0.235858,-1.752797,1.140123,-1.459405,-0.277592,1.106283,-1.332221,2.352361,-0.138607,1.795199,0.487949
4,0.074285,0.200976,1.224799,-1.323731,-1.944422,-0.704058,-1.752797,0.265388,-1.179239,1.477179,1.106283,-0.438727,0.790130,-0.138607,-0.536459,-1.019039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31189,0.951436,-1.086016,-1.229126,0.441734,0.334444,-1.330668,0.345364,-0.609348,0.735838,0.307332,-1.303759,0.454767,-0.772101,-0.138607,-0.536459,-1.019039
31190,-0.978295,0.844472,-1.229126,0.441734,-1.322913,-0.077447,-1.180571,0.556966,-1.459822,0.892255,-0.500412,0.454767,-0.772101,-0.138607,-0.536459,-0.516710
31191,-1.329155,0.844472,-0.002163,-1.323731,-0.908574,0.235858,-0.799087,0.556966,-0.657833,1.477179,-0.500412,-0.438727,0.790130,-0.138607,1.795199,1.492608
31192,-0.276575,1.166220,1.224799,-1.323731,1.163123,0.862468,1.108332,1.431702,-0.921806,1.477179,1.106283,1.348261,-0.772101,-0.138607,1.795199,-0.516710


In [272]:
import numpy as np

In [273]:
def evaluate_model(true, predicted):
    """Compute regression error metrics for a set of predictions.

    Args:
        true: Ground-truth target values.
        predicted: Predicted values, aligned element-wise with ``true``.

    Returns:
        A ``(mae, rmse, r2)`` tuple: mean absolute error, root mean squared
        error, and the R^2 score.
    """
    mae = mean_absolute_error(true, predicted)
    # Compute the MSE once and derive the RMSE from it.
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_squre = r2_score(true, predicted)

    return mae, rmse, r2_squre


In [274]:
# Candidate linear models, each constructed with its default hyperparameters.
models = {
    "LinearRegression": LinearRegression(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "ElasticNet": ElasticNet(),
}

train_model_list =[]
model_list = []
r2_list = []

# Iterate over (name, estimator) pairs directly instead of rebuilding
# list(models.keys()) / list(models.values()) on every access.
for model_name, model in models.items():
    model.fit(X_train, y_train)

    # Predictions and metrics are computed on the held-out test split.
    y_predict = model.predict(X_test)

    mae, rmse, r2_squre = evaluate_model(y_test, y_predict)

    print(model_name)

    model_list.append(model_name)

    # NOTE(review): the label says "Training" but the numbers are test-set
    # metrics; the text is left unchanged so it matches the recorded output.
    print("model Training performance")
    print("RMSE: ", rmse)
    print("MAE: ", mae)
    print("R2_Square: ", r2_squre)

    r2_list.append(r2_squre)

    print("="*35)
    print("\n")

LinearRegression
model Training performance
RMSE:  6.358630945013565
MAE:  5.0422017196325
R2_Square:  0.5428388087952705


Lasso
model Training performance
RMSE:  6.836808868665619
MAE:  5.459370100179052
R2_Square:  0.47149512844514885


Ridge
model Training performance
RMSE:  6.358626906796199
MAE:  5.042197958440312
R2_Square:  0.5428393894597848


ElasticNet
model Training performance
RMSE:  6.88177800608301
MAE:  5.506707629404143
R2_Square:  0.4645197776711877


