In [77]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OrdinalEncoder, LabelEncoder
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet, Lasso
from sklearn.ensemble import RandomForestRegressor

## Model Training

In [150]:
df = pd.read_csv('.\data\Clean_data.csv')
df.head()


Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,...,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken (min),Displacement
0,0xcdcd,DEHRES17DEL01,36.0,4.2,30.327968,78.046106,30.397968,78.116106,12-02-2022,21:55,...,Fog,Jam,2,Snack,motorcycle,3.0,No,Metropolitian,46,10.280596
1,0xd987,KOCRES16DEL01,21.0,4.7,10.003064,76.307589,10.043064,76.347589,13-02-2022,14:55,...,Stormy,High,1,Meal,motorcycle,1.0,No,Metropolitian,23,6.242327
2,0x2784,PUNERES13DEL03,23.0,4.7,18.56245,73.916619,18.65245,74.006619,04-03-2022,17:30,...,Sandstorms,Medium,1,Drinks,scooter,1.0,No,Metropolitian,21,13.787879
3,0xc8b6,LUDHRES15DEL02,34.0,4.3,30.899584,75.809346,30.919584,75.829346,13-02-2022,09:20,...,Sandstorms,Low,0,Buffet,motorcycle,0.0,No,Metropolitian,20,2.930262
4,0xdb64,KNPRES14DEL02,24.0,4.7,26.463504,80.372929,26.593504,80.502929,14-02-2022,19:50,...,Fog,Jam,1,Snack,scooter,1.0,No,Metropolitian,41,19.396645


In [142]:

def drop_features(df, feature):
    df = df.drop([feature], axis=1, inplace=True)
    return df
feature=['ID','Delivery_person_ID','Restaurant_latitude', 'Restaurant_longitude', 'Delivery_location_latitude', 'Delivery_location_longitude', 'Order_Date', 'Time_Orderd', 'Time_Order_picked']
for i in feature:
    drop_features(df,i)

In [143]:
class encoding(BaseEstimator, TransformerMixin):
    def __init__(self, columns=['Weather_conditions', 'Road_traffic_density', 'Type_of_order', 'Type_of_vehicle', 'Festival', 'City']):
        self.columns = columns
    def fit(self, df):
        return self
    def transform(self, df):
        oe = OrdinalEncoder()
        df[self.columns] = oe.fit_transform(df[self.columns])
        return df
class feature_scaling(BaseEstimator, TransformerMixin):
    def __init__(self, columns=['Delivery_person_Age', 'Delivery_person_Ratings', 'Displacement']):
        self.columns = columns
    def fit(self, df):
        return self
    def transform(self, df):
        sc = StandardScaler()
        df[self.columns] = sc.fit_transform(df[self.columns])
        return df

In [144]:

pipe = Pipeline([
    ('encoding', encoding()),
    ('feature_scaling', feature_scaling())
])
pipe

In [145]:
pipe.fit_transform(df)

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken (min),Displacement
0,1.129501,-1.323855,1.0,1.0,2,3.0,2.0,3.0,0.0,0.0,46,-0.080959
1,-1.504150,0.202119,3.0,0.0,1,2.0,2.0,1.0,0.0,0.0,23,-0.084631
2,-1.152997,0.202119,2.0,3.0,1,1.0,3.0,1.0,0.0,0.0,21,-0.077770
3,0.778347,-1.018660,2.0,2.0,0,0.0,2.0,0.0,0.0,0.0,20,-0.087642
4,-0.977420,0.202119,1.0,1.0,1,3.0,3.0,1.0,0.0,0.0,41,-0.072670
...,...,...,...,...,...,...,...,...,...,...,...,...
45579,0.076040,0.507314,5.0,0.0,1,2.0,2.0,0.0,0.0,0.0,32,-0.088952
45580,-1.504150,-0.103076,5.0,1.0,0,0.0,2.0,1.0,0.0,0.0,36,-0.080298
45581,0.076040,0.812509,0.0,2.0,1,1.0,3.0,0.0,0.0,0.0,16,-0.086072
45582,-1.679727,0.202119,0.0,0.0,0,3.0,2.0,1.0,0.0,0.0,26,-0.084640


In [146]:
df['Vehicle_condition'] = df['Vehicle_condition'].astype(float)
x = df.drop(['Time_taken (min)'], axis=1)
y = df['Time_taken (min)']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [147]:

models = {
    "LinearRegression": LinearRegression(n_jobs=-1),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "ElasticNet": ElasticNet(),
    "RandomForest": RandomForestRegressor()
}

In [133]:

models = {
    "LinearRegression": LinearRegression(n_jobs=-1),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "ElasticNet": ElasticNet(),
    "RandomForest": RandomForestRegressor()
}


In [149]:

def train_model(model, model_name, x_train= x_train, x_test = x_test, y_train = y_train, y_test = y_test):
    print('='*35)
    print('\n')
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(model_name,":")
    print("r2_score", r2_score(y_test, y_pred)*100)
    print("Score", ":", mean_squared_error(y_pred, y_test))
    print("Mean score", ":", np.sqrt(mean_squared_error(y_pred, y_test)))
for model_name, model in models.items():
    train_model(model, model_name)



LinearRegression :
r2_score 41.02740697049602
Score : 52.68117717241389
Mean score : 7.258180017911783


Ridge :
r2_score 41.02723871252733
Score : 52.68132747999582
Mean score : 7.258190372261933


Lasso :
r2_score 28.335900629876186
Score : 64.0187063494081
Mean score : 8.001169061418969


ElasticNet :
r2_score 27.842459414388145
Score : 64.45950541829578
Mean score : 8.028667723744443


RandomForest :
r2_score 81.39374104464403
Score : 16.621273954369393
Mean score : 4.076919664939376
