In [1]:
import os
%pwd

'e:\\Food_Delivery\\notebooks'

In [2]:
# Step up from notebooks/ to the project root, which is where the later cells
# expect to run from. Guarded on the current directory name so that re-running
# this cell is a no-op instead of climbing one more level each time.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('../')

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Read-only settings bundle passed to ``ModelTrainer``.

    ``frozen=True`` means the fields cannot be reassigned once the instance
    has been built. In this notebook the values are filled in by
    ``ConfigurationManager`` from the YAML configuration.
    """
    # Directory owned by the stage; ConfigurationManager creates it before building this object.
    root_dir: Path
    # Path taken from the YAML section's ``data_path`` key — presumably the input dataset; TODO confirm.
    data_path: Path

In [4]:
from src.constants import *
from src.utils import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Loads the project's YAML config/params files and builds stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> ModelTrainerConfig:
        """Build a ``ModelTrainerConfig`` from the ``data_transformation`` section.

        The section's ``root_dir`` is created as a side effect.

        NOTE(review): the method name and the config section both say
        "data transformation" while the returned type is the model-trainer
        config — confirm this is the intended section for the training stage.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        return ModelTrainerConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
        )

In [27]:
import os
from src.logging import logger
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

# Single shared scaler: fitted on the training split further down and reused
# unchanged for the test split.
scaler = StandardScaler()


In [7]:
class ModelTrainer:
    """Placeholder for the model-training stage; it only stores its config so far."""

    def __init__(self, config: ModelTrainerConfig):
        """Keep the stage configuration on the instance as ``self.config``."""
        self.config = config

    def train_data(self, data):
        """Stub: accepts ``data`` but does no work yet and returns ``None``."""
        return None
        

In [8]:
# Resolve the training CSV relative to the project root (the working directory
# after the chdir cell near the top) instead of a machine-specific E:\ path,
# so the notebook runs on any checkout of the project.
TRAIN_CSV_PATH = Path('artifacts') / 'data_ingestion' / 'Final_train.csv'
data = pd.read_csv(TRAIN_CSV_PATH)

In [9]:
# Quick look at the first rows of the loaded training frame.
data.head()

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Weatherconditions,Road_traffic_density,Vehicle_condition,multiple_deliveries,Time_taken(min),Distance(kms),Type_of_order_Drinks,Type_of_order_Meal,Type_of_order_Snack,Type_of_vehicle_electric_scooter,Type_of_vehicle_motorcycle,Type_of_vehicle_scooter,Festival_No,Festival_Yes,City_Metropolitian,City_Semi-Urban,City_Urban
0,37.0,4.9,0,2,2.0,0.0,24.0,3.020737,0,0,1,0,1,0,1,0,0,0,1
1,34.0,4.5,4,3,2.0,1.0,33.0,20.143737,0,0,1,0,0,1,1,0,1,0,0
2,23.0,4.4,5,0,0.0,1.0,26.0,1.549693,1,0,0,0,1,0,1,0,0,0,1
3,38.0,4.7,0,1,0.0,1.0,21.0,7.774497,0,0,0,0,1,0,1,0,1,0,0
4,32.0,4.6,1,2,1.0,1.0,30.0,6.197898,0,0,1,0,0,1,1,0,1,0,0


In [10]:
# Target is the delivery time column; every other column is used as a feature.
y = data['Time_taken(min)']
X = data.drop(columns=['Time_taken(min)'])

In [11]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split stable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [12]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test information leaks into the fit.
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)

In [29]:
# 200-tree forest. Seeded (same value as the train/test split) so the metrics
# reported below can be reproduced on a fresh run.
rf = RandomForestRegressor(n_estimators=200, random_state=42)

In [30]:
# Train the forest on the standardized training features.
rf.fit(X_train, y_train)

In [31]:
# Predictions for the held-out test features, scored in the cells that follow.
predict = rf.predict(X_test)

In [32]:
def evaluate_reg(true, predicted):
    """Score regression predictions against the observed targets.

    Args:
        true: Observed target values.
        predicted: Model predictions, aligned element-wise with ``true``.

    Returns:
        Tuple ``(r2, MAE, MSE, rmse)``: R² score, mean absolute error,
        mean squared error and root mean squared error.
    """
    r2 = r2_score(true, predicted)
    MAE = mean_absolute_error(true, predicted)
    MSE = mean_squared_error(true, predicted)
    # RMSE is the square root of the MSE just computed; no second pass over the data.
    rmse = np.sqrt(MSE)

    return r2, MAE, MSE, rmse

In [33]:
# Test-set scores for the forest, returned as (r2, MAE, MSE, rmse).
evaluate_reg(y_test, predict)

(0.8104738411108923, 3.2176786121589607, 16.615887547077715, 4.076259013737684)

In [13]:
from sklearn.linear_model import LinearRegression

from sklearn.linear_model import Lasso,Ridge,ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor 

from xgboost import XGBRegressor



from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [14]:
# NOTE(review): this cell defines evaluate_reg a second time; an earlier cell
# in the notebook already defines a function with the same name. Consider
# keeping only one definition.
def evaluate_reg(true, predicted):
    """Score regression predictions against the observed targets.

    Args:
        true: Observed target values.
        predicted: Model predictions, aligned element-wise with ``true``.

    Returns:
        Tuple ``(r2, MAE, MSE, rmse)``: R² score, mean absolute error,
        mean squared error and root mean squared error.
    """
    r2 = r2_score(true, predicted)
    MAE = mean_absolute_error(true, predicted)
    MSE = mean_squared_error(true, predicted)
    # Derive RMSE from the MSE already in hand rather than recomputing it.
    rmse = np.sqrt(MSE)

    return r2, MAE, MSE, rmse

In [15]:
RANDOM_STATE = 42  # same seed as the train/test split, so reruns reproduce these scores

# Candidate regressors with library-default hyperparameters. The estimators
# that accept a seed are given one so the comparison is repeatable.
models = {
    "Random Forest": RandomForestRegressor(random_state=RANDOM_STATE),
    "Decision Tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingRegressor(random_state=RANDOM_STATE),
    "linear Regression": LinearRegression(),
    "Ridge": Ridge(),
    "XGBRegressor": XGBRegressor(random_state=RANDOM_STATE),
}

# Parallel lists, filled in the same order as `models`; the ranking tables in
# the following cells are built from them.
r2_list = []
mse_list = []
models_list = []


def _print_metrics(heading, r2, mae, mse, rmse):
    """Print one metrics section (heading plus four scores) of the report."""
    print(heading)
    print('- r2_score: {:.4f}'.format(r2))
    print('- MAE: {:.4f}'.format(mae))
    print('- MSE: {:.4f}'.format(mse))
    print('- rmse: {:.4f}'.format(rmse))


for name, model in models.items():
    model.fit(X_train, y_train)  # Train model

    # Predict on both splits so over-fitting shows up as a train/test gap.
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    r2, MAE, MSE, rmse = evaluate_reg(y_test, y_test_pred)
    r2_train, MAE_train, MSE_train, rmse_train = evaluate_reg(y_train, y_train_pred)

    print(name)
    models_list.append(name)

    _print_metrics('Model performance for Test set', r2, MAE, MSE, rmse)
    # Only the test-set scores are kept for ranking.
    r2_list.append(r2)
    mse_list.append(MSE)

    print('-'*35)

    _print_metrics('Model performance for train set', r2_train, MAE_train, MSE_train, rmse_train)

    print('='*35)
    print('='*35)
    print('\n')

Random Forest
Model performance for Test set
- r2_score: 0.8089
- MAE: 3.2313
- MSE: 16.7513
- rmse: 4.0928
-----------------------------------
Model performance for train set
- r2_score: 0.9737
- MAE: 1.1830
- MSE: 2.3178
- rmse: 1.5224


Decision Tree
Model performance for Test set
- r2_score: 0.6597
- MAE: 4.1265
- MSE: 29.8365
- rmse: 5.4623
-----------------------------------
Model performance for train set
- r2_score: 1.0000
- MAE: 0.0005
- MSE: 0.0031
- rmse: 0.0557


Gradient Boosting
Model performance for Test set
- r2_score: 0.7645
- MAE: 3.6140
- MSE: 20.6499
- rmse: 4.5442
-----------------------------------
Model performance for train set
- r2_score: 0.7660
- MAE: 3.6155
- MSE: 20.6347
- rmse: 4.5425


linear Regression
Model performance for Test set
- r2_score: 0.5348
- MAE: 5.0598
- MSE: 40.7814
- rmse: 6.3860
-----------------------------------
Model performance for train set
- r2_score: 0.5340
- MAE: 5.0869
- MSE: 41.0922
- rmse: 6.4103


Ridge
Model performance for Te

In [16]:
# Rank the models by their test-set R² score, best first.
r2_rows = [(model_name, score) for model_name, score in zip(models_list, r2_list)]
Results = pd.DataFrame(r2_rows, columns=['Model Name', 'r2_score'])
Results.sort_values(by="r2_score", ascending=False)

Unnamed: 0,Model Name,r2_score
5,XGBRegressor,0.817541
0,Random Forest,0.808929
2,Gradient Boosting,0.764461
1,Decision Tree,0.659676
4,Ridge,0.534841
3,linear Regression,0.534834


In [17]:
# Rank the models by their test-set MSE, lowest error first.
# Note: this rebinds `Results`, replacing the table built in the previous cell.
mse_rows = [(model_name, error) for model_name, error in zip(models_list, mse_list)]
Results = pd.DataFrame(mse_rows, columns=['Model Name', 'Mse'])
Results.sort_values(by="Mse", ascending=True)

Unnamed: 0,Model Name,Mse
5,XGBRegressor,15.996267
0,Random Forest,16.751303
2,Gradient Boosting,20.649866
1,Decision Tree,29.836477
4,Ridge,40.780767
3,linear Regression,40.781431


In [18]:
from sklearn.model_selection import GridSearchCV
# Fresh, unfitted base estimator for the grid search (this rebinds `rf`).
# Seeded so that every candidate is scored reproducibly.
rf = RandomForestRegressor(random_state=42)

In [19]:
# Hyperparameter grid for the random-forest search.
# NOTE(review): this expands to 1 * 4 * 2 * 3 * 3 * 4 = 288 candidates, and the
# recorded search run was interrupted before finishing — consider trimming it.
param_grid = dict(
    bootstrap=[True],
    max_depth=[80, 90, 100, 110],
    max_features=[2, 3],
    min_samples_leaf=[3, 4, 5],
    min_samples_split=[8, 10, 12],
    n_estimators=[100, 200, 300, 1000],
)

In [22]:
# Exhaustive search over `param_grid` with 2-fold cross-validation, using all
# available cores and per-fit progress logging.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=2,
    n_jobs=-1,
    verbose=3,
)

In [23]:
# Runs the full search; the recorded run below was interrupted before it finished.
grid_search.fit(X_train, y_train)

Fitting 2 folds for each of 288 candidates, totalling 576 fits


KeyboardInterrupt: 