In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

### Reading the files from the data folder: X_train, X_test, y_train and y_test

In [9]:
# Load the pre-split feature matrices and targets in one pass; the order of
# the paths matches the order of the names being unpacked.
X_train, X_test, y_train, y_test = map(
    pd.read_csv,
    (
        r"../data/X_train.csv",
        r"../data/X_test.csv",
        r"../data/y_train.csv",
        r"../data/y_test.csv",
    ),
)

In [10]:
# Show (rows, columns) of each split, in the order: X_train, y_train, X_test, y_test.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((59954, 92), (59954, 1), (14989, 92), (14989, 1))

In [11]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

### Creating a function to calculate the mean absolute error, root mean squared error and R² score of a model

In [12]:
## Create a function to evaluate a model
def evaluate_model(true, predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Model predictions aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_square)``: mean absolute error, root mean squared
        error and R^2 score, in that order.
    """
    mae = mean_absolute_error(true, predicted)
    # Compute the MSE once and derive the RMSE from it.
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

### Training and comparing several regression models (including an AdaBoost Regressor) for predicting the charging time of a BEV.

In [13]:
## Beginning Model Training
# Candidate regressors, each built with its library-default hyperparameters.
# NOTE: no random_state is set, so the randomized estimators can score
# slightly differently from one run to the next.
models = {
    "Linear Regression": LinearRegression(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "K-Neighbors Regressor": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "Adaboost Regressor": AdaBoostRegressor(),
    "Graident BoostRegressor": GradientBoostingRegressor(),
    "Xgboost Regressor": XGBRegressor(),
}

# y_train is loaded as a single-column frame of shape (n, 1). Flatten it once
# so that fit() receives a 1-D target and does not emit the column-vector
# conversion warning.
y_train_1d = y_train.values.ravel()

for model_name, model in models.items():
    model.fit(X_train, y_train_1d)  # Train model

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate Train and Test dataset
    model_train_mae, model_train_rmse, model_train_r2 = evaluate_model(y_train, y_train_pred)
    model_test_mae, model_test_rmse, model_test_r2 = evaluate_model(y_test, y_test_pred)

    print(model_name)

    print('Model performance for Training set')
    print("- Root Mean Squared Error: {:.4f}".format(model_train_rmse))
    print("- Mean Absolute Error: {:.4f}".format(model_train_mae))
    print("- R2 Score: {:.4f}".format(model_train_r2))

    print('----------------------------------')

    print('Model performance for Test set')
    print("- Root Mean Squared Error: {:.4f}".format(model_test_rmse))
    print("- Mean Absolute Error: {:.4f}".format(model_test_mae))
    print("- R2 Score: {:.4f}".format(model_test_r2))

    print('='*35)
    print('\n')

Linear Regression
Model performance for Training set
- Root Mean Squared Error: 28.9276
- Mean Absolute Error: 14.0904
- R2 Score: 0.8905
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 27.8526
- Mean Absolute Error: 14.1743
- R2 Score: 0.8984


Lasso
Model performance for Training set
- Root Mean Squared Error: 44.7685
- Mean Absolute Error: 30.7339
- R2 Score: 0.7378
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 45.0154
- Mean Absolute Error: 31.0201
- R2 Score: 0.7346


Ridge
Model performance for Training set
- Root Mean Squared Error: 43.6847
- Mean Absolute Error: 30.5221
- R2 Score: 0.7504
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 44.1744
- Mean Absolute Error: 30.8423
- R2 Score: 0.7444


K-Neighbors Regressor
Model performance for Training set
- Root Mean Squared Error: 49.1697
- Mean Absolute Error: 28.5445
- R2 Score: 0.6838
---------

  model.fit(X_train, y_train) # Train model


Random Forest Regressor
Model performance for Training set
- Root Mean Squared Error: 2.1381
- Mean Absolute Error: 1.1155
- R2 Score: 0.9994
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 5.6811
- Mean Absolute Error: 2.9770
- R2 Score: 0.9958




  y = column_or_1d(y, warn=True)


Adaboost Regressor
Model performance for Training set
- Root Mean Squared Error: 25.5731
- Mean Absolute Error: 21.9112
- R2 Score: 0.9145
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 25.7171
- Mean Absolute Error: 22.0133
- R2 Score: 0.9134




  y = column_or_1d(y, warn=True)


Graident BoostRegressor
Model performance for Training set
- Root Mean Squared Error: 5.5939
- Mean Absolute Error: 3.6396
- R2 Score: 0.9959
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 5.8633
- Mean Absolute Error: 3.7172
- R2 Score: 0.9955


Xgboost Regressor
Model performance for Training set
- Root Mean Squared Error: 2.7680
- Mean Absolute Error: 1.9658
- R2 Score: 0.9990
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 4.4011
- Mean Absolute Error: 2.5253
- R2 Score: 0.9975




In [14]:

# Re-fit every candidate model and report its hyperparameters alongside the
# train/test metrics.

# y_train is a single-column frame of shape (n, 1). Flatten it once so that
# fit() receives a 1-D target and does not emit the column-vector conversion
# warning.
y_train_1d = y_train.values.ravel()

for model_name, model in models.items():
    model.fit(X_train, y_train_1d)  # Train model

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    model_train_mae, model_train_rmse, model_train_r2 = evaluate_model(y_train, y_train_pred)
    model_test_mae, model_test_rmse, model_test_r2 = evaluate_model(y_test, y_test_pred)

    print(f"Model: {model_name}")

    # Print all parameters of the model
    print("Model parameters:")
    for param, value in model.get_params().items():
        print(f"  {param}: {value}")

    print('\nModel performance for Training set')
    print(f"- Root Mean Squared Error: {model_train_rmse:.4f}")
    print(f"- Mean Absolute Error: {model_train_mae:.4f}")
    print(f"- R2 Score: {model_train_r2:.4f}")

    print('----------------------------------')

    print('Model performance for Test set')
    print(f"- Root Mean Squared Error: {model_test_rmse:.4f}")
    print(f"- Mean Absolute Error: {model_test_mae:.4f}")
    print(f"- R2 Score: {model_test_r2:.4f}")

    print('='*35)
    print('\n')

  


Model: Linear Regression
Model parameters:
  copy_X: True
  fit_intercept: True
  n_jobs: None
  positive: False

Model performance for Training set
- Root Mean Squared Error: 28.9276
- Mean Absolute Error: 14.0904
- R2 Score: 0.8905
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 27.8526
- Mean Absolute Error: 14.1743
- R2 Score: 0.8984


Model: Lasso
Model parameters:
  alpha: 1.0
  copy_X: True
  fit_intercept: True
  max_iter: 1000
  positive: False
  precompute: False
  random_state: None
  selection: cyclic
  tol: 0.0001
  warm_start: False

Model performance for Training set
- Root Mean Squared Error: 44.7685
- Mean Absolute Error: 30.7339
- R2 Score: 0.7378
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 45.0154
- Mean Absolute Error: 31.0201
- R2 Score: 0.7346


Model: Ridge
Model parameters:
  alpha: 1.0
  copy_X: True
  fit_intercept: True
  max_iter: None
  positive: False
  random_sta

  model.fit(X_train, y_train)  # Train model


Model: Random Forest Regressor
Model parameters:
  bootstrap: True
  ccp_alpha: 0.0
  criterion: squared_error
  max_depth: None
  max_features: 1.0
  max_leaf_nodes: None
  max_samples: None
  min_impurity_decrease: 0.0
  min_samples_leaf: 1
  min_samples_split: 2
  min_weight_fraction_leaf: 0.0
  n_estimators: 100
  n_jobs: None
  oob_score: False
  random_state: None
  verbose: 0
  warm_start: False

Model performance for Training set
- Root Mean Squared Error: 2.1435
- Mean Absolute Error: 1.1129
- R2 Score: 0.9994
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 5.6428
- Mean Absolute Error: 2.9742
- R2 Score: 0.9958




  y = column_or_1d(y, warn=True)


Model: Adaboost Regressor
Model parameters:
  base_estimator: deprecated
  estimator: None
  learning_rate: 1.0
  loss: linear
  n_estimators: 50
  random_state: None

Model performance for Training set
- Root Mean Squared Error: 26.2008
- Mean Absolute Error: 21.6214
- R2 Score: 0.9102
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 26.3746
- Mean Absolute Error: 21.7530
- R2 Score: 0.9089




  y = column_or_1d(y, warn=True)


Model: Graident BoostRegressor
Model parameters:
  alpha: 0.9
  ccp_alpha: 0.0
  criterion: friedman_mse
  init: None
  learning_rate: 0.1
  loss: squared_error
  max_depth: 3
  max_features: None
  max_leaf_nodes: None
  min_impurity_decrease: 0.0
  min_samples_leaf: 1
  min_samples_split: 2
  min_weight_fraction_leaf: 0.0
  n_estimators: 100
  n_iter_no_change: None
  random_state: None
  subsample: 1.0
  tol: 0.0001
  validation_fraction: 0.1
  verbose: 0
  warm_start: False

Model performance for Training set
- Root Mean Squared Error: 5.5939
- Mean Absolute Error: 3.6396
- R2 Score: 0.9959
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 5.8633
- Mean Absolute Error: 3.7172
- R2 Score: 0.9955


Model: Xgboost Regressor
Model parameters:
  objective: reg:squarederror
  base_score: None
  booster: None
  callbacks: None
  colsample_bylevel: None
  colsample_bynode: None
  colsample_bytree: None
  device: None
  early_stopping_rounds: None


In [15]:
from pathlib import Path
import pickle

from xgboost import XGBRegressor

# Create and train the XGBoost model with default hyperparameters
Xgboost_model = XGBRegressor()
Xgboost_model.fit(X_train, y_train.values.ravel())  # Flatten the (n, 1) target to 1-D

# Save to a .pkl file. Create the output folder first so that open() does not
# raise FileNotFoundError when the folder is missing.
# NOTE(review): a pickled model is tied to the library version that wrote it;
# confirm the loading environment uses the same xgboost version.
model_path = Path(r"../Saving The Model Results/Xgboost_model.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)
with open(model_path, "wb") as file:
    pickle.dump(Xgboost_model, file)
