## **Training of MODEL2**

In [1]:
import pandas as pd
import numpy as np
from keras.src.ops import dtype
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
import joblib
import os

In [2]:
TARGET_VARIABLE = 'Time to Depletion'

In [3]:
data = pd.read_csv("../Final_codes/DATASET2.csv")

In [4]:
data.head()

Unnamed: 0,Current,Voltage,Ah Out,Cumulative Actual Disch Ah,Power,Remaining Capacity,Time to Depletion,type,capacity,charged,prediction
0,3.44,12.35,0.057333,0.057333,42.484,53.942667,56451.627907,tn1,85.0,54.0,[52487.35731419]
1,6.88,12.22,0.114667,0.172,84.0736,53.828,28165.813953,tn1,85.0,54.0,[28194.30398865]
2,6.88,12.16,0.114667,0.286667,83.6608,53.713333,28105.813953,tn1,85.0,54.0,[28024.92132622]
3,6.88,12.17,0.114667,0.401333,83.7296,53.598667,28045.813953,tn1,85.0,54.0,[28033.51799506]
4,6.87,12.18,0.1145,0.515833,83.6766,53.484167,28026.637555,tn1,85.0,54.0,[28064.55257681]


In [5]:
Y = data[TARGET_VARIABLE]
X = data.drop(TARGET_VARIABLE, axis=1)

numerical_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()

print("Numerical Featrures are : ", numerical_features)
print("Categorical Featrures are : ", categorical_features)

Numerical Featrures are :  ['Current', 'Voltage', 'Ah Out', 'Cumulative Actual Disch Ah', 'Power', 'Remaining Capacity', 'capacity', 'charged']
Categorical Featrures are :  ['type', 'prediction']


In [6]:
print("NaN locations:")
for column in data.columns:
    if data[column].isna().any():
        print(f"\n{column}:")
        print(data[data[column].isna()].index)


NaN locations:


In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=42)

In [8]:
numerical_transformer = Pipeline(steps=[
    ('pass',
     'passthrough')
])
categorical_transformer = Pipeline(steps=[
    ('onehot',
     OneHotEncoder(handle_unknown='ignore',
                   sparse_output=False))
])

In [9]:
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features)
]
    ,remainder='passthrough')

In [10]:
rf_model = RandomForestRegressor(
    random_state=42,
    bootstrap=True,
    criterion='absolute_error',
    n_jobs=-1,
    n_estimators=100
)

In [11]:
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', rf_model)
])

In [12]:
param_grid = {
    'regressor__n_estimators' : [100,150,200],
    'regressor__max_depth': [10,12,14],
    'regressor__min_samples_split': [2,3,4],
    'regressor__min_samples_leaf': [1,2,3]
}

In [13]:
grid_search = GridSearchCV(
    estimator = pipeline,
    param_grid = param_grid,
    scoring = 'neg_mean_absolute_error',
    cv = 2,
    verbose = 2,
    n_jobs = -1,
    return_train_score = True,
    refit = True
)

In [14]:
print("Initiating the Grid Search...")
grid_search.fit(X_train, Y_train)
print("Search Finished")

Initiating the Grid Search...
Fitting 2 folds for each of 81 candidates, totalling 162 fits


: 

: 

In [None]:
best_match = grid_search.best_estimator_

In [None]:
Y_pred = best_match.predict(X_test)

In [None]:
mae = mean_absolute_error(Y_test, Y_pred)

print(f"Mean Absolute Error is : {mae:.2f}")

Mean Absolute Error is : 122.26


In [None]:
joblib.dump(best_match, "../Final_codes/battery_random_forest_model2.joblib")

['../models/battery_random_forest_model2.joblib']

In [None]:
loaded_model = joblib.load("../Final_codes/battery_random_forest_model2.joblib")


In [None]:
y_loaded_pred = loaded_model.predict(X_test)

mae_loaded = mean_absolute_error(Y_test, y_loaded_pred)
print(f"Mean Absolute Error is : {mae_loaded:.2f}")

Mean Absolute Error is : 122.26


In [None]:
parameters=grid_search.best_params_
print("Best Parameters:", parameters)

Best Parameters: {'regressor__max_depth': 14, 'regressor__min_samples_leaf': 1, 'regressor__min_samples_split': 4, 'regressor__n_estimators': 100}
