In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as pl
import warnings
#from pandas_profiling import ProfileReport
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import plot_tree
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.metrics import roc_curve, auc

In [44]:
data = pd.read_csv("Battery_RUL.csv")

In [45]:
data.shape

(15064, 9)

In [40]:
data=data.drop(['Cycle_Index','Discharge Time (s)', 'Decrement 3.6-3.4V (s)', 'Time constant current (s)','Charging time (s)'],axis=1)

In [41]:
data.head()

Unnamed: 0,Max. Voltage Dischar. (V),Min. Voltage Charg. (V),Time at 4.15V (s),RUL
0,3.67,3.211,5460.001,1112
1,4.246,3.22,5508.992,1111
2,4.249,3.224,5508.993,1110
3,4.25,3.225,5502.016,1109
4,4.29,3.398,5480.992,1107


In [42]:
X = data.drop(['RUL'], axis=1)
y = data['RUL']

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=2023, shuffle =True)

In [44]:
class Pipeline:
    def __init__(self, scalar):
        self.scalar = scalar
    
    def fit(self, X, y):
        X = self.scalar.fit_transform(X)
        return X, y
    
    def transform(self, X, y):
        X = self.scalar.transform(X)
        return X, y

In [45]:
robust = RobustScaler()
pipeline = Pipeline(robust)

In [46]:
X_train, y_train = pipeline.fit(X_train, y_train)
X_test, y_test = pipeline.transform(X_test, y_test)

### Random Forest with Time at 4.15V (s)

In [17]:
import pickle
random_forest = RandomForestRegressor()

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, scoring='r2', cv=5)

# Fit the model to the training data
grid_search.fit(X_train, y_train)

# Print the best parameters found by the grid search
print("Best Parameters: ", grid_search.best_params_)

best_model = grid_search.best_estimator_

# Calculate and print the R^2 score on the training set
train_score = best_model.score(X_train, y_train)
print("R^2 Score on Training Set: {:.2%}".format(train_score))

# Calculate and print the R^2 score on the test set
test_score = best_model.score(X_test, y_test)
print("R^2 Score on Test Set: {:.2%}".format(test_score))

with open('random_forest_model_4.15V.pkl', 'wb') as file:
    pickle.dump(best_model, file)

Best Parameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 150}
R^2 Score on Training Set: 99.86%
R^2 Score on Test Set: 99.15%


### LSTM with Time at 4.15V (s)

In [37]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [47]:
look_back = 10
model = Sequential()
model.add(LSTM(50, return_sequences=False, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [52]:
from sklearn.metrics import mean_squared_error

model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test), verbose=2)

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

model.save('lstm_model_4.15V.h5')

Epoch 1/20
631/631 - 1s - 956us/step - loss: 6227.7031 - val_loss: 6148.0264
Epoch 2/20
631/631 - 1s - 915us/step - loss: 6213.7734 - val_loss: 6156.6313
Epoch 3/20
631/631 - 1s - 898us/step - loss: 6184.5146 - val_loss: 6094.5146
Epoch 4/20
631/631 - 1s - 917us/step - loss: 6088.0938 - val_loss: 5980.6948
Epoch 5/20
631/631 - 1s - 892us/step - loss: 6051.4629 - val_loss: 5958.8872
Epoch 6/20
631/631 - 1s - 912us/step - loss: 6019.3154 - val_loss: 5953.3506
Epoch 7/20
631/631 - 1s - 891us/step - loss: 6003.1606 - val_loss: 5983.4463
Epoch 8/20
631/631 - 1s - 899us/step - loss: 5986.9072 - val_loss: 5891.9551
Epoch 9/20
631/631 - 1s - 893us/step - loss: 5967.9824 - val_loss: 5906.5820
Epoch 10/20
631/631 - 1s - 890us/step - loss: 5971.8228 - val_loss: 5878.2461
Epoch 11/20
631/631 - 1s - 899us/step - loss: 5959.4341 - val_loss: 5880.6909
Epoch 12/20
631/631 - 1s - 898us/step - loss: 5939.2842 - val_loss: 5915.1851
Epoch 13/20
631/631 - 1s - 897us/step - loss: 5910.4961 - val_loss: 5865.



Mean Squared Error: 5735.80973323425


### Random Forest with Decrement 3.6-3.4V (s)

In [18]:
data = pd.read_csv("Battery_RUL.csv")
data=data.drop(['Cycle_Index','Discharge Time (s)', 'Time at 4.15V (s)','Time constant current (s)','Charging time (s)'],axis=1)

In [19]:
data.head()

Unnamed: 0,Decrement 3.6-3.4V (s),Max. Voltage Dischar. (V),Min. Voltage Charg. (V),RUL
0,1151.4885,3.67,3.211,1112
1,1172.5125,4.246,3.22,1111
2,1112.992,4.249,3.224,1110
3,1080.320667,4.25,3.225,1109
4,29813.487,4.29,3.398,1107


In [20]:
X = data.drop(['RUL'], axis=1)
y = data['RUL']

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=2023, shuffle =True)

In [22]:
class Pipeline:
    def __init__(self, scalar):
        self.scalar = scalar
    
    def fit(self, X, y):
        X = self.scalar.fit_transform(X)
        return X, y
    
    def transform(self, X, y):
        X = self.scalar.transform(X)
        return X, y

In [23]:
robust = RobustScaler()
pipeline = Pipeline(robust)

In [24]:
X_train, y_train = pipeline.fit(X_train, y_train)
X_test, y_test = pipeline.transform(X_test, y_test)

In [25]:
import pickle
random_forest = RandomForestRegressor()

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, scoring='r2', cv=5)

# Fit the model to the training data
grid_search.fit(X_train, y_train)

# Print the best parameters found by the grid search
print("Best Parameters: ", grid_search.best_params_)

best_model = grid_search.best_estimator_

# Calculate and print the R^2 score on the training set
train_score = best_model.score(X_train, y_train)
print("R^2 Score on Training Set: {:.2%}".format(train_score))

# Calculate and print the R^2 score on the test set
test_score = best_model.score(X_test, y_test)
print("R^2 Score on Test Set: {:.2%}".format(test_score))

with open('random_forest_model_Decrement_3.6_3.4V.pkl', 'wb') as file:
    pickle.dump(best_model, file)

Best Parameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
R^2 Score on Training Set: 99.83%
R^2 Score on Test Set: 98.86%


### Random Forest with Decrement 3.6-3.4V (s) and 4.15V (s)

In [53]:
data = pd.read_csv("Battery_RUL.csv")
data=data.drop(['Cycle_Index','Discharge Time (s)', 'Time constant current (s)','Charging time (s)'],axis=1)

In [54]:
data.head()

Unnamed: 0,Decrement 3.6-3.4V (s),Max. Voltage Dischar. (V),Min. Voltage Charg. (V),Time at 4.15V (s),RUL
0,1151.4885,3.67,3.211,5460.001,1112
1,1172.5125,4.246,3.22,5508.992,1111
2,1112.992,4.249,3.224,5508.993,1110
3,1080.320667,4.25,3.225,5502.016,1109
4,29813.487,4.29,3.398,5480.992,1107


In [55]:
X = data.drop(['RUL'], axis=1)
y = data['RUL']

In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=2023, shuffle =True)

In [12]:
class Pipeline:
    def __init__(self, scalar):
        self.scalar = scalar
    
    def fit(self, X, y):
        X = self.scalar.fit_transform(X)
        return X, y
    
    def transform(self, X, y):
        X = self.scalar.transform(X)
        return X, y

In [13]:
robust = RobustScaler()
pipeline = Pipeline(robust)

In [59]:
X_train, y_train = pipeline.fit(X_train, y_train)
X_test, y_test = pipeline.transform(X_test, y_test)

In [60]:
import pickle
import gzip

random_forest = RandomForestRegressor()

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, scoring='r2', cv=5)

# Fit the model to the training data
grid_search.fit(X_train, y_train)

# Print the best parameters found by the grid search
print("Best Parameters: ", grid_search.best_params_)

best_model = grid_search.best_estimator_

# Calculate and print the R^2 score on the training set
train_score = best_model.score(X_train, y_train)
print("R^2 Score on Training Set: {:.2%}".format(train_score))

# Calculate and print the R^2 score on the test set
test_score = best_model.score(X_test, y_test)
print("R^2 Score on Test Set: {:.2%}".format(test_score))

with gzip.open('random_forest_model_Decrement_3.6_3.4V_and_4.15V.pkl.gz', 'wb') as file:
    pickle.dump(best_model, file)

Best Parameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
R^2 Score on Training Set: 99.90%
R^2 Score on Test Set: 99.26%


### LSTM

#### Only with Cycle_Index

In [8]:
data=data.drop(['Discharge Time (s)', 'Decrement 3.6-3.4V (s)', 'Max. Voltage Dischar. (V)','Min. Voltage Charg. (V)','Time at 4.15V (s)','Time constant current (s)','Charging time (s)'],axis=1)

In [9]:
data.head()

Unnamed: 0,Cycle_Index,RUL
0,1.0,1112
1,2.0,1111
2,3.0,1110
3,4.0,1109
4,6.0,1107


In [10]:
X = data.drop(['RUL'], axis=1)
y = data['RUL']

In [28]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import math

In [21]:
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

In [22]:
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [24]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

  super().__init__(**kwargs)


In [26]:
y_pred = model.predict(X_test)

[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [27]:
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

print(f'MSE: {mse}')
print(f'RMSE: {rmse}')

MSE: 403858.2011731093
RMSE: 635.4983880177111


#### With Discharge Time (s)	Max. Voltage Dischar. (V)	Min. Voltage Charg. (V)

In [31]:
data=data.drop(['Cycle_Index','Decrement 3.6-3.4V (s)', 'Time at 4.15V (s)','Time constant current (s)','Charging time (s)'],axis=1)

In [32]:
data.head()

Unnamed: 0,Discharge Time (s),Max. Voltage Dischar. (V),Min. Voltage Charg. (V),RUL
0,2595.3,3.67,3.211,1112
1,7408.64,4.246,3.22,1111
2,7393.76,4.249,3.224,1110
3,7385.5,4.25,3.225,1109
4,65022.75,4.29,3.398,1107


In [33]:
X = data.drop(['RUL'], axis=1)
y = data['RUL']

In [34]:
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

In [35]:
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [37]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

  super().__init__(**kwargs)


In [38]:
y_pred = model.predict(X_test)

[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [39]:
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

print(f'MSE: {mse}')
print(f'RMSE: {rmse}')

MSE: 406407.9880631543
RMSE: 637.5013631853302


#### With Decrement 3.6-3.4V (s) and 4.15V (s)

In [46]:
data=data.drop(['Cycle_Index','Discharge Time (s)', 'Time constant current (s)','Charging time (s)'],axis=1)

In [47]:
data.head()

Unnamed: 0,Decrement 3.6-3.4V (s),Max. Voltage Dischar. (V),Min. Voltage Charg. (V),Time at 4.15V (s),RUL
0,1151.4885,3.67,3.211,5460.001,1112
1,1172.5125,4.246,3.22,5508.992,1111
2,1112.992,4.249,3.224,5508.993,1110
3,1080.320667,4.25,3.225,5502.016,1109
4,29813.487,4.29,3.398,5480.992,1107


In [48]:
X = data.drop(['RUL'], axis=1)
y = data['RUL']

In [49]:
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

In [50]:
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

In [52]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25, random_state=42)

In [53]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

In [54]:
y_pred = model.predict(X_test)

[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 923us/step


In [55]:
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

print(f'MSE: {mse}')
print(f'RMSE: {rmse}')

MSE: 406883.0805829726
RMSE: 637.8738751375327
