In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.model_selection import train_test_split

# adding new libraries
import tensorflow as tf 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras import layers, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer, MinMaxScaler  
from sklearn.metrics import mean_squared_error
from tensorflow.keras.optimizers import Adam
from scipy import stats

In [None]:
# Import data (path is relative to the notebook's working directory)
df = pd.read_csv('batemo_model_data.csv')

# Show summary statistics of the raw data. A bare `df.describe()` in the middle
# of a cell is evaluated and then thrown away (only the LAST expression of a
# cell is displayed), so print it explicitly.
print(df.describe())

# Remove voltage readings < 2.5 V
df = df[df['V'] >= 2.5]

# Thin the dataset: keep every 20th remaining row
# (presumably to keep plotting/training time manageable - adjust the step if needed)
df = df.iloc[::20]

In [None]:
# Check to see if our input data is normally distributed:
# draw a density histogram with a KDE overlay for each input feature.
#
# `sns.distplot` is deprecated (since seaborn 0.11) and scheduled for removal;
# `histplot(..., kde=True, stat='density')` is its documented replacement.
for column in ('V', 'I'):
    plt.figure(figsize = (14,5))
    sns.histplot(df[column], kde=True, stat='density')
    plt.title(f'Distribution of {column}')
    plt.show()

In [None]:
# Define training variables
# TODO: need to consider if keeping current (I) is a valuable input feature or not (test if the model performs better w/o it)
X = df[['V', 'I']].values

# TODO: add calculating SOP
# LSTM output with 2 nodes (SOH and SOC)
# Y = df[['SOC', 'SOH']].values
Y = df[['SOC']].values

# Split the data into training and testing BEFORE scaling, so that the scaler
# statistics are learned from the training rows only (no test-set leakage).
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Min-max scale the input features to [0, 1]. V and I live on different ranges,
# and the model should be trained on the scaled features. Previously `X_scaled`
# was computed but the raw `X` was what actually got split and trained on.
scaler = MinMaxScaler(feature_range=(0, 1))
x_train = scaler.fit_transform(x_train)
# Test rows reuse the training min/max, so a few values may fall slightly
# outside [0, 1]; that is expected.
x_test = scaler.transform(x_test)

# Scaled copy of the full feature matrix (same scaler), kept for inspection/plotting
X_scaled = scaler.transform(X)

# # Check the distribution of the scaled inputs (columns: 0 = V, 1 = I)
# plt.figure(figsize = (14,5))
# sns.histplot(X_scaled[:, 0], kde=True, stat='density')
# plt.show()
#
# plt.figure(figsize = (14,5))
# sns.histplot(X_scaled[:, 1], kde=True, stat='density')
# plt.show()

# Define function to create sequences
# NOTE: The creation of input-output pairs allows the model to learn from historical context. The input sequences serve as a history of past observations, while the corresponding output (target) provides the next observation in the sequence. This historical context is crucial for making accurate predictions, especially in time-series forecasting or sequence prediction tasks where the future state depends on past states
# def create_sequences(data, seq_length):
#     X = []
#     y = []
#     for i in range(len(data) - seq_length):
#         X.append(data[i:i + seq_length])  # Features (voltage, current, temperature, state of charge) are turned into a list of historical values
#         y.append(data[i + seq_length])     # Target variables (SOH and SOC) are turned into a list of historical values
#     return np.array(X), np.array(y)

# Choose sequence length
# seq_length = 1 # sequence length set to 1 to take immediate values from sensor reading during testing
# NOTE: Even with seq_length of 1, organizing the data into sequences might provide the model with some historical context. Although the immediate historical context is limited, the model can still potentially learn from patterns and trends in the data over time
# TODO: may need to increase the sequence length if the model performs poorly on testing (sensor) data
# NOTE(review): sequences longer than 1 need a chronological (shuffle=False) split;
# the shuffled split above breaks the time ordering of the rows.

# Create sequences for training and testing data
# X_train_seq, y_train_seq = create_sequences(x_train, seq_length)
# X_test_seq, y_test_seq = create_sequences(x_test, seq_length)

# print(X_train_seq.shape)
# print(y_train_seq.shape)
# print(X_test_seq.shape)
# print(y_test_seq.shape)

# Reshape input to be [samples, time steps, features]; each sample is a
# single time step, so the middle axis has length 1.
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))

In [None]:
# Define the model for predicting SOC (single regression output; Y currently holds SOC only)
model = Sequential()

# Declare the input shape explicitly as (time steps, features), taken from the
# reshaped training data. The previous `LSTM((1,2))` passed that shape as the
# layer's `units` argument, which must be a positive integer.
model.add(Input(shape=(x_train.shape[1], x_train.shape[2])))
# Stacked LSTMs: every LSTM that feeds another LSTM must return the full sequence
model.add(LSTM(100, return_sequences = True))
model.add(LSTM(100, return_sequences = True))
model.add(LSTM(50))
model.add(Dense(8, activation = 'relu'))
# NOTE(review): a ReLU output clamps predictions at >= 0, which is valid for SOC,
# but a linear output is the more common choice for regression - consider testing it.
model.add(Dense(1, activation = 'relu'))

# Stop training once the validation loss has not improved for 2 consecutive epochs
early_stop = EarlyStopping(monitor = 'val_loss', patience = 2)

# Compile model with Adam optimizer and default learning rate.
# `metrics` is passed as a list, which is the form Keras expects.
model.compile(loss = MeanSquaredError(),
              optimizer = Adam(learning_rate = 0.001),
              metrics = [RootMeanSquaredError()])

# Print each layer's name/type, output shape and parameter counts
model.summary()

# Train the model and store the history. The early-stopping callback has to be
# passed to fit(); merely creating it has no effect.
# NOTE(review): the test split doubles as the validation set here, so the test
# score is not fully independent of training - consider a separate validation split.
history = model.fit(x_train, y_train, epochs=5, batch_size=32,
                    validation_data=(x_test, y_test), callbacks=[early_stop])

# Extract loss values from the history (may hold fewer than 5 epochs if training stopped early)
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_loss) + 1)

# Plotting the loss
plt.figure(figsize = (14,5))
plt.plot(epochs, train_loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Run the trained network over both splits
trainPredict = model.predict(x_train)
testPredict = model.predict(x_test)

# No inverse transform is applied to the predictions: the scaler is only ever
# used on the input features, while y_train / y_test hold the raw target values,
# so predictions and targets are directly comparable.

# Root-mean-squared error per split (square root of sklearn's MSE)
train_mse = mean_squared_error(y_train, trainPredict)
test_mse = mean_squared_error(y_test, testPredict)
trainScore = np.sqrt(train_mse)
testScore = np.sqrt(test_mse)

# Report both scores, training split first
print('Train Score: %.2f RMSE' % (trainScore))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# Overlay the train/test predictions on the actual SOC curve.
#
# The train/test split is shuffled, so predictions cannot be laid out as two
# contiguous segments. The old look-back style offsets also produced a test
# slice 4 rows shorter than `testPredict`, which raises a shape error.
# Instead, recover which row of X/Y each sample came from: for a fixed number
# of samples, `train_test_split` with the same `test_size` and `random_state`
# always yields the same partition.
# NOTE: these arguments must mirror the split that produced x_train / x_test.
all_rows = np.arange(len(Y))
train_rows, test_rows = train_test_split(all_rows, test_size=0.2, random_state=42)
assert len(train_rows) == len(trainPredict) and len(test_rows) == len(testPredict), \
    'recovered split sizes do not match the predictions - check the split arguments'

# Place each prediction at its original row; NaN everywhere else so nothing is drawn there.
# (.ravel() assumes a single output column, i.e. SOC only.)
trainPredictPlot = np.full(len(Y), np.nan)
trainPredictPlot[train_rows] = trainPredict.ravel()
testPredictPlot = np.full(len(Y), np.nan)
testPredictPlot[test_rows] = testPredict.ravel()

# Plot baseline and predictions. The baseline is the SOC target only: plotting
# the whole DataFrame would mix V, I and SOC on one axis and use the DataFrame's
# label index, which does not line up with the positional prediction arrays.
# Predictions are drawn as markers because isolated points between NaNs would
# be invisible as a line.
plt.figure(figsize = (14,5))
plt.plot(Y.ravel(), label='Actual SOC')
plt.plot(trainPredictPlot, '.', markersize=2, label='Train prediction')
plt.plot(testPredictPlot, '.', markersize=2, label='Test prediction')
plt.title('Actual vs predicted SOC')
plt.xlabel('Sample (row order of the thinned dataset)')
plt.ylabel('SOC')
plt.legend()
plt.show()