# RUL estimation UNIBO Powertools Dataset

In [1]:
import numpy as np
import pandas as pd
import scipy.io
import math
import os
import ntpath
import sys
import logging
import time
import sys
import random

from importlib import reload
import plotly.graph_objects as go

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import LSTM, Embedding, RepeatVector, TimeDistributed, Masking, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LambdaCallback


IS_COLAB = False
IS_TRAINING = False
RESULT_NAME = "lstm_rul_unibo_powertools"

if IS_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    data_path = "/content/drive/My Drive/battery-rul-estimation/experiments/"
else:
    data_path = "../../"

sys.path.append(data_path)
from data_processing.unibo_powertools_data import UniboPowertoolsData, CycleCols
from data_processing.model_data_handler import ModelDataHandler
from data_processing.prepare_rul_data import RulHandler

### Config logging

In [2]:
reload(logging)
logging.basicConfig(format='%(asctime)s [%(levelname)s]: %(message)s', level=logging.DEBUG, datefmt='%Y/%m/%d %H:%M:%S')

# Load Data

In [3]:
dataset = UniboPowertoolsData(
    test_types=[],
    chunk_size=1000000,
    lines=[37, 40],
    charge_line=37,
    discharge_line=40,
    base_path=data_path
)

2021/03/16 17:32:11 [DEBUG]: Start loading data with lines: [37, 40], types: [] and chunksize: 1000000...
2021/03/16 17:32:50 [DEBUG]: Finish loading data.
2021/03/16 17:32:50 [INFO]: Loaded cem raw data with cycle row count: 15384908 and capacity row count: 39585
2021/03/16 17:32:50 [DEBUG]: Start cleaning cycle raw data...
2021/03/16 17:33:05 [DEBUG]: Finish cleaning cycle raw data.
2021/03/16 17:33:05 [INFO]: Removed 11 rows of abnormal cycle raw data.
2021/03/16 17:33:05 [DEBUG]: Start cleaning capacity raw data...
2021/03/16 17:33:05 [DEBUG]: Finish cleaning capacity raw data.
2021/03/16 17:33:05 [INFO]: Removed 1 rows of abnormal capacity raw data.
2021/03/16 17:33:05 [DEBUG]: Start assigning charging raw data...
2021/03/16 17:33:06 [DEBUG]: Finish assigning charging raw data.
2021/03/16 17:33:06 [INFO]: [Charging] cycle raw count: 12160812, capacity raw count: 19800
2021/03/16 17:33:06 [DEBUG]: Start assigning discharging raw data...
2021/03/16 17:33:07 [DEBUG]: Finish assigning

In [4]:
train_names = [
    '000-DM-3.0-4019-S',#minimum capacity 1.48
    '001-DM-3.0-4019-S',#minimum capacity 1.81
    '002-DM-3.0-4019-S',#minimum capacity 2.06

    '009-DM-3.0-4019-H',#minimum capacity 1.41
    '010-DM-3.0-4019-H',#minimum capacity 1.44

    '014-DM-3.0-4019-P',#minimum capacity 1.7
    '015-DM-3.0-4019-P',#minimum capacity 1.76
    '016-DM-3.0-4019-P',#minimum capacity 1.56
    '017-DM-3.0-4019-P',#minimum capacity 1.29
    #'047-DM-3.0-4019-P',#new 1.98
    #'049-DM-3.0-4019-P',#new 2.19



    '007-EE-2.85-0820-S',#2.5
    '008-EE-2.85-0820-S',#2.49
    '042-EE-2.85-0820-S',#2.51

    '043-EE-2.85-0820-H',#2.31




    '018-DP-2.00-1320-S',#minimum capacity 1.82
    #'019-DP-2.00-1320-S',#minimum capacity 1.61
    '036-DP-2.00-1720-S',#minimum capacity 1.91
    '037-DP-2.00-1720-S',#minimum capacity 1.84
    '038-DP-2.00-2420-S',#minimum capacity 1.854 (to 0)
    '050-DP-2.00-4020-S',#new 1.81
    '051-DP-2.00-4020-S',#new 1.866


    
    '040-DM-4.00-2320-S',#minimum capacity 3.75, cycles 188
]

test_names = [
    '003-DM-3.0-4019-S',#minimum capacity 1.84

    '011-DM-3.0-4019-H',#minimum capacity 1.36

    '013-DM-3.0-4019-P',#minimum capacity 1.6



    '006-EE-2.85-0820-S',# 2.621
    
    '044-EE-2.85-0820-H',# 2.43



    '039-DP-2.00-2420-S',#minimum capacity 1.93



    '041-DM-4.00-2320-S',#minimum capacity 3.76, cycles 190
]

2021/03/16 17:33:07 [DEBUG]: Start preparing data for training: ['000-DM-3.0-4019-S', '001-DM-3.0-4019-S', '002-DM-3.0-4019-S', '009-DM-3.0-4019-H', '010-DM-3.0-4019-H', '014-DM-3.0-4019-P', '015-DM-3.0-4019-P', '016-DM-3.0-4019-P', '017-DM-3.0-4019-P', '007-EE-2.85-0820-S', '008-EE-2.85-0820-S', '042-EE-2.85-0820-S', '043-EE-2.85-0820-H', '018-DP-2.00-1320-S', '036-DP-2.00-1720-S', '037-DP-2.00-1720-S', '038-DP-2.00-2420-S', '050-DP-2.00-4020-S', '051-DP-2.00-4020-S', '040-DM-4.00-2320-S'] and testing: ['003-DM-3.0-4019-S', '011-DM-3.0-4019-H', '013-DM-3.0-4019-P', '006-EE-2.85-0820-S', '044-EE-2.85-0820-H', '039-DP-2.00-2420-S', '041-DM-4.00-2320-S']...
  cyc_data = np.array(cyc_data)
  cyc_data = np.array(cyc_data)
2021/03/16 17:33:32 [DEBUG]: Finish getting training and testing charge data.
  cyc_data = np.array(cyc_data)
  cyc_data = np.array(cyc_data)
2021/03/16 17:33:50 [DEBUG]: Finish getting training and testing discharge data.
2021/03/16 17:33:50 [DEBUG]: Finish cleaning trai

In [5]:
dataset.prepare_data(train_names, test_names)
dataset_handler = ModelDataHandler(dataset, [
    CycleCols.VOLTAGE,
    CycleCols.CURRENT,
    CycleCols.TEMPERATURE
])

rul_handler = RulHandler()

## Data preparation

In [6]:
CAPACITY_THRESHOLDS = {
  3.0 : 2.7,#th 90% - min 2.1, 70%
  2.85 : 2.7,#th 94.7% - min 2.622, 92%
  2.0 : 1.93,#th 96.5% - min 1.93, 96.5%
  4.0 : 3.77,#th 94.2% - min 3.77 94.2%
  4.9 : 4.7,#th 95.9% - min 4.3, 87.7%
  5.0 : 4.5#th 90% - min 3.63, 72.6%
}
N_CYCLE = 500
WARMUP_TRAIN = 15
WARMUP_TEST = 30

(train_x, train_y_soh, test_x, test_y_soh,
  train_battery_range, test_battery_range,
  time_train, time_test, current_train, current_test) = dataset_handler.get_discharge_whole_cycle_future(train_names, test_names)

train_y = rul_handler.prepare_y_future(train_names, train_battery_range, train_y_soh, current_train, time_train, CAPACITY_THRESHOLDS)
del globals()["current_train"]
del globals()["time_train"]
test_y = rul_handler.prepare_y_future(test_names, test_battery_range, test_y_soh, current_test, time_test, CAPACITY_THRESHOLDS)
del globals()["current_test"]
del globals()["time_test"]
train_x, test_x = rul_handler.compress_cycle(train_x, test_x)


x_norm = rul_handler.Normalization()
train_x, test_x = x_norm.fit_and_normalize(train_x, test_x)
train_x = rul_handler.battery_life_to_time_series(train_x, N_CYCLE, train_battery_range)
test_x = rul_handler.battery_life_to_time_series(test_x, N_CYCLE, test_battery_range)

train_x, train_y, train_battery_range, train_y_soh = rul_handler.delete_initial(train_x, train_y, train_battery_range, train_y_soh, WARMUP_TRAIN)
test_x, test_y, test_battery_range, test_y_soh = rul_handler.delete_initial(test_x, test_y, test_battery_range, test_y_soh, WARMUP_TEST)

# first one is SOH, we keep only RUL
train_y = train_y[:,1]
test_y = test_y[:,1]

  x = np.array(
2021/03/16 17:34:16 [INFO]: Train x: (11325, 287, 3), train y soh: (11325, 1) | Test x: (3507, 287, 3), test y soh: (3507, 1) | 
                            battery names cycle train: (11325, 1), battery names steps train: (11325, 287, 1), 
                            battery names cycle test: (3507, 1), battery names steps test: (3507, 287, 1) |
                            time train: (11325, 287, 1), time test: (3507, 287, 1) |
                            raw current train: (11325, 287), raw current test: (3507, 287) |
                            
2021/03/16 17:34:17 [INFO]: battery ranges: [353, 721, 1054, 1553, 2035, 2358, 2671, 2995, 3335, 4335, 5335, 6335, 7335, 8335, 8879, 9417, 9991, 10594, 11197, 11325]
2021/03/16 17:34:17 [INFO]: processing range 0 - 101310
2021/03/16 17:34:17 [INFO]: processing range 101310 - 206926
2021/03/16 17:34:17 [INFO]: processing range 206926 - 302497
2021/03/16 17:34:17 [INFO]: processing range 302497 - 445710
2021/03/16 17:34:17 [IN

### Y normalization

In [7]:
y_norm = rul_handler.Normalization()
train_y, test_y = y_norm.fit_and_normalize(train_y, test_y)

# Model training

In [8]:
if IS_TRAINING:
    EXPERIMENT = "lstm_rul_unibo_powertools"

    experiment_name = time.strftime("%Y-%m-%d-%H-%M-%S") + '_' + EXPERIMENT
    print(experiment_name)

    # Model definition

    opt = tf.keras.optimizers.Adam(lr=0.000003)

    model = Sequential()
    model.add(Masking(input_shape=(train_x.shape[1], train_x.shape[2])))
    model.add(LSTM(128, activation='selu',
                    return_sequences=True,
                    kernel_regularizer=regularizers.l2(0.0002)))
    model.add(LSTM(64, activation='selu', return_sequences=False,
                    kernel_regularizer=regularizers.l2(0.0002)))
    model.add(Dense(64, activation='selu', kernel_regularizer=regularizers.l2(0.0002)))
    model.add(Dense(32, activation='selu', kernel_regularizer=regularizers.l2(0.0002)))
    model.add(Dense(1, activation='linear'))
    model.summary()

    model.compile(optimizer=opt, loss='huber', metrics=['mse', 'mae', 'mape', tf.keras.metrics.RootMeanSquaredError(name='rmse')])

In [9]:
if IS_TRAINING:
    history = model.fit(train_x, train_y, 
                                epochs=500, 
                                batch_size=32, 
                                verbose=1,
                                validation_split=0
                               )

In [10]:
if IS_TRAINING:
    model.save(data_path + 'results/trained_model/%s.h5' % experiment_name)

    hist_df = pd.DataFrame(history.history)
    hist_csv_file = data_path + 'results/trained_model/%s_history.csv' % experiment_name
    with open(hist_csv_file, mode='w') as f:
        hist_df.to_csv(f)
    history = history.history

In [11]:
if not IS_TRAINING:
    history = pd.read_csv(data_path + 'results/trained_model/%s_history.csv' % RESULT_NAME)
    model = keras.models.load_model(data_path + 'results/trained_model/%s.h5' % RESULT_NAME)
    model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking (Masking)            (None, 500, 6)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 500, 128)          69120     
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 124,801
Trainable params: 124,801
Non-trainable params: 0
__________________________________________________

In [12]:
if not IS_TRAINING:
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(history)

     Unnamed: 0      loss       mse       mae          mape      rmse
0             0  0.075547  0.035447  0.131100  5.704869e+07  0.188273
1             1  0.064149  0.012951  0.082281  3.424148e+07  0.113801
2             2  0.062606  0.010222  0.072182  2.900242e+07  0.101104
3             3  0.061686  0.008768  0.066432  2.568977e+07  0.093637
4             4  0.060935  0.007669  0.061829  2.293187e+07  0.087574
5             5  0.060198  0.006607  0.057398  2.111020e+07  0.081286
6             6  0.059415  0.005455  0.051803  1.862587e+07  0.073855
7             7  0.058876  0.004790  0.048068  1.687639e+07  0.069208
8             8  0.058446  0.004349  0.044902  1.517531e+07  0.065944
9             9  0.058033  0.003950  0.042033  1.365744e+07  0.062847
10           10  0.057593  0.003502  0.038991  1.223756e+07  0.059181
11           11  0.057288  0.003338  0.037586  1.150690e+07  0.057778
12           12  0.056989  0.003194  0.036344  1.072647e+07  0.056513
13           13  0.0

### Testing

In [13]:
results = model.evaluate(test_x, test_y, return_dict = True)
print(results)
max_rmse = 0
for index in range(test_x.shape[0]):
    result = model.evaluate(np.array([test_x[index, :, :]]), np.array([test_y[index]]), return_dict = True, verbose=0)
    max_rmse = max(max_rmse, result['rmse'])
print("Max rmse: {}".format(max_rmse))

{'loss': 0.009586389176547527, 'mse': 0.00044241605792194605, 'mae': 0.01339429896324873, 'mape': 2379157.5, 'rmse': 0.021033689379692078}
Max rmse: 0.07951751351356506


# Data Visualization

In [14]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=history['loss'],
                    mode='lines', name='train'))
fig.update_layout(title='Loss trend',
                  xaxis_title='epoch',
                  yaxis_title='loss',
                  width=1400,
                  height=600)
fig.show()

In [15]:
train_predictions = model.predict(train_x)

train_y = y_norm.denormalize(train_y)
train_predictions = y_norm.denormalize(train_predictions)

In [16]:
a = 0
for b in train_battery_range:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_y_soh[a:b], y=train_predictions[a:b,0],
                        mode='lines', name='predicted'))
    fig.add_trace(go.Scatter(x=train_y_soh[a:b], y=train_y[a:b],
                        mode='lines', name='actual'))
    fig.update_layout(title='Results on training',
                    xaxis_title='SoH Capacity',
                    yaxis_title='Remaining Ah until EOL',
                    xaxis={'autorange':'reversed'},
                    width=1400,
                    height=600)
    fig.show()
    a = b

In [17]:
a = 0
for b in train_battery_range:
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=train_predictions[a:b,0],
                        mode='lines', name='predicted'))
    fig.add_trace(go.Scatter(y=train_y[a:b],
                        mode='lines', name='actual'))
    fig.update_layout(title='Results on training',
                    xaxis_title='Cycle',
                    yaxis_title='Remaining Capacity until EOL',
                    width=1400,
                    height=600)
    fig.show()
    a = b

In [18]:
test_predictions = model.predict(test_x)

test_y = y_norm.denormalize(test_y)
test_predictions = y_norm.denormalize(test_predictions)

In [19]:
a = 0
for b in test_battery_range:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=test_y_soh[a:b], y=test_predictions[a:b,0],
                        mode='lines', name='predicted'))
    fig.add_trace(go.Scatter(x = test_y_soh[a:b], y=test_y[a:b],
                        mode='lines', name='actual'))
    fig.update_layout(title='Results on testing',
                    xaxis_title='SoH Capacity',
                    yaxis_title='Remaining Capacity until EOL',
                    xaxis={'autorange':'reversed'},
                    width=1400,
                    height=600)
    fig.show()
    a = b

In [20]:
a = 0
for b in test_battery_range:
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=test_predictions[a:b, 0],
                        mode='lines', name='predicted'))
    fig.add_trace(go.Scatter(y=test_y[a:b],
                        mode='lines', name='actual'))
    fig.update_layout(title='Results on testing',
                    xaxis_title='Cycle',
                    yaxis_title='Remaining Capacity until EOL',
                    width=1400,
                    height=600)
    fig.show()
    a = b