In [None]:
%pip install -q numpy pandas matplotlib seaborn scikit-learn tensorflow h5py

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Print NumPy arrays with 3 decimal places and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

# Show which TensorFlow version this kernel has loaded.
print(tf.__version__)

## Read Data

In [None]:
# Path (relative to the notebook) of the combined per-battery feature table.
featFileName = './Data/features_combined.csv'
# index_col=False keeps pandas from using the first CSV column as the row index.
batch_pd = pd.read_csv(filepath_or_buffer=featFileName, index_col=False)

In [None]:
# Battery metadata and label: all rows, every column from 'policy' through
# 'cycle_life' (label-based slices include both endpoints).
bat_info = batch_pd.loc[:, slice('policy', 'cycle_life')]
bat_info

In [None]:
# Feature matrix as a NumPy array: everything except the metadata/label columns
# and the 'QDiffLinVar' column.
feat = batch_pd.drop(
    columns=['policy', 'barcode', 'cycle_life', 'QDiffLinVar']
).to_numpy()
feat

## Clean the Data

In [None]:
# "data fix" (presumably quoted from the original version -- confirm):
# overwrite individual feature values by hand, one statement per affected row.
# Row 0: columns 9 and 10.
feat[0, [9, 10]] = [1.692e-5, 1.0750]
# Row 14: columns 1, 9 and 10.
feat[14, [1, 9, 10]] = [0.00397, -4.8296e-5, 1.0705]

## Train-Test split

In [None]:
# Deterministic interleaved split: even-positioned rows train, odd-positioned rows test.
feat_train, feat_test = feat[::2], feat[1::2]
label_train = bat_info["cycle_life"].iloc[::2]
label_test = bat_info["cycle_life"].iloc[1::2]
feat_train

## Normalization

In [None]:
# Summary statistics (one row per remaining column) to compare value ranges
# before normalizing.
batch_pd.drop(columns=['policy', 'barcode', 'QDiffLinVar']).describe().T

In [None]:
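# Disabled: labels are left in raw cycles. The model's final `power10` layer is used
# instead to simulate training against log10(cycle_life).
# label_train = np.log10(label_train)
# label_test = np.log10(label_test)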

In [None]:
# Equivalent to the "scale data" step in the original version: standardize every
# feature column with its own mean and variance, learned from the training split only.
# axis=-1 keeps separate statistics per feature. axis=None would pool all 13 features
# into one scalar mean/variance, leaving columns of very different magnitude
# effectively unscaled.
normalizer = layers.Normalization(input_shape=[13, ], axis=-1)
normalizer.adapt(feat_train)

In [None]:
# Display the mean statistic that `adapt` computed from the training features.
print(normalizer.mean.numpy())

## Linear regression

In [None]:
# simulate training the model against log10(cycle_life)
power10 = layers.Lambda(lambda x: tf.constant(10.0)**x)

In [None]:
# Linear regression pipeline: normalize -> single-unit Dense -> 10**x.
discharge_model = keras.Sequential([normalizer, layers.Dense(1), power10])

discharge_model.summary()

Run the untrained model on the first 10 batteries' features. The output won't be good, but notice that it has the expected shape of `(10, 1)`:

In [None]:
# Smoke test: run the model on the first 10 training rows before it is compiled or fitted.
discharge_model.predict(feat_train[:10])

In [None]:
# Training configuration: Adam with a 1e-3 learning rate, minimizing mean squared error.
discharge_model.compile(
    loss="mean_squared_error",
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
)

In [None]:
%%time
history = discharge_model.fit(
    feat_train,
    label_train,
    epochs=5000,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split = 0.2)

## Evaluation
Note: because the model is compiled with Mean Squared Error as its loss, the square root of each reported loss value must be taken to obtain the **Root** Mean Squared Error reported by the authors.

In [None]:
def plot_loss(history):
  """Plot training and validation error per epoch on the current pyplot figure.

  Args:
    history: object whose `.history` mapping holds the per-epoch 'loss' and
      'val_loss' sequences (e.g. the value returned by `model.fit`).

  Each curve is the square root of the stored loss, so when the loss is a mean
  squared error the plotted value is an RMSE.
  """
  for metric in ('loss', 'val_loss'):
    rmse_curve = np.sqrt(history.history[metric])
    plt.plot(rmse_curve, label=metric)
  # Fixed y-window so that plots from separate runs share the same scale.
  plt.ylim([50, 300])
  plt.xlabel('Epoch')
  plt.ylabel('Error [cycles]')
  plt.legend()
  plt.grid(True)

In [None]:
# Draw the learning curves for the training run recorded in `history`.
plot_loss(history)

In [None]:
# Per-epoch metrics as a table. Raising to the power 0.5 takes the square root
# of each stored loss; the epoch number is appended as the last column.
hist = (
    pd.DataFrame(history.history)
    .pow(0.5)
    .assign(epoch=history.epoch)
)
hist.tail()

In [None]:
# Power 1/2 is the same as square root
discharge_model.evaluate(
    feat_test,
    label_test, verbose=0) ** 0.5

In [None]:
def plot_accuracy(y_train, y_test):
  """Scatter predicted against actual cycle life for the train and test splits.

  Args:
    y_train: predicted values for the training rows (x-axis).
    y_test: predicted values for the test rows (x-axis).

  The actual values (y-axis) are read from the notebook-level `label_train`
  and `label_test`. A black line from (0, 0) to (2000, 2000) marks perfect
  agreement.
  """
  splits = (
      (y_train, label_train, 'Predictions (train)'),
      (y_test, label_test, 'Predictions (test)'),
  )
  for predicted, actual, tag in splits:
    plt.scatter(predicted, actual, label=tag)
  diagonal = [0, 2000]
  plt.plot(diagonal, diagonal, color='k', label='Perfection')
  plt.xlabel('Predicted Cycle life')
  plt.ylabel('Actual Cycle life')
  plt.legend()

In [None]:
# Compare predicted and actual cycle life on both the training and the test rows.
plot_accuracy(
    y_train=discharge_model.predict(feat_train),
    y_test=discharge_model.predict(feat_test),
)