In [None]:
%pip install -q numpy pandas matplotlib seaborn scikit-learn tensorflow h5py

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from keras.models import Sequential
from keras.layers import Dense
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam



# Dataset

In [None]:
# Read the combined feature table. index_col=False tells pandas not to use
# the first CSV column as the row index.
fp = "../data/features_combined.csv"
batch_pd = pd.read_csv(filepath_or_buffer=fp, index_col=False)

# Keep the raw frame untouched; all later processing happens on this copy.
dataset = batch_pd.copy(deep=True)
dataset

In [None]:
# Show the per-column count of missing values. A bare expression in the
# middle of a cell is silently discarded, so it is displayed explicitly.
display(batch_pd.isna().sum())

# Build the cleaned frame from the raw `batch_pd` instead of from `dataset`
# itself: re-running this cell then yields the same result rather than
# raising KeyError because 'policy'/'barcode' were already dropped.
# NOTE(review): dropna() runs before the column drop, so a NaN in 'policy'
# or 'barcode' also removes the row -- confirm this is intended.
dataset = batch_pd.dropna().drop(columns=['policy', 'barcode'])
dataset

In [None]:
# feat = dataset.loc[:,'QDiffLinVar'].to_numpy()
# bat_info = dataset.loc[:,'policy':'cycle_life']


## Split Train-Test

In [None]:
# Deterministic interleaved split: rows at even positions go to the training
# set, rows at odd positions to the test set.
train_dataset = dataset.iloc[::2]
test_dataset = dataset.iloc[1::2]

# Joint and marginal (KDE) distributions of the target and the one feature
# used by the model below, on the training rows only.
pairplot_columns = ['cycle_life', 'QDiffLinVar']
sns.pairplot(train_dataset.loc[:, pairplot_columns], diag_kind='kde')

In [None]:
# train_dataset = feat[0::2]
# test_dataset = feat[1::2]
# label_train = bat_info["cycle_life"][0::2]
# label_test = bat_info["cycle_life"][1::2]

In [None]:
# Summary statistics of the training rows, one row per column.
train_dataset.describe().T

In [None]:
# The regression target is the 'cycle_life' column; everything else is a
# candidate feature. Labels are copied so they do not alias the source frame.
train_labels = train_dataset['cycle_life'].copy()
test_labels = test_dataset['cycle_life'].copy()

# drop() returns new frames, leaving train_dataset/test_dataset unchanged.
train_features = train_dataset.drop(columns='cycle_life')
test_features = test_dataset.drop(columns='cycle_life')
train_features

## Normalization Layer

In [None]:
# Normalization layer over the last axis, with statistics learned from the
# training features only.
normalizer = tf.keras.layers.Normalization(axis=-1)
train_feature_matrix = train_features.to_numpy()
normalizer.adapt(train_feature_matrix)

# Print the learned means as a quick sanity check of the adapt() call.
print(normalizer.mean.numpy())

## Linear Regression

### Layering

In [None]:
# Extract the single predictor used by the model as a standalone NumPy array.
QDiffLinVar = train_features['QDiffLinVar'].to_numpy(copy=True)

# Scalar normalization (axis=None): one mean/variance pair for the whole
# input, learned from the training values of this feature.
QDiffLinVar_normalizer = layers.Normalization(axis=None, input_shape=[1,])
QDiffLinVar_normalizer.adapt(QDiffLinVar)

### Model

In [None]:
# Layer stack: adapted input normalization, two 64-unit ReLU hidden layers,
# and a single linear output unit for the predicted cycle life.
variance_layers = [
    QDiffLinVar_normalizer,
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=1),
]
variance_model = tf.keras.Sequential(layers=variance_layers)

variance_model.summary()

This model predicts cycle life (`cycle_life`) from a single input feature, `QDiffLinVar`.

In [None]:
# Smoke test: push the first ten training samples through the model to
# confirm that the input/output shapes line up.
sample_inputs = QDiffLinVar[:10]
variance_model.predict(sample_inputs)

In [None]:
# Optimise mean squared error with Adam and additionally track mean
# absolute error as a reporting metric.
variance_model.compile(
    loss='mse',
    metrics=['mae'],
    optimizer=Adam(learning_rate=0.01),
)

In [None]:
%%time
history = variance_model.fit(
    train_features['QDiffLinVar'],
    train_labels,
    epochs=1000,
    # Suppress logging.
    verbose=2,
    # Calculate validation results on 20% of the training data.
    validation_split = 0.2)

In [None]:
def plot_loss(history, ylim=(50, 300)):
  """Plot the square root of the training/validation loss per epoch.

  Args:
    history: Object exposing a `history` mapping that contains a 'loss'
      sequence and, optionally, a 'val_loss' sequence (for example the
      value returned by `fit`).
    ylim: `(low, high)` limits for the y-axis. Defaults to the previously
      hard-coded (50, 300); pass None to let matplotlib autoscale.
  """
  curves = history.history
  plt.plot(np.sqrt(curves['loss']), label='loss')
  # A run without validation data records no 'val_loss'; skip that curve
  # instead of failing with KeyError.
  if 'val_loss' in curves:
    plt.plot(np.sqrt(curves['val_loss']), label='val_loss')
  if ylim is not None:
    plt.ylim(list(ylim))
  plt.xlabel('Epoch')
  plt.ylabel('Error [cycles]')
  plt.legend()
  plt.grid(True)

# Draw the learning curves recorded in `history`.
plot_loss(history)

In [None]:
# One row per epoch with every value logged during training.
hist = pd.DataFrame(history.history)

# Take the square root of the loss columns only. The model was compiled with
# loss='mse', so the root puts the loss in the same units as the labels.
# The previous `hist.pow(0.5)` also square-rooted the 'mae'/'val_mae'
# columns, which are not squared quantities, corrupting them.
loss_columns = [col for col in hist.columns if col.endswith('loss')]
hist[loss_columns] = hist[loss_columns].pow(0.5)

hist['epoch'] = history.epoch
# Show only the final epochs rather than the whole per-epoch table.
hist.tail()


In [None]:
# Held-out evaluation on the test rows. The stored value is whatever
# evaluate() returns for the compiled loss and metrics; the results table
# later takes the square root of its first entry.
test_results = {
    'variance_model': variance_model.evaluate(
        x=test_features['QDiffLinVar'],
        y=test_labels,
        verbose=0,
    ),
}

In [None]:
# Evenly spaced grid of 100 feature values on which to trace the learned curve.
# tf.linspace is documented for floating-point start/stop; integer literals
# produce an integer tensor, so the endpoints are written as floats.
# NOTE(review): the [-5, 2] range presumably brackets the observed
# QDiffLinVar values -- confirm against train_features.describe().
x = tf.linspace(-5.0, 2.0, 100)
y = variance_model.predict(x)

### Predict

In [None]:
def plot_prediction(y_train, y_test):
  """Scatter predicted against actual cycle life for both data partitions.

  Predictions go on the x-axis, and the actual values are read from the
  notebook-level `train_labels` / `test_labels` for the y-axis. A black
  diagonal marks perfect agreement.

  Args:
    y_train: Predictions for the training rows (same order as train_labels).
    y_test: Predictions for the test rows (same order as test_labels).
  """
  axis_range = [0, 2000]
  series = (
      (y_train, train_labels, 'Predictions (train)'),
      (y_test, test_labels, 'Predictions (test)'),
  )

  # Equal aspect so the identity line sits at 45 degrees.
  plt.axes(aspect='equal')
  for predicted, actual, tag in series:
    plt.scatter(predicted, actual, label=tag)

  plt.xlim(axis_range)
  plt.ylim(axis_range)
  # Identity line: points on it are predicted exactly.
  plt.plot(axis_range, axis_range, 'k')
  plt.xlabel('Predicted Cycle life')
  plt.ylabel('Actual Cycle life')
  plt.legend()

In [None]:
# Model outputs for both partitions, computed from the single input feature.
train_prediction = variance_model.predict(train_features.loc[:, 'QDiffLinVar'])
test_prediction = variance_model.predict(test_features.loc[:, 'QDiffLinVar'])

# Predicted-vs-actual scatter for train and test together.
plot_prediction(y_train=train_prediction, y_test=test_prediction)

### Evaluate

In [None]:
# The first stored entry is the evaluation loss (the model is compiled with
# loss='mse'); replace it with its square root so it is in the same units
# as the mean absolute error.
# NOTE: this overwrites the stored value in place. Re-running this cell
# without re-running the evaluation cell first takes the root a second time.
test_results['variance_model'][0] = test_results['variance_model'][0] ** 0.5

# The row now holds the root of the MSE, so it is labelled 'RMSE'
# (it was previously labelled 'MSE').
pd.DataFrame(test_results, index=['RMSE', 'Mean absolute error']).T

### Error Distribution

In [None]:
# Signed residuals on the test rows: positive means the model over-predicted.
flat_test_prediction = test_prediction.reshape(-1)
error = flat_test_prediction - test_labels

# Histogram of the residuals.
plt.hist(error, bins=100)
plt.xlabel('Prediction Error')
_ = plt.ylabel('Count')