<a href="https://colab.research.google.com/github/LeoBaro/phd/blob/main/rtapipe/analysis/Untitled.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
# Record the TensorFlow version in the notebook output.
print(f"tf.__version__: {tf.__version__}")
# Print the default GPU device name only when TensorFlow reports a non-empty one.
if tf.test.gpu_device_name(): 
    print(f"Default GPU Device:{tf.test.gpu_device_name()}")

from os import getcwd
import os.path
import numpy as np
import pandas as pd
from pathlib import Path
from tensorflow import keras
from tensorflow.keras import layers
from matplotlib import pyplot as plt

## Directories

In [None]:
# Presumably the output folder of the dataset-generation notebook — TODO confirm.
# NOTE(review): absolute, machine-specific path; no visible cell reads from it.
datapath = Path("/home/baroncelli/phd/rtapipe/analysis/notebook_dataset_generation_for_models_output")
datapath

In [None]:
# Working directory of the running kernel; used as the base for `outdir`.
currentdir = getcwd()
currentdir

In [None]:
# Output folder path for this notebook, located under the current working directory.
# NOTE(review): the path is only built here; no visible cell creates or writes to it.
outdir = Path(currentdir).joinpath("notebook_lstm_output")
outdir

## The dataset

In [None]:
# Both CSV files are downloaded from the numenta/NAB GitHub repository,
# so this cell needs network access.
master_url_root = "https://raw.githubusercontent.com/numenta/NAB/master/data/"

# Series from the "artificialNoAnomaly" folder; used below as training data.
# The "timestamp" column becomes the (date-parsed) index.
df_small_noise_url_suffix = "artificialNoAnomaly/art_daily_small_noise.csv"
df_small_noise_url = master_url_root + df_small_noise_url_suffix
df_small_noise = pd.read_csv(
    df_small_noise_url, parse_dates=True, index_col="timestamp"
)

# Series from the "artificialWithAnomaly" folder, loaded the same way.
df_daily_jumpsup_url_suffix = "artificialWithAnomaly/art_daily_jumpsup.csv"
df_daily_jumpsup_url = master_url_root + df_daily_jumpsup_url_suffix
df_daily_jumpsup = pd.read_csv(
    df_daily_jumpsup_url, parse_dates=True, index_col="timestamp"
)

In [None]:
# Preview the first rows of the anomaly-free series...
print(df_small_noise.head())

# ...and of the series that contains an anomaly.
print(df_daily_jumpsup.head())

In [None]:
# Plot the anomaly-free series on its own figure (legend suppressed).
fig, ax = plt.subplots()
df_small_noise.plot(legend=False, ax=ax)
plt.show()

In [None]:
# Plot the series that contains an anomaly on its own figure (legend suppressed).
fig, ax = plt.subplots()
df_daily_jumpsup.plot(legend=False, ax=ax)
plt.show()

In [None]:
# Standardise the training series: subtract its mean and divide by its
# standard deviation. The statistics are kept in their own variables so they
# can be reused later.
training_mean = df_small_noise.mean()
training_std = df_small_noise.std()
df_training_value = (df_small_noise - training_mean) / training_std
print("Number of training samples:", len(df_training_value))

In [None]:
# Show the first rows of the standardised training data.
df_training_value.head()

In [None]:
# Default window length, in samples.
# NOTE(review): presumably one day of data for this "art_daily" series — TODO confirm.
TIME_STEPS = 288


def create_sequences(values, time_steps=TIME_STEPS):
    """Slice `values` into every overlapping window of `time_steps` rows.

    Window `k` is `values[k : k + time_steps]`; consecutive windows are
    shifted by one row. The final window ends on the last row of `values`,
    so an input of length `n` yields `n - time_steps + 1` windows.

    Parameters
    ----------
    values : array-like
        Sequence indexed along its first axis (e.g. a 2-D NumPy array of
        shape `(n, features)`).
    time_steps : int, optional
        Number of consecutive rows per window. Defaults to `TIME_STEPS`.

    Returns
    -------
    numpy.ndarray
        The windows stacked along a new leading axis.

    Raises
    ------
    ValueError
        If `values` has fewer than `time_steps` rows, so no window fits.
    """
    # "+ 1" keeps the window that ends exactly on the last row.
    n_windows = len(values) - time_steps + 1
    if n_windows < 1:
        raise ValueError(
            f"need at least {time_steps} rows to build a sequence, got {len(values)}"
        )
    return np.stack([values[start : start + time_steps] for start in range(n_windows)])


# Window the standardised training values using the default window length.
x_train = create_sequences(df_training_value.values)
print("Training input shape: ", x_train.shape)

## Convolutional Autoencoder model

In [None]:
"""
modelConv = keras.Sequential(
    [
        layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
        layers.Conv1D(
            filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Dropout(rate=0.2),
        layers.Conv1D(
            filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Conv1DTranspose(
            filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Dropout(rate=0.2),
        layers.Conv1DTranspose(
            filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
    ]
)
modelConv.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
modelConv.summary()
"""

## LSTM Autoencoder

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Input, Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.models import Model

from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# Stacked LSTM autoencoder. The encoder (16 -> 8 units) reduces each window to
# a single vector, RepeatVector copies that vector once per time step, and the
# decoder (8 -> 16 units) plus a per-step Dense layer maps back to the input's
# feature dimension.
modelLSTM = Sequential(
    [
        LSTM(
            16,
            activation='relu',
            input_shape=(x_train.shape[1], x_train.shape[2]),
            return_sequences=True,
        ),
        LSTM(8, activation='relu', return_sequences=False),
        RepeatVector(x_train.shape[1]),
        LSTM(8, activation='relu', return_sequences=True),
        LSTM(16, activation='relu', return_sequences=True),
        TimeDistributed(Dense(x_train.shape[2])),
    ]
)

# Trained with mean squared error as the reconstruction loss.
modelLSTM.compile(loss='mse', optimizer='adam')
modelLSTM.summary()

In [None]:
# Single-layer LSTM autoencoder with dropout. One 32-unit LSTM encodes each
# window to a vector, which is repeated once per time step and decoded by a
# second 32-unit LSTM followed by a per-step Dense layer.
modelLSTM2 = Sequential(
    [
        LSTM(32, input_shape=(x_train.shape[1], x_train.shape[2])),
        Dropout(rate=0.3),
        RepeatVector(x_train.shape[1]),
        LSTM(32, return_sequences=True),
        Dropout(rate=0.3),
        TimeDistributed(Dense(x_train.shape[2])),
    ]
)

# Trained with mean absolute error as the reconstruction loss.
modelLSTM2.compile(loss='mae', optimizer='adam')
modelLSTM2.summary()

## Models Training

In [None]:
# One checkpoint path per model. Each `cp.ckpt` path is later passed to
# ModelCheckpoint as the file path to write, so only its parent folder must
# exist. Calling mkdir on the path itself would create a *directory* named
# `cp.ckpt` at the location where the checkpoint is supposed to be written.
checkpoint_path_lstm = Path("./training_lstm/cp.ckpt")
checkpoint_path_lstm.parent.mkdir(exist_ok=True, parents=True)

checkpoint_path_lstm2 = Path("./training_lstm2/cp.ckpt")
checkpoint_path_lstm2.parent.mkdir(exist_ok=True, parents=True)

checkpoint_path_conv = Path("./training_conv/cp.ckpt")
checkpoint_path_conv.parent.mkdir(exist_ok=True, parents=True)

In [None]:
# Create a callback that saves the model's weights
cp_callback_lstm  = keras.callbacks.ModelCheckpoint(filepath=str(checkpoint_path_lstm), save_weights_only=True, verbose=1)
cp_callback_lstm2 = keras.callbacks.ModelCheckpoint(filepath=str(checkpoint_path_lstm2), save_weights_only=True, verbose=1)
cp_callback_conv  = keras.callbacks.ModelCheckpoint(filepath=str(checkpoint_path_conv), save_weights_only=True, verbose=1)

In [None]:
# NOTE(review): this value is not referenced by the visible training cells,
# which pass their own literal `epochs` to `fit` — confirm whether it is still needed.
epochs=20

In [None]:
#modelLSTM.load_weights(str(checkpoint_path_lstm))
#modelLSTM2.load_weights(str(checkpoint_path_lstm2))
#modelConv.load_weights(str(checkpoint_path_conv))

In [None]:
# Train the stacked LSTM autoencoder to reconstruct its own input (the
# windows are both features and targets). 10% of the data is held out for
# validation and weights are checkpointed through the callback.
modelLSTMHistory = modelLSTM.fit(
    x_train,
    x_train,
    epochs=1,
    batch_size=128,
    validation_split=0.1,
    verbose=1,
    callbacks=[cp_callback_lstm],
)

In [None]:
# Train the dropout LSTM autoencoder to reconstruct its own input. Training
# runs for up to 100 epochs but stops early once the validation loss has not
# improved for 5 epochs; weights are checkpointed through the callback.
modelLSTM2History = modelLSTM2.fit(
    x_train,
    x_train,
    epochs=100,
    batch_size=128,
    validation_split=0.1,
    verbose=1,
    callbacks=[
        cp_callback_lstm2,
        keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min"),
    ],
)

In [None]:
"""
modelConvHistory = modelConv.fit(x_train, x_train, epochs=50, batch_size=128, validation_split=0.1,
    callbacks=[
        keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min"),
        cp_callback_conv
    ],
)
"""

## Plotting the loss during the training phase

In [None]:
#plt.plot(modelConvHistory.history["loss"], label="CONV Training Loss", color="grey")
#plt.plot(modelConvHistory.history["val_loss"], label="CONV Validation Loss", color="grey", linestyle="--")


# Draw one solid (training) and one dashed (validation) loss curve per model;
# both curves of a model share the same colour.
loss_curves = (
    (modelLSTMHistory, "LSTM Training Loss", "LSTM Validation Loss", "orange"),
    (modelLSTM2History, "LSTM2 Training Loss", "LSTM2 Validation Loss", "green"),
)
for fit_history, train_label, val_label, colour in loss_curves:
    plt.plot(fit_history.history["loss"], label=train_label, color=colour)
    plt.plot(fit_history.history["val_loss"], label=val_label, color=colour, linestyle="--")

plt.legend()
plt.show()

## Plotting the loss distribution

In [None]:
def plotLoss(pred_data, real, labels=None):
    """Plot a histogram of reconstruction MAE for each set of predictions.

    For every entry of `pred_data` the absolute error against `real` is
    averaged over axis 1, drawn as a 50-bin histogram on the current figure,
    and its maximum is printed as the reconstruction error threshold.

    Parameters
    ----------
    pred_data : sequence of numpy.ndarray
        One array of reconstructions per model, each compatible with `real`
        for element-wise subtraction.
    real : numpy.ndarray
        The data the models tried to reconstruct.
    labels : sequence of str, optional
        Legend label for each entry of `pred_data`. Entries without a
        matching label are drawn with an empty label instead of raising
        `IndexError`.
    """
    # `None` replaces the former mutable `[""]` default, which also broke as
    # soon as more than one prediction set was passed without labels.
    labels = [] if labels is None else list(labels)

    for i, pred in enumerate(pred_data):
        label = labels[i] if i < len(labels) else ""
        mae_loss = np.mean(np.abs(pred - real), axis=1)
        plt.hist(mae_loss, bins=50, label=label)
        # Get reconstruction loss threshold.
        threshold = np.max(mae_loss)
        print("Reconstruction error threshold: ", threshold)

    # Axis titles are the same for every histogram, so set them once.
    plt.xlabel("Train MAE loss")
    plt.ylabel("No of samples")
    plt.legend()
    plt.show()

In [None]:
#autoencoder_predictions_on_training = modelConv.predict(x_train)


In [None]:
# Reconstruct the training windows with the stacked LSTM autoencoder.
lstm_predictions_on_training = modelLSTM.predict(x_train)

In [None]:
# Reconstruct the training windows with the dropout LSTM autoencoder.
lstm2_predictions_on_training = modelLSTM2.predict(x_train)

In [None]:
#print(autoencoder_predictions_on_training.shape)
# Sanity check: show the shape of the second model's reconstructions.
print(lstm2_predictions_on_training.shape)

In [None]:
# Compare the reconstruction-error histograms of both LSTM models on the training windows.
plotLoss([lstm_predictions_on_training, lstm2_predictions_on_training], x_train, labels=["Lstm","Lstm2"])

## Plotting some reconstructed samples

In [None]:
# Overlay each plotted input window with both models' reconstructions of the
# SAME window (index `i`). Previously the reconstructions were always taken
# from window 1 while the input came from window `i`, and both model curves
# carried the label "Lstm".
for i in range(1):
    plt.plot(x_train[i])
    #plt.plot(autoencoder_predictions_on_training[i], label="Conv")
    plt.plot(lstm_predictions_on_training[i], label="Lstm")
    plt.plot(lstm2_predictions_on_training[i], label="Lstm2")
plt.legend()
plt.show()