In [1]:
seed = 42

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
np.random.seed(seed)

import logging

import random
random.seed(seed)

In [2]:
# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

2.14.0


In [3]:
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [4]:
training_data = np.load("training_data.npy").astype(np.float32)
categories = np.load("categories.npy")
valid_periods=np.load("valid_periods.npy")
categories_unique = np.unique(categories)

print(training_data.shape)
print(valid_periods.shape)
print(categories.shape)

(48000, 2776)
(48000, 2)
(48000,)


In [5]:
def build_dataset(category, window_length=200, telescope=18, from_start=True):
    dataset = []

    skipped_samples = 0
    dataset_window_length = window_length + telescope
    sample_map = categories==category
    
    valid_perios_for_category = valid_periods[sample_map]
    training_data_for_category = training_data[sample_map]

    for i in range(len(valid_perios_for_category)):
        from_time = valid_perios_for_category[i,0]
        to_time = valid_perios_for_category[i,1]
        if(to_time-from_time<dataset_window_length):
            skipped_samples+=1
            continue
        if(from_start):
            sample = training_data_for_category[i][from_time:from_time+dataset_window_length]
        else:
            sample = training_data_for_category[i][to_time-dataset_window_length:to_time]
        dataset.append(sample)

    dataset = np.array(dataset)
    dataset = dataset.reshape((dataset.shape[0], dataset.shape[1], 1))
    print("Skipped samples: ", skipped_samples)

    return dataset

In [6]:
window_length=200
telescope=10

In [7]:
datasets = {}
for category in categories_unique:
    dataset = build_dataset(category, window_length=window_length, telescope=telescope)
    datasets[category] = dataset

Skipped samples:  1891
Skipped samples:  7567
Skipped samples:  5100
Skipped samples:  4507


Skipped samples:  6821
Skipped samples:  178


In [8]:
def build_CONV_LSTM_model(input_shape, output_shape):
    # Ensure the input time steps are at least as many as the output time steps
    assert input_shape[0] >= output_shape[0], "For this exercise we want input time steps to be >= of output time steps"

    # Define the input layer with the specified shape
    input_layer = tfkl.Input(shape=input_shape, name='input_layer')

    # Add a Bidirectional LSTM layer with 64 units
    x = tfkl.Bidirectional(tfkl.LSTM(64, return_sequences=True, name='lstm'), name='bidirectional_lstm')(input_layer)

    # Add a 1D Convolution layer with 128 filters and a kernel size of 3
    x = tfkl.Conv1D(128, 3, padding='same', activation='relu', name='conv')(x)

    # Add a final Convolution layer to match the desired output shape
    output_layer = tfkl.Conv1D(output_shape[1], 3, padding='same', name='output_layer')(x)

    # Calculate the size to crop from the output to match the output shape
    crop_size = output_layer.shape[1] - output_shape[0]

    # Crop the output to the desired length
    output_layer = tfkl.Cropping1D((0, crop_size), name='cropping')(output_layer)

    # Construct the model by connecting input and output layers
    model = tf.keras.Model(inputs=input_layer, outputs=output_layer, name='CONV_LSTM_model')

    # Compile the model with Mean Squared Error loss and Adam optimizer
    model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam(), metrics=['mse', "mae"])

    return model

In [9]:
def train_models(datasets):
    for category in categories_unique:
        print("Training model for category: ", category)
        X_data = datasets[category]
        X_train, y_train = X_data[:, :-telescope], X_data[:, -telescope:]

        input_shape = X_train.shape[1:]
        output_shape = y_train.shape[1:]
        batch_size = 64
        epochs = 200

        model = build_CONV_LSTM_model(input_shape, output_shape)
        history = model.fit(
            x = X_train,
            y = y_train,
            batch_size = batch_size,
            epochs = epochs,
            validation_split=.1,
            callbacks = [
                tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=12, restore_best_weights=True),
                tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, factor=0.1, min_lr=1e-5)
            ]
        ).history

        print("Model " + category + " MSE: "+ str(history["val_mse"][-1]))
        print("Model " + category + " MAE: " + str(history["val_mae"][-1]))

        model.save("Vanilla_200Window/Vanilla_"+category)
        del model

In [10]:
train_models(datasets)

Training model for category:  A
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Model A MSE: 0.05247316509485245
Model A MAE: 0.1873892843723297
Training model for category:  B
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Model B MSE: 0.04649817571043968
Model B MAE: 0.18077322840690613
Training model for category:  C
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 2