# **PARAMS**


In [None]:
# Length (in time steps) of each input window; copied into `window` below.
WINDOW_SIZE = 200
# Step between the starts of consecutive windows; copied into `stride` below.
STRIDE_SIZE = 10
# Number of future time steps forecast per window; copied into `telescope` below.
TELESCOPE_SIZE = 18

# Fraction of the series held out as test set (passed to train_test_split as test_size).
TEST_QUOTA = 0.2
# NOTE(review): not referenced anywhere in the visible notebook — confirm whether
# it is meant to drive the validation split used during training.
VALIDATION_QUOTA = 0.15

# Training settings; copied into `epochs` / `batch_size` right before training.
EPOCHS = 250
BATCH_SIZE = 256

# NOTE(review): not referenced in the visible notebook — presumably a label for
# the experiment; confirm.
MODEL_NAME = "TRANSFORMER"

# **Import libraries**

In [None]:
# Fix randomness and hide warnings
seed = 42

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

from datetime import date

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
np.random.seed(seed)

import logging

import random
random.seed(seed)

import math

# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler

# **Load the Dataset**

In [None]:
def load_dataset(data_dir="Dataset"):
    """Load the three dataset arrays from disk and print their shapes.

    Args:
        data_dir: Directory that contains ``training_data.npy``,
            ``categories.npy`` and ``valid_periods.npy``. Defaults to
            ``"Dataset"``, the location that used to be hard-coded here.

    Returns:
        Tuple ``(training_data, categories, valid_periods)`` exactly as
        returned by ``np.load``.

    Raises:
        FileNotFoundError: If one of the three files is missing (raised by
            ``np.load``).
    """
    def _load(stem):
        # All three files share the same directory and the .npy extension.
        return np.load(os.path.join(data_dir, f"{stem}.npy"))

    training_data = _load("training_data")
    print(f"Training shape: {training_data.shape}")

    categories = _load("categories")
    print(f"Categories shape: {categories.shape}")

    valid_periods = _load("valid_periods")
    print(f"Valid_periods shape: {valid_periods.shape}")

    return training_data, categories, valid_periods

In [None]:
training_data, categories, valid_periods = load_dataset()

In [None]:
lengths = valid_periods[:,1] - valid_periods[:,0]


In [None]:
df = pd.DataFrame()
df["Lengths"] = lengths
df["Categories"] = categories

In [None]:
def plotDataLengths(df):
    """Scatter-plot the ``Lengths`` column (x) against the ``Categories`` column (y)."""
    _, axis = plt.subplots(figsize=(17, 4))
    axis.scatter(df["Lengths"], df["Categories"])
    plt.show()

In [None]:
plotDataLengths(df)

In [None]:
del df
del lengths

In [None]:
df = pd.DataFrame(training_data)

In [None]:
df.info()

In [None]:
def getSequenceFromDataset(index):
    """Return row ``index`` of ``training_data`` cropped to its valid period."""
    period = valid_periods[index]
    start, end = period[0], period[1]
    return training_data[index, start:end]

In [None]:
getSequenceFromDataset(0)

# **GET ONE HOT CATEGORIES**


In [None]:
categories

In [None]:
np.unique(categories)

In [None]:
# Map each distinct category label to its position among the unique labels
unique_categories = np.unique(categories)
category_to_index = dict(zip(unique_categories, range(len(unique_categories))))
category_to_index

In [None]:
# Convert labels to numerical values using the dictionary
numerical_labels = [category_to_index[label] for label in categories]
numerical_labels[:15]

In [None]:
one_hot_categories = tfk.utils.to_categorical(numerical_labels,len(np.unique(categories)))

In [None]:
one_hot_categories[-1]

# **Some Plots**

In [None]:
def inspect_dataset(indexes):
    """Plot the valid part of the selected series, one subplot per series.

    Each subplot is titled with the series index and its category.

    Args:
        indexes: Sequence of row indices into the full dataset. An empty
            sequence draws nothing.
    """
    num_plots = len(indexes)
    if num_plots == 0:
        # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
        return

    # squeeze=False always returns a 2-D array of axes, so the single-plot
    # case needs no special handling.
    fig, axs = plt.subplots(num_plots, 1, figsize=(10, 6 * num_plots), squeeze=False)

    for ax, index in zip(axs[:, 0], indexes):
        # Only the valid period of the series is plotted.
        sequence = getSequenceFromDataset(index)

        ax.set_title(f"Sequence {index}, Category {categories[index]}")
        ax.plot(sequence)
        ax.set_xlabel("Time")
        ax.set_ylabel("Value")

    plt.tight_layout()
    plt.show()

# Call the function with a list of row indexes of the sequences you want to plot
inspect_dataset([3, 12500, 25000])  # Example: plotting sequences with indexes 3, 12500, and 25000


# **Normalize and Split the Dataset**

In [None]:
test_quota = TEST_QUOTA
test_quota

In [None]:
from sklearn.model_selection import train_test_split

# Split row indices rather than the data itself, so that every aligned array
# (series, categories, valid periods) is partitioned the same way.
indices = np.arange(training_data.shape[0])

# Hold-out split of the indices, stratified on the category labels
train_indices, test_indices = train_test_split(
    indices,
    test_size=test_quota,
    stratify=categories,
    random_state=seed,
)

# Apply the split to the series, their categories and their valid periods
train_data, test_data = training_data[train_indices], training_data[test_indices]
train_categories, test_categories = categories[train_indices], categories[test_indices]
valid_periods_train, valid_periods_test = valid_periods[train_indices], valid_periods[test_indices]

In [None]:
train_data.shape

In [None]:
test_data.shape

In [None]:
training_data.shape

In [None]:
def getTrainSequenceFromDataset(index):
    """Return row ``index`` of ``train_data`` cropped to its valid period."""
    period = valid_periods_train[index]
    start, end = period[0], period[1]
    return train_data[index, start:end]

def getTestSequenceFromDataset(index):
    """Return row ``index`` of ``test_data`` cropped to its valid period."""
    period = valid_periods_test[index]
    start, end = period[0], period[1]
    return test_data[index, start:end]


In [None]:
getTrainSequenceFromDataset(0).shape[0]

In [None]:
getTestSequenceFromDataset(0).shape[0]

In [None]:
import matplotlib.pyplot as plt
index_to_plot = 100

# Plot the valid (cropped) part of a single training series
plt.figure(figsize=(17, 5))

# Draw the series in blue.
# NOTE(review): the legend label says "normalized", but no normalization step
# is visible in this notebook — confirm the data is stored already normalized.
plt.plot(getTrainSequenceFromDataset(index_to_plot), label='Train sequence (normalized)', color='blue')


plt.title(f'SEQUENCE {index_to_plot}: Category: {train_categories[index_to_plot]}')
plt.legend()
plt.show()

# **Prepare the Data**


In [None]:
window = WINDOW_SIZE
stride = STRIDE_SIZE

In [None]:
future = train_data[:,-window:]

print(f"future has {future[123].shape} sequences")

In [None]:
future = np.expand_dims(future, axis=0)
future.shape

In [None]:
def build_sequences(train=True, window=200, stride=20, telescope=100):
    """Cut every series of the chosen split into (input window, target) pairs.

    Each series is read through ``getTrainSequenceFromDataset`` or
    ``getTestSequenceFromDataset`` (so only its valid period is used) and is
    left-padded with zeros until its length is a multiple of ``window``. A
    window of ``window`` steps is then slid over it with step ``stride``; the
    ``telescope`` steps that follow each window are its target.

    Args:
        train: If true, use the training split (``train_data``); otherwise
            use the test split (``test_data``).
        window: Number of time steps in each input sample.
        stride: Step between the starts of consecutive windows. Must divide
            ``window``.
        telescope: Number of future time steps in each target.

    Returns:
        Tuple ``(dataset, labels)`` of NumPy arrays with shapes
        ``(n_samples, window)`` and ``(n_samples, telescope)``. Both have
        shape ``(0,)`` when no series is long enough to yield a sample.

    Raises:
        AssertionError: If ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0, "window must be a multiple of stride"

    if train:
        get_sequence = getTrainSequenceFromDataset
        n_series = train_data.shape[0]
    else:
        get_sequence = getTestSequenceFromDataset
        n_series = test_data.shape[0]

    dataset = []
    labels = []

    for index in range(n_series):
        series = get_sequence(index)

        remainder = len(series) % window
        if remainder != 0:
            # Left-pad with zeros so the most recent values stay at the end.
            padding = np.zeros(window - remainder, dtype='float32')
            series = np.concatenate((padding, series))
            assert len(series) % window == 0

        # Last start position whose window *and* target still fit entirely in
        # the series. The +1 below makes the range include it: the previous
        # exclusive bound silently dropped the final sample whenever
        # (len - window - telescope) was a multiple of stride.
        last_start = len(series) - window - telescope
        for start in range(0, last_start + 1, stride):
            split = start + window
            dataset.append(series[start:split])
            labels.append(series[split:split + telescope])

    return np.array(dataset), np.array(labels)

In [None]:
telescope = TELESCOPE_SIZE

# **FORECAST**

In [None]:
direct_telescope = telescope

In [None]:
stride

In [None]:
from keras import Layer

class Time2Vector(Layer):
    """Learned time embedding with one linear and one periodic (sine) feature.

    For an input of shape ``(batch, seq_len)`` the layer returns a tensor of
    shape ``(batch, seq_len, 2)``: a per-position affine transform of the
    input and the sine of a second per-position affine transform.

    Args:
        seq_len: Length of the input sequences; every weight vector has this size.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to the base ``Layer``.
    """

    def __init__(self, seq_len, **kwargs):
        # Forward the Keras keyword arguments instead of silently dropping them.
        super().__init__(**kwargs)
        self.seq_len = seq_len

    def build(self, input_shape):
        weight_shape = (int(self.seq_len),)

        self.weights_linear = self.add_weight(name='weight_linear',
                                              shape=weight_shape,
                                              initializer='uniform',
                                              trainable=True)

        self.bias_linear = self.add_weight(name='bias_linear',
                                           shape=weight_shape,
                                           initializer='uniform',
                                           trainable=True)

        self.weights_periodic = self.add_weight(name='weight_periodic',
                                                shape=weight_shape,
                                                initializer='uniform',
                                                trainable=True)

        self.bias_periodic = self.add_weight(name='bias_periodic',
                                             shape=weight_shape,
                                             initializer='uniform',
                                             trainable=True)

    def call(self, x):
        # Linear component: element-wise scale and shift along the sequence axis.
        time_linear = self.weights_linear * x + self.bias_linear
        time_linear = tf.expand_dims(time_linear, axis=-1)  # (batch, seq_len, 1)

        # Periodic component: sine of a second element-wise affine transform.
        time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
        time_periodic = tf.expand_dims(time_periodic, axis=-1)  # (batch, seq_len, 1)
        return tf.concat([time_linear, time_periodic], axis=-1)  # (batch, seq_len, 2)

    def get_config(self):
        # Expose the constructor argument so a saved model can rebuild the layer.
        config = super().get_config()
        config.update({'seq_len': self.seq_len})
        return config

In [None]:
class SingleAttention(Layer):
    """One scaled dot-product attention head.

    The layer is called with a 3-tuple ``(query_seq, key_seq, value_seq)``.
    Each element is projected by its own Dense layer, the query/key scores
    are divided by ``sqrt(d_k)`` and soft-maxed over the last axis, and the
    result weights the projected values.

    Args:
        d_k: Width of the query and key projections.
        d_v: Width of the value projection (and of the output).
        **kwargs: Standard Keras layer arguments, forwarded to ``Layer``.
    """

    def __init__(self, d_k, d_v, **kwargs):
        super().__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v

    def build(self, input_shape):
        # No `input_shape` is passed to the Dense layers: they infer their
        # input width on first call, and the shape received here describes
        # the whole (q, k, v) tuple rather than a single tensor.
        self.query = tfkl.Dense(self.d_k, kernel_initializer='glorot_uniform', bias_initializer='glorot_uniform')
        self.key = tfkl.Dense(self.d_k, kernel_initializer='glorot_uniform', bias_initializer='glorot_uniform')
        self.value = tfkl.Dense(self.d_v, kernel_initializer='glorot_uniform', bias_initializer='glorot_uniform')

    def call(self, inputs):  # inputs = (in_seq, in_seq, in_seq)
        q = self.query(inputs[0])
        k = self.key(inputs[1])
        v = self.value(inputs[2])

        attn_weights = tf.matmul(q, k, transpose_b=True)
        # Scale with a single broadcast division; tf.map_fn looped over the
        # batch elements to perform the same scalar division.
        attn_weights = attn_weights / tf.math.sqrt(tf.cast(self.d_k, attn_weights.dtype))
        attn_weights = tf.nn.softmax(attn_weights, axis=-1)

        return tf.matmul(attn_weights, v)

    def get_config(self):
        # Expose the constructor arguments so a saved model can rebuild the layer.
        config = super().get_config()
        config.update({'d_k': self.d_k, 'd_v': self.d_v})
        return config

In [None]:
class MultiAttention(Layer):
    """Multi-head attention built from ``n_heads`` independent ``SingleAttention`` heads.

    The layer is called with a 3-tuple ``(query_seq, key_seq, value_seq)``.
    The outputs of all heads are concatenated on the last axis and projected
    by a final Dense layer.

    Args:
        d_k: Width of the query/key projection in every head.
        d_v: Width of the value projection in every head.
        n_heads: Number of attention heads.
        **kwargs: Standard Keras layer arguments, forwarded to ``Layer``.
    """

    def __init__(self, d_k, d_v, n_heads, **kwargs):
        super().__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.attn_heads = list()

    def build(self, input_shape):
        for _ in range(self.n_heads):
            self.attn_heads.append(SingleAttention(self.d_k, self.d_v))
        # The output width is fixed at 3 so the result can be added to the
        # 3-feature encoder input (1 raw value + 2 Time2Vector features) in
        # the residual connection of TransformerEncoder.
        self.linear = tfkl.Dense(3, kernel_initializer='glorot_uniform', bias_initializer='glorot_uniform')

    def call(self, inputs):
        attn = [head(inputs) for head in self.attn_heads]
        concat_attn = tf.concat(attn, axis=-1)
        return self.linear(concat_attn)

    def get_config(self):
        # Expose the constructor arguments so a saved model can rebuild the layer.
        config = super().get_config()
        config.update({'d_k': self.d_k, 'd_v': self.d_v, 'n_heads': self.n_heads})
        return config

In [None]:
class TransformerEncoder(Layer):
    """Transformer encoder block: multi-head attention followed by a feed-forward part.

    The layer is called with a 3-tuple ``(query_seq, key_seq, value_seq)``.
    Attention output goes through dropout, is added to ``inputs[0]`` and
    layer-normalized. The feed-forward part is two kernel-size-1 Conv1D
    layers followed by dropout, a second residual addition and a second
    layer normalization.

    Args:
        d_k: Width of the query/key projection in every attention head.
        d_v: Width of the value projection in every attention head.
        n_heads: Number of attention heads.
        ff_dim: Number of filters of the first feed-forward Conv1D.
        dropout: Dropout rate used after attention and after the feed-forward part.
        **kwargs: Standard Keras layer arguments, forwarded to ``Layer``.
    """

    def __init__(self, d_k, d_v, n_heads, ff_dim, dropout=0.1, **kwargs):
        # Forward the Keras keyword arguments instead of silently dropping them.
        super().__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout

    def build(self, input_shape):
        self.attn_multi = MultiAttention(self.d_k, self.d_v, self.n_heads)
        self.attn_dropout = tfkl.Dropout(self.dropout_rate)
        self.attn_normalize = tfkl.LayerNormalization(epsilon=1e-6)

        self.ff_conv1D_1 = tfkl.Conv1D(filters=self.ff_dim, kernel_size=1, activation='relu')
        # 3 filters bring the feed-forward output back to the 3-feature width
        # of the block input, as required by the residual addition in call().
        self.ff_conv1D_2 = tfkl.Conv1D(filters=3, kernel_size=1)
        self.ff_dropout = tfkl.Dropout(self.dropout_rate)
        self.ff_normalize = tfkl.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):  # inputs = (in_seq, in_seq, in_seq)
        attn_layer = self.attn_multi(inputs)
        attn_layer = self.attn_dropout(attn_layer)
        attn_layer = self.attn_normalize(inputs[0] + attn_layer)

        ff_layer = self.ff_conv1D_1(attn_layer)
        ff_layer = self.ff_conv1D_2(ff_layer)
        ff_layer = self.ff_dropout(ff_layer)
        # NOTE(review): this residual adds the block input (inputs[0]) rather
        # than the attention sub-layer output (attn_layer) — confirm that is
        # intended.
        ff_layer = self.ff_normalize(inputs[0] + ff_layer)
        return ff_layer

    def get_config(self):
        # Expose the constructor arguments so a saved model can rebuild the layer.
        config = super().get_config()
        config.update({
            'd_k': self.d_k,
            'd_v': self.d_v,
            'n_heads': self.n_heads,
            'ff_dim': self.ff_dim,
            'dropout': self.dropout_rate,
        })
        return config

In [None]:
class ExpandLayer(Layer):
    """Keras layer that appends a trailing axis of size one to its input."""

    def call(self, x):
        # e.g. (batch, seq_len) -> (batch, seq_len, 1)
        return x[..., tf.newaxis]



In [None]:
# Model input length and number of forecast steps, taken from the notebook settings
seq_len = window
n_output = telescope

# Attention hyper-parameters shared by every TransformerEncoder block
d_k = 32  # width of the query/key projection in each attention head
d_v = 32  # width of the value projection in each attention head
n_heads = 64  # number of attention heads per encoder block
ff_dim = 64  # filters of the first feed-forward Conv1D in each encoder block


def create_model():
    """Build and compile the Time2Vector + Transformer-encoder forecasting model.

    Reads the module-level settings ``seq_len``, ``n_output``, ``d_k``,
    ``d_v``, ``n_heads`` and ``ff_dim``.

    Returns:
        A ``tfk.Model`` compiled with MSE loss, the Adam optimizer and the
        MAE / MAPE metrics, mapping a ``(seq_len,)`` input to ``n_output``
        linear outputs.
    """
    # Instantiate the time embedding and the three encoder blocks first
    time_embedding = Time2Vector(seq_len)
    encoder_stack = [TransformerEncoder(d_k, d_v, n_heads, ff_dim) for _ in range(3)]

    # Construct the model
    in_seq = tfkl.Input(shape=(seq_len,))

    time_features = time_embedding(in_seq)
    raw_values = ExpandLayer()(in_seq)

    # Raw value + two time features per step; self-attention uses the same
    # tensor as query, key and value.
    features = tfkl.Concatenate(axis=-1)([raw_values, time_features])
    for encoder in encoder_stack:
        features = encoder((features, features, features))

    pooled = tfkl.GlobalAveragePooling1D(data_format='channels_first')(features)
    pooled = tfkl.Dropout(0.1)(pooled)
    hidden = tfkl.Dense(64, activation='relu')(pooled)
    hidden = tfkl.Dropout(0.1)(hidden)
    out = tfkl.Dense(n_output, activation='linear')(hidden)

    model = tfk.Model(inputs=in_seq, outputs=out)
    model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mape'])
    return model

In [None]:
model = create_model()
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
epochs = EPOCHS
batch_size = BATCH_SIZE

In [None]:
X_train, y_train = build_sequences(True,window, stride, telescope)
#X_test, y_test = build_sequences(False,window, stride, autoregressive_telescope)

In [None]:
input_shape = X_train.shape[1:]
output_shape = y_train.shape[1:]

In [None]:
# Train the model using the epoch count and batch size configured in the
# PARAMS cell (EPOCHS / BATCH_SIZE, copied into `epochs` / `batch_size`)
# instead of literals that can drift from those settings.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    # NOTE(review): VALIDATION_QUOTA from the PARAMS cell is not used here;
    # the validation share stays at 10 % — confirm which value is intended.
    validation_split=.1,
    callbacks=[
        # Stop after 15 epochs without val_loss improvement and roll back to the best weights
        tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=15, restore_best_weights=True),
        # Divide the learning rate by 10 after 10 stagnating epochs, never going below 1e-5
        tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, factor=0.1, min_lr=1e-5)
    ]
).history

In [None]:
# Epoch with the lowest validation loss
best_epoch = np.argmin(history['val_loss'])

# Training vs. validation loss, with the best epoch marked
_, loss_ax = plt.subplots(figsize=(17, 4))
loss_ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
loss_ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
loss_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
loss_ax.set_title('Mean Squared Error')
loss_ax.legend()
loss_ax.grid(alpha=.3)
plt.show()

# Learning-rate schedule over the epochs, with the best epoch marked
_, lr_ax = plt.subplots(figsize=(18, 3))
lr_ax.plot(history['learning_rate'], label='Learning Rate', alpha=.8, color='#ff7f0e')
lr_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
lr_ax.legend()
lr_ax.grid(alpha=.3)
plt.show()

In [None]:
model.save("Transformer.keras")

In [None]:
X_test_reg, y_test_reg = build_sequences(False,window, stride, telescope)
X_test_reg.shape, y_test_reg.shape

In [None]:
# Prediction
predictions = model.predict(X_test_reg)

In [None]:
# Report the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Flatten targets and predictions once; both metrics below compare them element-wise
flat_targets = y_test_reg.flatten()
flat_predictions = predictions.flatten()

# Mean Squared Error (MSE) over all forecast values
mean_squared_error = tfk.metrics.mean_squared_error(flat_targets, flat_predictions).numpy()
print(f"Mean Squared Error: {mean_squared_error}")

# Mean Absolute Error (MAE) over all forecast values
mean_absolute_error = tfk.metrics.mean_absolute_error(flat_targets, flat_predictions).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")