# Import Lib
Neural machine translation with a Transformer and Keras - https://www.tensorflow.org/text/tutorials/transformer#the_transformer

In [1]:
#!pip install tensorflow_text

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = [15,8]
import seaborn as sns
import plotly.graph_objects as go

In [90]:
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Embedding, Layer, MultiHeadAttention, LayerNormalization, Add, Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
from tensorflow.math import rsqrt, minimum
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model, save_model
from tensorflow.keras.ops import round as tf_round
from tensorflow.keras.regularizers import l2, l1

In [4]:
from tensorflow.signal import stft, hann_window

In [5]:
import math
import joblib

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, mean_absolute_error, mean_squared_error

In [7]:
from tensorflow.keras import initializers

initializer_for_relu = initializers.HeNormal() # For layers with activation function Relu
initializer_for_sigmoid = initializers.GlorotNormal() # For layers with activation function Sigmoid

In [8]:
import warnings
warnings.filterwarnings('ignore')

In [9]:
from scipy.signal import ShortTimeFFT
from scipy.signal.windows import gaussian

In [10]:
from tensorflow.python.framework import tensor_util

# Data Preprocessing

## Import Data

In [11]:
# Mount google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
path = '/content/drive/MyDrive/MyColabProject/Data'
data_df = pd.read_csv(path+'/raw_nsepy_inp512_differencedVal_fourierTransform.csv')
data_df.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,512
0,-4.45,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,...,-3.25,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35
1,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,...,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05
2,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,1.5,...,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05,1.7


In [13]:
data_df.shape

(41412, 513)

## Train Test Split
* Length of the input 39 <br>
* Length of Output and Context 1 <br>

In [14]:
data_df.iloc[:600,256:].shape

(600, 257)

In [15]:
data_df.iloc[:600,256:].head(3)

Unnamed: 0,256,257,258,259,260,261,262,263,264,265,...,503,504,505,506,507,508,509,510,511,512
0,2.75,-0.4,-1.65,-0.85,-1.0,0.3,3.9,7.85,2.75,4.3,...,-3.25,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35
1,-0.4,-1.65,-0.85,-1.0,0.3,3.9,7.85,2.75,4.3,-5.35,...,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05
2,-1.65,-0.85,-1.0,0.3,3.9,7.85,2.75,4.3,-5.35,-0.25,...,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05,1.7


In [16]:
data_q1 = data_df.iloc[:4800,:129].copy()
data_q2 = data_df.iloc[:4800,128:257].copy()
data_q3 = data_df.iloc[:4800,256:385].copy()
data_q4 = data_df.iloc[:4800,384:].copy()
data_df_merged = pd.DataFrame()
data_df_merged = pd.concat([pd.DataFrame(data_q1.values),pd.DataFrame(data_q2.values),pd.DataFrame(data_q3.values),pd.DataFrame(data_q4.values)],
                           ignore_index=True)
print(data_df_merged.shape)
data_df_merged.head(3)

(19200, 129)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,128
0,-4.45,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,...,-7.65,-1.25,0.0,8.0,-6.05,1.8,-0.75,-0.9,-0.1,-2.8
1,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,...,-1.25,0.0,8.0,-6.05,1.8,-0.75,-0.9,-0.1,-2.8,-1.5
2,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,1.5,...,0.0,8.0,-6.05,1.8,-0.75,-0.9,-0.1,-2.8,-1.5,0.5


In [17]:
data_df_merged.tail(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,128
19197,-1.45,2.0,-1.35,2.95,-10.8,10.45,-2.45,4.85,2.85,11.6,...,14.4,2.25,4.75,6.95,5.55,7.15,5.15,-15.35,10.75,-43.0
19198,2.0,-1.35,2.95,-10.8,10.45,-2.45,4.85,2.85,11.6,-15.1,...,2.25,4.75,6.95,5.55,7.15,5.15,-15.35,10.75,-43.0,-3.05
19199,-1.35,2.95,-10.8,10.45,-2.45,4.85,2.85,11.6,-15.1,0.2,...,4.75,6.95,5.55,7.15,5.15,-15.35,10.75,-43.0,-3.05,-8.5


In [27]:
inp_len = 128 # Length of the input
out_len = 1 # Length of Output and Context

In [28]:
prediction_percentage = 0.055 #0.025
#train_df, test_df = train_test_split(data_df, test_size=prediction_percentage, random_state=1, shuffle=True)
train_df, test_df = train_test_split(data_df_merged, test_size=prediction_percentage, random_state=1, shuffle=True)

In [29]:
print('Train shape',train_df.shape)
print('Test shape',test_df.shape)

Train shape (18144, 129)
Test shape (1056, 129)


In [30]:
train_df, val_df = train_test_split(train_df, test_size=0.25, random_state=1, shuffle=True)

In [31]:
print('Train shape',train_df.shape)
print('Validation shape',val_df.shape)
print('Test shape',test_df.shape)

Train shape (13608, 129)
Validation shape (4536, 129)
Test shape (1056, 129)


Keras Model.fit training expects (inputs, labels) pairs. The inputs is tokenized sequences. The labels are the same sequences shifted by 1. This shift is so that at each location input sequence, the label in the next token.

In [32]:
X_train = train_df.drop(columns=train_df.columns[inp_len:].values).values
c_train = train_df.drop(columns=train_df.columns[:inp_len].values).values
y_train = c_train
X_val = val_df.drop(columns=val_df.columns[inp_len:].values).values
c_val = val_df.drop(columns=val_df.columns[:inp_len].values).values
y_val = c_val
X_test = test_df.drop(columns=test_df.columns[inp_len:].values).values
c_test = test_df.drop(columns=test_df.columns[:inp_len].values).values
y_test = c_test

In [33]:
print('X Train shape',X_train.shape)
print('Context Train shape',c_train.shape)
print('Y Train shape',y_train.shape)
print('X Validation shape',X_val.shape)
print('Context Validation shape',c_val.shape)
print('Y Validation shape',y_val.shape)
print('X Test shape',X_test.shape)
print('Context Test shape',c_test.shape)
print('Y Test shape',y_test.shape)

X Train shape (13608, 128)
Context Train shape (13608, 1)
Y Train shape (13608, 1)
X Validation shape (4536, 128)
Context Validation shape (4536, 1)
Y Validation shape (4536, 1)
X Test shape (1056, 128)
Context Test shape (1056, 1)
Y Test shape (1056, 1)


In [34]:
X_train_f = X_train.astype(np.float32)
y_train_t = tf.convert_to_tensor(y_train)
X_val_f = X_val.astype(np.float32)
y_val_t = tf.convert_to_tensor(y_val)
X_test_f = X_test.astype(np.float32)
y_test_t = tf.convert_to_tensor(y_test)

In [35]:
X_train_f[0]

array([ -0.3 ,  -1.55,  -3.15,   0.75,  -0.6 ,   3.8 ,   4.  ,   6.3 ,
        -4.2 ,  -2.85,  -1.95,  -1.8 ,  -3.6 ,   1.6 ,  10.  ,  -0.4 ,
        -4.95,   2.85,  -3.3 ,  -7.95,  -2.2 ,   1.55,   6.2 ,   6.75,
         5.95,   0.05,  -8.05,  -4.9 ,   4.75,   4.4 ,   1.35,   7.55,
         5.45,  -3.25,   3.85,   4.8 ,   3.2 ,   9.8 ,  -9.  ,   0.9 ,
         5.85,   3.75,  -1.  ,  -2.4 , -43.  ,  -3.45, -13.15,  -6.15,
         1.7 ,   2.65,  -5.1 ,   4.5 ,   6.45,   3.  ,   5.  ,  -2.  ,
         4.5 ,   2.2 ,   9.45,   2.85,  -3.4 ,   4.9 ,   1.6 ,   2.2 ,
         4.7 ,   3.05, -11.1 , -13.6 ,   6.7 ,  -0.55, -12.65,  -1.8 ,
        -1.05,  -7.95,  -5.45,   6.3 ,  -4.3 ,  -1.4 ,   3.9 ,   3.35,
         5.55,   3.15,  -8.1 ,   4.3 ,  -1.7 ,  -6.5 ,  -5.95,  -7.2 ,
       -11.9 ,   0.95,   4.75,  -0.8 , -23.  ,  -8.  ,   1.  ,  -2.2 ,
         3.25,  12.25,  -0.3 ,   4.25,   3.9 ,   3.15,  -1.3 ,   0.05,
        -1.95,  -6.  ,   1.35,  -2.35, -23.1 ,  -1.6 ,   7.8 ,   6.8 ,
      

In [36]:
y_train[0]

array([6.1])

In [37]:
y_train_t[0]

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([6.1])>

## Testing tf.signal.stft

In [38]:
spectrogram1 = tf.signal.stft(signals=X_train_f[0],
                              frame_length=512,
                              frame_step=1)

print(X_train[0].shape[0])
print(spectrogram1.shape)
print(spectrogram1)

128
(0, 257)
tf.Tensor([], shape=(0, 257), dtype=complex64)


In [39]:
spectrogram2 = tf.signal.stft(X_train_f[0], frame_length=512, frame_step=256)

print(X_train[0].shape[0])
print(spectrogram2.shape)
print(spectrogram2)

128
(0, 257)
tf.Tensor([], shape=(0, 257), dtype=complex64)


In [40]:
spectrogram3 = tf.signal.stft(X_train_f[0], frame_length=256, frame_step=128)

print(X_train[0].shape[0])
print(spectrogram3.shape)
print(spectrogram3)

128
(0, 129)
tf.Tensor([], shape=(0, 129), dtype=complex64)


In [41]:
spectrogram4 = tf.signal.stft(X_train_f[0], frame_length=16, frame_step=8)

print(X_train[0].shape[0])
print(spectrogram4.shape)
print(spectrogram4)

128
(15, 9)
tf.Tensor(
[[  1.8054483 +0.00000000e+00j   0.28586268-1.04153728e+01j
   -1.2964115 +1.61822300e+01j   4.036834  -7.43957186e+00j
   -9.052942  +4.71164608e+00j   5.419055  -5.98073912e+00j
   -2.9035883 +2.17340231e+00j   7.0582476 -2.95654058e+00j
   -8.899563  +0.00000000e+00j]
 [ -3.4707904 +0.00000000e+00j   0.9565315 -1.02915363e+01j
    2.8802102 +1.59932203e+01j   3.724063  -2.52650785e+00j
  -14.191206  -1.03972435e+01j  13.38335   +1.32400932e+01j
   -6.9802094 -9.09837627e+00j   1.7360549 +2.67506433e+00j
    0.45320177+0.00000000e+00j]
 [  8.88564   +0.00000000e+00j -10.245959  -7.64000702e+00j
   11.99267   +2.33764400e+01j -10.343744  -1.71355629e+01j
    9.089645  -1.64720321e+00j  -3.385545  +6.05266047e+00j
   -2.6426702 -2.31178284e+00j   0.1752491 +1.64821625e+00j
    1.8350694 +0.00000000e+00j]
 [ 23.41183   +0.00000000e+00j -15.997355  +1.11959183e+00j
   -1.1414855 +8.10876465e+00j   2.3638813 -9.22490692e+00j
    7.4834356 +6.16056061e+00j  -3.455758

# Model Architecture <br>
* The querys is what you're trying to find.
* The keys what sort of information the dictionary has.
* The value is that information.


<b>fft_length and frame_length</b>
* The number of frequency bins depends on the FFT size (fft_length), which is often set to the next power of 2 that is greater than or equal to frame_length
* example, if frame_lenght = 39, Next power of 2 greater than 39 is 64. Setting fft_length = 64 would allow the FFT algorithm to work more efficiently by padding the input to a length of 64.
* or choose frame_lenght any of 2, 4, 8, 16, 32, 64, ... <br>

<b>frame_step</b>
* To choose frame_step, 50% Overlap (a common choice): To achieve 50% overlap, set frame_step to half the frame_length: frame_step = frame_length/2
* example, if frame_length=8, then frame_step=8/2 = 4. This will give you overlapping frames, which improves frequency resolution.<br>

<b>Frequency_bins v/s time_bins</b>
*  have more Frequency_bins if identifying a long-term uptrend or downtrend based on weekly or monthly stock price data, or detecting cyclic behavior like seasonality.
* have more time_bins if Detecting intraday trends or anomalies (like a sudden price surge due to a news release).

In [152]:
# Hyperparameters for Fourier Transform
sample_signal_len = X_train.shape[1] # Length of the input time series
sample_frame_length = 8 # window size for Fourier Transform
sample_frame_step = 4 # Hop size for Fourier Transform
sample_fft_length = sample_frame_length

# Hyperparameters for Attention Layer and DNN Layer
sample_num_layers = 2 # number of TransformerEncoderLayer layers (Original paper = 6)
sample_num_heads = 2 # number of self-attention heads in the MultiheadAttention layer (Original paper = 8)
sample_dropout_rate = 0.2 # Dropout rate

In [153]:
# The STFT output shape can be defined as:
# Output Shape = (batch_size, 𝐹, 𝑇)

# Number of Time Frames (T)
sample_time_bins = int(((sample_signal_len - sample_fft_length)/sample_frame_step )+1 )
print('Time Bins =',sample_time_bins)

# Number of Frequency Bins (F)
sample_frequency_bins = int(tf.math.floor((sample_fft_length/2) +1))
print('Frequency Bins =', sample_frequency_bins)

print('output_shape=[batch_size, time_bins, frequency_bins]')
print('Output shape = (batch_size,',sample_time_bins,',',sample_frequency_bins,')')

Time Bins = 31
Frequency Bins = 5
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 31 , 5 )


## The Fourier Transform layer

The output is a 2D array of shape (n_freqs, n_times), where n_freqs is the number of frequency bins (same as the length of f), and n_times is the number of time segments (same as the length of t).
* Rows of Zxx correspond to different frequencies (i.e., values in the f array).
* Columns of Zxx correspond to different time segments (i.e., values in the t array).
* The magnitude of the complex numbers in Zxx (np.abs(Zxx)) represents the strength or amplitude of each frequency component at that time segment.
* The phase of the complex numbers in Zxx (np.angle(Zxx)) represents the phase information of the signal at that frequency and time.
*  A larger window (windows_size, hops) gives better frequency resolution but poorer time resolution. You may need to experiment with different values based on your data.

* tf.py_function allows TensorFlow to execute scipy_stft_fn, passing a tensor as input, converting it to a NumPy array internally, and returning a tensor that is compatible with TensorFlow.
* In the build method of the FourierTransform class, the shape of self.kernel should be a tuple, but it is set as a single value (self.windows_size). self.windows_size should be wrapped in parentheses to specify the shape correctly, assuming it's a 1D kernel.

In [154]:
class FourierTransform(Layer):
  def __init__(self, signal_len, frame_length, frame_step):
    super(FourierTransform, self).__init__()
    self.signal_len = signal_len
    self.frame_length = frame_length
    self.frame_step = frame_step

  def build(self, input_shape):
    # Define weights
    self.kernel = self.add_weight(
        shape=(self.signal_len,), # the use of ',' after self.signal_len is a must, read the above mentioned comments point-2
        initializer=initializer_for_relu,
        trainable=False
    )

  def call(self, x):
    #window_gen = hann_window(self.windows_size)  # symmetric Gaussian window
    # Convert the waveform to a spectrogram via a STFT.
    spectrogram = tf.signal.stft(signals=x, frame_length=self.frame_length, frame_step=self.frame_step)
    magnitude_x = tf.math.abs(spectrogram)
    angle_x = tf.math.angle(spectrogram) # Disable it if using only magnitude as output
    magnitude_x = tf_round(magnitude_x, 4)
    angle_x = tf_round(angle_x, 4) # Disable it if using only magnitude as output
    return magnitude_x, angle_x

In [155]:
# Create an Embedding Object
sft_layer = FourierTransform(signal_len=sample_signal_len, frame_length=sample_frame_length, frame_step=sample_frame_step)
sft_layer.build(X_train_f[:3].shape)
# Calling the function
out_sft_mag, out_sft_ang = sft_layer(X_train_f[:3])

In [156]:
print(out_sft_mag.shape)

(3, 31, 5)


In [157]:
X_train_f[0,:]

array([ -0.3 ,  -1.55,  -3.15,   0.75,  -0.6 ,   3.8 ,   4.  ,   6.3 ,
        -4.2 ,  -2.85,  -1.95,  -1.8 ,  -3.6 ,   1.6 ,  10.  ,  -0.4 ,
        -4.95,   2.85,  -3.3 ,  -7.95,  -2.2 ,   1.55,   6.2 ,   6.75,
         5.95,   0.05,  -8.05,  -4.9 ,   4.75,   4.4 ,   1.35,   7.55,
         5.45,  -3.25,   3.85,   4.8 ,   3.2 ,   9.8 ,  -9.  ,   0.9 ,
         5.85,   3.75,  -1.  ,  -2.4 , -43.  ,  -3.45, -13.15,  -6.15,
         1.7 ,   2.65,  -5.1 ,   4.5 ,   6.45,   3.  ,   5.  ,  -2.  ,
         4.5 ,   2.2 ,   9.45,   2.85,  -3.4 ,   4.9 ,   1.6 ,   2.2 ,
         4.7 ,   3.05, -11.1 , -13.6 ,   6.7 ,  -0.55, -12.65,  -1.8 ,
        -1.05,  -7.95,  -5.45,   6.3 ,  -4.3 ,  -1.4 ,   3.9 ,   3.35,
         5.55,   3.15,  -8.1 ,   4.3 ,  -1.7 ,  -6.5 ,  -5.95,  -7.2 ,
       -11.9 ,   0.95,   4.75,  -0.8 , -23.  ,  -8.  ,   1.  ,  -2.2 ,
         3.25,  12.25,  -0.3 ,   4.25,   3.9 ,   3.15,  -1.3 ,   0.05,
        -1.95,  -6.  ,   1.35,  -2.35, -23.1 ,  -1.6 ,   7.8 ,   6.8 ,
      

In [158]:
# Inspecting the time-component when frequency-component = 3
out_sft_mag[0,2,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([0.2217, 8.9456, 8.038 , 5.2937, 1.0717], dtype=float32)>

In [159]:
# Inspecting the time-component when frequency-component = 12
out_sft_mag[0,6,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 2.0862, 11.6784, 10.6015,  6.0784,  0.7138], dtype=float32)>

In [160]:
# Inspecting the frequency-component when time-component = 3
out_sft_mag[0,:,1]

<tf.Tensor: shape=(31,), dtype=float32, numpy=
array([ 6.4447,  9.3704,  8.9456,  6.3364, 12.9746, 15.7572, 11.6784,
        8.8106, 12.6117,  9.0619, 46.924 , 10.1729, 11.5079,  6.2152,
        2.8106, 11.7051,  6.8868,  7.5537,  1.4782, 10.711 ,  7.0179,
       18.4504, 28.9323, 13.4685,  7.4964,  6.3258, 26.0518,  5.5945,
        0.9124,  4.0801,  9.6956], dtype=float32)>

In [161]:
# Inspecting the frequency-component when time-component = 12
out_sft_mag[0,:,4]

<tf.Tensor: shape=(31,), dtype=float32, numpy=
array([ 4.7543,  6.4127,  1.0717,  2.7613,  3.3069,  0.2886,  0.7138,
        3.0324, 11.4927,  4.2027, 44.7302,  4.5913,  0.0968, 10.6976,
        5.1344,  3.2571,  6.7197,  2.6199,  8.5838,  2.5228,  6.2541,
        6.0962, 12.4307,  4.429 ,  5.0176,  3.0365, 15.2706,  2.4552,
        4.3883,  2.2542,  2.1553], dtype=float32)>

In [162]:
print(out_sft_ang.shape)

(3, 31, 5)


In [163]:
# Inspecting the time-component when frequency-component = 3
out_sft_ang[0,3,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 3.1416, -1.1107, -2.6758,  0.8218,  3.1416], dtype=float32)>

In [164]:
# Inspecting the time-component when frequency-component = 12
out_sft_ang[0,6,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 0.    ,  1.8897, -0.7012,  2.8599,  0.    ], dtype=float32)>

In [165]:
# Inspecting the frequency-component when time-component = 3
out_sft_ang[0,:,1]

<tf.Tensor: shape=(31,), dtype=float32, numpy=
array([ 1.8304, -1.3201,  1.1828, -1.1107,  0.9957, -2.2917,  1.8897,
       -2.5065, -2.9033,  2.6274, -0.1656,  1.5195,  2.8176,  2.2731,
       -1.8676, -2.4177,  1.2807,  0.0792,  0.9447, -2.599 , -1.19  ,
        0.6293, -0.2283,  2.3753, -2.8119, -0.4755,  0.1932, -1.6886,
       -2.3821,  2.5358, -0.4922], dtype=float32)>

In [166]:
# Inspecting the frequency-component when time-component = 12
out_sft_ang[0,:,4]

<tf.Tensor: shape=(31,), dtype=float32, numpy=
array([3.1416, 3.1416, 0.    , 3.1416, 0.    , 3.1416, 0.    , 0.    ,
       3.1416, 3.1416, 3.1416, 3.1416, 3.1416, 0.    , 3.1416, 3.1416,
       0.    , 3.1416, 3.1416, 3.1416, 3.1416, 3.1416, 3.1416, 3.1416,
       3.1416, 0.    , 3.1416, 0.    , 0.    , 3.1416, 3.1416],
      dtype=float32)>

## Custom Attention Block

In [167]:
class BaseAttention(Layer):
  def __init__(self, frequency_bins, time_bins, **kwargs):
    super().__init__()
    self.mha = MultiHeadAttention(key_dim=frequency_bins, kernel_initializer=initializer_for_relu, **kwargs)
    self.layernorm = LayerNormalization()
    self.add = Add()
    self.frequency_bins = frequency_bins
    self.time_bins = time_bins

  def build(self, input_shape):
    # Define weights
    self.kernel = self.add_weight(
        shape=(self.time_bins, self.frequency_bins),
        initializer=initializer_for_relu,
        trainable=True
    )

## self-attention layer - Magnitude

* tf.ensure_shape Updates the shape of a tensor and checks at runtime that the shape holds.
* When executed, this operation asserts that the input tensor x's shape is compatible with the shape argument.

In [168]:
class SelfAttentionMagnitude(BaseAttention):
  def __init__(self, frequency_bins, time_bins, **kwargs):
      # Call the parent class (BaseAttention) constructor
      super().__init__(frequency_bins, time_bins, **kwargs)

  def call(self, magnitude):
    magnitude = tf.ensure_shape(magnitude, [None, self.time_bins, self.frequency_bins])
    attn_output = self.mha(
        query=magnitude,  # The querys is what you're trying to find.
        key=magnitude,  # The keys what sort of information the dictionary has.
        value=magnitude # The value is that information.
        )
    # Cache the attention scores for plotting later.
    #self.last_attn_scores = attn_scores

    x = self.add([magnitude, attn_output])
    x = self.layernorm(x)
    x = tf_round(x, 4)
    return x

In [169]:
sample_lsa = SelfAttentionMagnitude(frequency_bins=sample_frequency_bins,
                                    time_bins=sample_time_bins,
                                    num_heads=sample_num_heads,
                                    dropout=sample_dropout_rate,
                                    kernel_regularizer=l2(0.01),
                                    bias_regularizer=l2(0.01),
                                    activity_regularizer=l2(0.01)
                                    )

In [170]:
#sample_lsa.build(out_sft_mag.shape)
out_lsa_mag = sample_lsa(out_sft_mag)
print(out_sft_mag.shape)
print(out_lsa_mag.shape)

(3, 31, 5)
(3, 31, 5)


In [171]:
out_lsa_mag[0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 0.4305,  0.4741,  0.6849, -1.9901,  0.4005], dtype=float32)>

## self-attention layer - Angle

* tf.ensure_shape Updates the shape of a tensor and checks at runtime that the shape holds.
* When executed, this operation asserts that the input tensor x's shape is compatible with the shape argument.

In [172]:
class SelfAttentionAngle(BaseAttention):
  def __init__(self, frequency_bins, time_bins, **kwargs):
      # Call the parent class (BaseAttention) constructor
      super().__init__(frequency_bins, time_bins, **kwargs)

  def call(self, angle):
    angle = tf.ensure_shape(angle, [None, self.time_bins, self.frequency_bins])
    attn_output = self.mha(
        query=angle,  # The querys is what you're trying to find.
        key=angle,  # The keys what sort of information the dictionary has.
        value=angle # The value is that information.
        )
    # Cache the attention scores for plotting later.
    #self.last_attn_scores = attn_scores

    x = self.add([angle, attn_output])
    x = self.layernorm(x)
    x = tf_round(x, 4)
    return x

In [173]:
sample_lsa_angle = SelfAttentionAngle(frequency_bins=sample_frequency_bins,
                                      time_bins=sample_time_bins,
                                      num_heads=sample_num_heads,
                                      dropout=sample_dropout_rate,
                                      #key_dim=sample_d_model
                                      kernel_regularizer=l2(0.01),
                                      bias_regularizer=l2(0.01),
                                      activity_regularizer=l2(0.01)
                                )

In [174]:
#sample_lsa.build(out_sft_mag.shape)
out_lsa_ang = sample_lsa_angle(out_sft_ang)
print(out_sft_ang.shape)
print(out_lsa_ang.shape)

(3, 31, 5)
(3, 31, 5)


In [175]:
out_lsa_ang[0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([-1.9134,  0.7932,  0.1177,  0.191 ,  0.8116], dtype=float32)>

## The global cross-attention layer

The similarity (or dot product) between the Query and each Key is computed to determine an attention score. These scores measure how relevant each item (Key) is to the current item (Query).

In [176]:
class GlobalCrossAttention(BaseAttention):
  def __init__(self, frequency_bins, time_bins, **kwargs):
      # Call the parent class (BaseAttention) constructor
      super().__init__(frequency_bins, time_bins, **kwargs)

  def call(self, magnitude, angle):
    magnitude = tf.ensure_shape(angle, [None, self.time_bins, self.frequency_bins])
    angle = tf.ensure_shape(angle, [None, self.time_bins, self.frequency_bins])
    attn_output, attn_scores = self.mha(
        query=magnitude,  # The querys is what you're trying to find.
        key=angle,  # The keys what sort of information the dictionary has.
        value=angle, # The value is that information.
        return_attention_scores=True
        )

    # Cache the attention scores for plotting later.
    self.last_attn_scores = attn_scores

    x = self.add([magnitude, attn_output])
    x = self.layernorm(x)
    x = tf_round(x, 4)
    return x

In [177]:
sample_gca = GlobalCrossAttention(frequency_bins=sample_frequency_bins,
                                  time_bins=sample_time_bins,
                                  num_heads=sample_num_heads,
                                  dropout=sample_dropout_rate,
                                  kernel_regularizer=l2(0.01),
                                  bias_regularizer=l2(0.01),
                                  activity_regularizer=l2(0.01)
                                  #key_dim=sample_d_model
                                  )
#sample_gca.build(out_lsa_ang.shape)

In [178]:
out_gca_mag = sample_gca(magnitude=out_lsa_mag, angle=out_lsa_ang)
print(out_lsa_mag.shape)
print(out_gca_mag.shape)

(3, 31, 5)
(3, 31, 5)


In [179]:
out_gca_mag[0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([-1.297 ,  1.2911, -0.063 , -0.8712,  0.9401], dtype=float32)>

## Feed Forward Network Layer

In [180]:
class FeedForward(Layer):
  # dff - dence feed forward neurons
  # sft_len - output shape Fourier Transform
  def __init__(self, frequency_bins, dropout_rate=0.1):
    super().__init__()
    self.seq = Sequential([
      Dense(frequency_bins*3, activation='relu', kernel_initializer=initializer_for_relu),
      Dropout(dropout_rate),
      Dense(frequency_bins*2, activation='relu', kernel_initializer=initializer_for_relu),
      Dropout(dropout_rate),
      Dense(frequency_bins, activation='relu', kernel_initializer=initializer_for_relu),
      Dropout(dropout_rate)
    ])
    self.add = Add()
    self.layer_norm = LayerNormalization()

  def call(self, x):
    x = self.add([x, self.seq(x)])
    x = self.layer_norm(x)
    return x

In [181]:
out_gca_mag.shape[2]

5

In [182]:
sample_ffn = FeedForward(frequency_bins=sample_frequency_bins)
sample_ffn_out = sample_ffn(out_gca_mag)
print(out_gca_mag.shape)
print(sample_ffn_out.shape)

(3, 31, 5)
(3, 31, 5)


In [183]:
sample_ffn_out[0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-1.2179532 ,  0.9815976 ,  1.2526033 , -0.99094856, -0.02529895],
      dtype=float32)>

## Complete Encoder Layer

In [184]:
class EncoderLayer(Layer):
  def __init__(self,*, frequency_bins, time_bins, num_heads, dropout_rate):
    super().__init__()

    self.self_attention = SelfAttentionAngle(frequency_bins=frequency_bins,
                                             time_bins=time_bins,
                                             num_heads=num_heads,
                                             dropout=dropout_rate,
                                             kernel_regularizer=l2(0.01),
                                             bias_regularizer=l2(0.01),
                                             activity_regularizer=l2(0.01)
                                             )

    self.ffn = FeedForward(frequency_bins=frequency_bins)

  def call(self, angle):
    angle = self.self_attention(angle)
    angle = self.ffn(angle)
    return angle

In [185]:
sample_encoder_layer = EncoderLayer(frequency_bins=sample_frequency_bins,
                                    time_bins=sample_time_bins,
                                    num_heads=sample_num_heads,
                                    dropout_rate=sample_dropout_rate)
#sample_encoder_layer.build(out_sft_ang.shape)

In [186]:
sample_encoder_output = sample_encoder_layer(out_sft_ang, training=False)
# Print the shape.
print(out_sft_ang.shape)
print(sample_encoder_output.shape)

(3, 31, 5)
(3, 31, 5)


In [187]:
class Encoder(Layer):
  def __init__(self, *, frequency_bins, time_bins, num_layers, num_heads, dropout_rate):
    super().__init__()

    self.num_layers=num_layers

    self.enc_layers = [ EncoderLayer(frequency_bins=frequency_bins,
                                     time_bins=time_bins,
                                     num_heads=num_heads,
                                     dropout_rate=dropout_rate
                                     ) for _ in range(num_layers)]

  def call(self, angle):
    for i in range(self.num_layers):
      angle = self.enc_layers[i](angle)

    #self.last_attn_scores = self.enc_layers[-1].last_attn_scores
    return angle

In [188]:
# Testing the encoder

# Instantiate the encoder.
sample_encoder = Encoder(frequency_bins=sample_frequency_bins,
                         time_bins=sample_time_bins,
                         num_layers=sample_num_layers,
                         num_heads=sample_num_heads,
                         dropout_rate=sample_dropout_rate)
#sample_encoder.build(out_sft_ang.shape)

In [189]:
sample_encoder_output = sample_encoder(out_sft_ang, training=False)
# Print the shape.
print(out_sft_ang.shape)
print(sample_encoder_output.shape)

(3, 31, 5)
(3, 31, 5)


In [190]:
sample_encoder_output[0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([ 1.8190227 , -0.03909664, -0.83668303, -0.9927522 ,  0.04950908],
      dtype=float32)>

## Complete Decoder Layer

Each DecoderLayer containing a CausalSelfAttention, a CrossAttention, and a FeedForward layer:

In [191]:
class DecoderLayer(Layer):
  def __init__(self, *, frequency_bins, time_bins, num_heads, dropout_rate):
    super().__init__()

    self.local_self_att = SelfAttentionAngle(
        frequency_bins=frequency_bins,
        time_bins=time_bins,
        num_heads=num_heads,
        dropout=dropout_rate,
        kernel_regularizer=l2(0.01),
        bias_regularizer=l2(0.01),
        activity_regularizer=l2(0.01)
    )

    self.global_cross_att = GlobalCrossAttention(
        frequency_bins=frequency_bins,
        time_bins=time_bins,
        num_heads=num_heads,
        dropout=dropout_rate,
        kernel_regularizer=l2(0.01),
        bias_regularizer=l2(0.01),
        activity_regularizer=l2(0.01)
    )

    self.ffn = FeedForward(frequency_bins, dropout_rate)

  def call(self, magnitude, angle):
    self_att_out = self.local_self_att(magnitude)
    cross_att_out = self.global_cross_att(magnitude=self_att_out, angle=angle)

    # Cache the last attention scores for plotting later
    self.last_attn_scores = self.global_cross_att.last_attn_scores

    fnn_out = self.ffn(cross_att_out)

    return fnn_out


In [192]:
# Testing decoder layer
sample_decoder_layer = DecoderLayer(frequency_bins=sample_frequency_bins,
                                    time_bins=sample_time_bins,
                                    num_heads=sample_num_heads,
                                    dropout_rate=sample_dropout_rate)
#sample_decoder_layer.build(out_sft_mag.shape)
sample_decoder_layer_output = sample_decoder_layer(magnitude=out_sft_mag, angle=sample_encoder_output)

In [193]:
print(out_sft_mag.shape)
print(sample_decoder_layer_output.shape)

(3, 31, 5)
(3, 31, 5)


In [194]:
sample_decoder_layer_output[0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([ 1.2486713 ,  0.9936389 , -0.11534961, -1.3593313 , -0.76762915],
      dtype=float32)>

In [195]:
class Decoder(Layer):
  def __init__(self, *, frequency_bins, time_bins, num_heads, num_layers, dropout_rate=0.1):
    super().__init__()

    self.num_layers=num_layers

    self.decoder_layer = [ DecoderLayer(frequency_bins=frequency_bins,
                                        time_bins=time_bins,
                                        num_heads=num_heads,
                                        dropout_rate=dropout_rate
                                        ) for _ in range(num_layers)]

  def call(self, magnitude, angle):
    for i in range(self.num_layers):
      magnitude  = self.decoder_layer[i](magnitude, angle)

    self.last_attn_scores = self.decoder_layer[-1].last_attn_scores

    return magnitude

In [196]:
# Test the decoder

# Instantiate the decoder.
sample_decoder = Decoder(frequency_bins=sample_frequency_bins,
                         time_bins=sample_time_bins,
                         num_layers=sample_num_layers,
                         num_heads=sample_num_heads,
                         dropout_rate=sample_dropout_rate)
#sample_decoder.build(out_sft_mag.shape)
output = sample_decoder(magnitude=out_sft_mag, angle=sample_encoder_output)

In [197]:
output[0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([ 1.1732035 ,  0.2831353 , -0.436536  ,  0.67755735, -1.6973603 ],
      dtype=float32)>

In [198]:
print(out_sft_mag.shape)
print(sample_encoder_output.shape)
print(output.shape)

(3, 31, 5)
(3, 31, 5)
(3, 31, 5)


## Transformer

In [199]:
class Transformer(Model):
  def __init__(self, *, signal_len, frame_length, frame_step,
               frequency_bins, time_bins,
               num_heads, num_layers, dropout_rate=0.1):
    super().__init__()

    self.sft_layer = FourierTransform(signal_len=signal_len,
                                      frame_length=frame_length,
                                      frame_step=frame_step)

    self.encoder = Encoder(frequency_bins=frequency_bins,
                           time_bins=time_bins,
                           num_layers=num_layers,
                           num_heads=num_heads,
                           dropout_rate=dropout_rate)

    self.decoder = Decoder(frequency_bins=frequency_bins,
                           time_bins=time_bins,
                           num_layers=num_layers,
                           num_heads=num_heads,
                           dropout_rate=dropout_rate)

    self.flatten_layer = Flatten()

    self.final_layer = Dense(1) # STUPID !!!!!! - using activation='relu' will limit the output between 0 and infinity, it won't give -ve outputs

  def call(self, inputs):
    # To use a Keras model with `.fit` you must pass all your inputs in the
    # first argument.
    #self.sft_layer.build(inputs.shape)
    magnitude_x, angle_x = self.sft_layer(inputs)

    #self.encoder.build(angle_x.shape)
    enc_out = self.encoder(angle_x)  # (batch_size, frequency_bins, time_bins)

    #self.decoder.build(magnitude_x.shape)
    dec_out = self.decoder(magnitude_x, enc_out)  # (batch_size, frequency_bins, time_bins)

    # Final linear layer output.
    dec_out = self.flatten_layer(dec_out) # (batch_size, frequency_bins * time_bins)
    logits = self.final_layer(dec_out)  # (batch_size, target_len)

    try:
      # Drop the keras mask, so it doesn't scale the losses/metrics.
      # b/250038731
      del logits._keras_mask
    except AttributeError:
      pass

    # Return the final output and the attention weights.
    return logits

# Model 1

## Model building

In [200]:
#Define a model with Transformer layer
tf.keras.backend.clear_session()
#model = tf.keras.Sequential()

In [201]:
# Hyperparameters for Fourier Transform
signal_len = X_train.shape[1] # Length of the input time series
frame_length = 128 # window size for Fourier Transform
frame_step = 64 # Hop size for Fourier Transform
fft_length = frame_length

# Hyperparameters for Attention Layer and DNN Layer
num_layers = 1 # number of TransformerEncoderLayer layers (Original paper = 6)
num_heads = 1 # number of self-attention heads in the MultiheadAttention layer (Original paper = 8)
dropout_rate = 0.3 # Dropout rate

In [202]:
# The STFT output shape can be defined as:
# Output Shape = (batch_size, 𝐹, 𝑇)

# Number of Time Frames (T)
time_bins = int(((signal_len - fft_length)/frame_step )+1 )
print('Time Bins =',time_bins)

# Number of Frequency Bins (F)
frequency_bins = int(tf.math.floor((fft_length/2) +1))
print('Frequency Bins =', frequency_bins)

print('output_shape=[batch_size, time_bins, frequency_bins]')
print('Output shape = (batch_size,',time_bins,',',frequency_bins,')')

Time Bins = 1
Frequency Bins = 65
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 1 , 65 )


In [203]:
transformer = Transformer(signal_len=signal_len,
                          frame_length=frame_length,
                          frame_step=frame_step,
                          frequency_bins=frequency_bins,
                          time_bins=time_bins,
                          num_layers=num_layers,
                          num_heads=num_heads,
                          dropout_rate=dropout_rate)

In [204]:
transformer.build(X_train_f.shape)

In [205]:
transformer_output = transformer(X_train_f[:3])

print(X_train_f[:3].shape)
print(transformer_output.shape)

(3, 128)
(3, 1)


In [206]:
attn_scores = transformer.decoder.decoder_layer[-1].last_attn_scores
print(attn_scores.shape)

(3, 1, 1, 1)


In [207]:
transformer.summary()

## Training

In [208]:
#learning_rate = CustomSchedule(d_model)
optimizer = Adam(0.001, beta_1=0.9, beta_2=0.98, epsilon=1e-8)

In [209]:
transformer.compile(
    loss='mean_squared_error',#'mean_absolute_error',
    optimizer=optimizer,
    metrics=['R2Score'])# 'mean_squared_error'

In [210]:
path_model = '/content/drive/MyDrive/MyColabProject/Models'
checkpoint_filepath = (path_model+'/1SF_nsepy_Model_inp128_out1_fourierTransform.weights.h5')
print(checkpoint_filepath)

/content/drive/MyDrive/MyColabProject/Models/1SF_nsepy_Model_inp128_out1_fourierTransform.weights.h5


In [211]:
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    #monitor='loss',
    mode='min',
    save_best_only=True)

early_stopping = EarlyStopping(monitor="val_loss",
                               #monitor="loss",
                               patience=10, mode="min",
                               restore_best_weights=True)

In [212]:
transformer.fit(X_train_f,y_train,
                epochs=100, batch_size=32,
                validation_data=[X_val_f,y_val],
                callbacks=[model_checkpoint_callback, early_stopping])

Epoch 1/100
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 47ms/step - R2Score: -0.0192 - loss: 85127.5859 - val_R2Score: -0.0026 - val_loss: 9486.4082
Epoch 2/100
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - R2Score: -0.0024 - loss: 6413.4512 - val_R2Score: -0.0015 - val_loss: 1956.3088
Epoch 3/100
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - R2Score: -0.0016 - loss: 1455.6531 - val_R2Score: -0.0037 - val_loss: 564.3473
Epoch 4/100
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - R2Score: -0.0033 - loss: 431.1339 - val_R2Score: -6.1870e-04 - val_loss: 194.8564
Epoch 5/100
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - R2Score: -0.0011 - loss: 147.6359 - val_R2Score: -8.3256e-04 - val_loss: 74.2308
Epoch 6/100
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - R2Score: -0.0011 - loss: 58.3956 - val_R2Score: -3.3367e-04 

<keras.src.callbacks.history.History at 0x7c767048e950>

In [None]:
transformer.fit(X_train_f,y_train,
                initial_epoch=100, epochs=200, batch_size=32,
                validation_data=[X_val_f,y_val],
                callbacks=[model_checkpoint_callback, early_stopping])

In [None]:
transformer.fit(X_train_f,y_train,
                initial_epoch=200, epochs=500, batch_size=32,
                validation_data=[X_val_f,y_val],
                callbacks=[model_checkpoint_callback, early_stopping])

## Prediction - from Transformer

In [213]:
predictions = transformer((X_test_f), training=False)

In [214]:
predictions.shape

TensorShape([1056, 1])

In [215]:
predictions[0]

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.43744284], dtype=float32)>

In [216]:
i = 12
print('Predictions = \n',predictions[i])
print('Actuals = \n',y_test[i])

Predictions = 
 tf.Tensor([2.2952313], shape=(1,), dtype=float32)
Actuals = 
 [1.1]


### Validating the results

In [217]:
y_pred = predictions.numpy().reshape(-1)
y_test = y_test.reshape(-1)
print(y_pred.shape)
print(y_test.shape)

(1056,)
(1056,)


In [218]:
score_mae = mean_absolute_error(y_test, y_pred)
print("The Mean Absolute Error of our Model is {}".format(round(score_mae, 2)))
score_rmse = mean_squared_error(y_test, y_pred)
print("The Root Mean Squared Error of our Model is {}".format(round(score_rmse, 2)))
score_r2 = r2_score(y_test, y_pred)
print("The accuracy of our model is {}%".format(round(score_r2, 2) *100))

The Mean Absolute Error of our Model is 1.72
The Root Mean Squared Error of our Model is 13.72
The accuracy of our model is 55.00000000000001%


In [219]:
fig = go.Figure(data=[go.Ohlc(x=np.arange(0,1000,1),
                    open=y_pred,
                    high=y_pred,
                    low=y_test,
                    close=y_test
                              )])
fig.show()

In [None]:
'''
fig = go.Figure(data=[go.Ohlc(x=np.arange(0,1000,1),
                    open=y_pred,
                    high=y_pred,
                    low=y_test,
                    close=y_test
                              )])
fig.show()
'''

'\nfig = go.Figure(data=[go.Ohlc(x=np.arange(0,1000,1),\n                    open=y_pred,\n                    high=y_pred,\n                    low=y_test,\n                    close=y_test\n                              )])\nfig.show()\n'

 ## Prediction - from Saved model

In [None]:
# Load the model
pre_trained_model_path = '/content/drive/MyDrive/MyColabProject/Models/1SF_nsepy_Model_inp128_out1_fourierTransform.weights.h5'
print(pre_trained_model_path)

/content/drive/MyDrive/hobby_project/Models/1SF_nsepy_Model_inp39_out1_vocab90.weights.weights.h5


In [None]:
transformer.load_weights(pre_trained_model_path)

### Prediction from Pre-trained loaded transformer

In [None]:
predictions_reconstructed_model = transformer(X_test_f, training=False)

In [None]:
i = 1
print('Predictions = \n',predictions_reconstructed_model[i])
print('Actuals = \n',y_test[i])

Predictions = 
 [39 38 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39
 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39]
Actuals = 
 [53  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 89]


### Compare saved model with Actuals

In [None]:
y_preTrained_pred = predictions_reconstructed_model.numpy().reshape(-1)
y_test = y_test.reshape(-1)
print(y_preTrained_pred.shape)
print(y_test.shape)

In [None]:
score_mae = mean_absolute_error(y_test, y_preTrained_pred)
print("The Mean Absolute Error of our Model is {}".format(round(score_mae, 2)))
score_rmse = mean_squared_error(y_test, y_preTrained_pred)
print("The Root Mean Squared Error of our Model is {}".format(round(score_rmse, 2)))
score_r2 = r2_score(y_test, y_preTrained_pred)
print("The accuracy of our model is {}%".format(round(score_r2, 2) *100))

In [None]:
fig = go.Figure(data=[go.Ohlc(x=np.arange(0,1000,1),
                    open=y_preTrained_pred,
                    high=y_preTrained_pred,
                    low=y_test,
                    close=y_test
                              )])
fig.show()

### Exporting the outputs to csv

In [None]:
data_df = pd.read_csv(path+'/OSF_results.csv')
data_df.columns

Index(['Actuals', 'OSF_Model_1', 'OSF_2'], dtype='object')

In [None]:
# Uncomment only if you wish to compare the data in excel sheet
data_df = pd.read_csv(path+'/OSF_results.csv')
#data_df.drop(columns=['Unnamed: 0'], axis=1, inplace=True)

# Using DataFrame.insert() to add a column
#data_df.insert(2, "OSF_2", y_reconstructed_model, True)
data_df.insert(2, "OSF_2_better", y_pred, True)

data_df

Unnamed: 0,Actuals,OSF_Model_1,OSF_2_better,OSF_2
0,542,542,542,542
1,509,509,509,509
2,722,562,586,588
3,517,517,517,517
4,525,525,525,525
...,...,...,...,...
2518,520,520,520,520
2519,500,500,500,500
2520,542,542,542,542
2521,523,523,523,523


In [None]:
data_df.to_csv(path+'/OSF_results.csv',index=False)

# Failed Models

# Success Models

In [None]:
# 1st Best model
# Inputs
'''
X Train shape (13608, 128)
Context Train shape (13608, 1)
Y Train shape (13608, 1)
X Validation shape (4536, 128)
Context Validation shape (4536, 1)
Y Validation shape (4536, 1)
X Test shape (1056, 128)
Context Test shape (1056, 1)
Y Test shape (1056, 1)

data_q1 = data_df.iloc[:4800,:129].copy()
data_q2 = data_df.iloc[:4800,128:257].copy()
data_q3 = data_df.iloc[:4800,256:385].copy()
data_q4 = data_df.iloc[:4800,384:].copy()
data_df_merged = pd.DataFrame()
data_df_merged = pd.concat([pd.DataFrame(data_q1.values),pd.DataFrame(data_q2.values),pd.DataFrame(data_q3.values),pd.DataFrame(data_q4.values)],
                           ignore_index=True)
print(data_df_merged.shape)
data_df_merged.head(3)
'''
# Hyperparameters
'''
# Hyperparameters for Fourier Transform
signal_len = X_train.shape[1] # Length of the input time series
frame_length = 128 # window size for Fourier Transform
frame_step = 64 # Hop size for Fourier Transform
fft_length = frame_length

# Hyperparameters for Attention Layer and DNN Layer
num_layers = 1 # number of TransformerEncoderLayer layers (Original paper = 6)
num_heads = 1 # number of self-attention heads in the MultiheadAttention layer (Original paper = 8)
dropout_rate = 0.3 # Dropout rate

Time Bins = 1
Frequency Bins = 65
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 1 , 65 )
'''
# Weights
'''
Model: "transformer"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ fourier_transform (FourierTransform) │ ?                           │             128 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ encoder (Encoder)                    │ ?                           │          64,350 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ decoder (Decoder)                    │ ?                           │          81,705 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten (Flatten)                    │ (3, 65)                     │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_6 (Dense)                      │ (3, 1)                      │              66 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 146,249 (571.29 KB)
 Trainable params: 146,121 (570.79 KB)
 Non-trainable params: 128 (512.00 B)
'''

# Output
'''
Test
The Mean Absolute Error of our Model is 1.72
The Root Mean Squared Error of our Model is 13.72
The accuracy of our model is 55.00000000000001%