# Import Lib
Neural machine translation with a Transformer and Keras - https://www.tensorflow.org/text/tutorials/transformer#the_transformer

In [1]:
#!pip install tensorflow_text

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = [15,8]
import seaborn as sns
import plotly.graph_objects as go

In [3]:
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Embedding, Layer, MultiHeadAttention, LayerNormalization, Conv1D, MaxPool1D
from tensorflow.keras.layers import Add, Dense, Dropout, Flatten, Concatenate, BatchNormalization
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
from tensorflow.math import rsqrt, minimum
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau, Callback
from tensorflow.keras.models import load_model, save_model
from tensorflow.keras.ops import round as tf_round
from tensorflow.keras.regularizers import l2, l1
from tensorflow.keras.backend import set_value
from tensorflow.signal import stft, hann_window

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, mean_absolute_error, mean_squared_error

In [5]:
from tensorflow.keras import initializers

initializer_for_relu = initializers.HeNormal() # For layers with activation function Relu
initializer_for_sigmoid = initializers.GlorotNormal() # For layers with activation function Sigmoid

In [6]:
import warnings
warnings.filterwarnings('ignore')

# Data Preprocessing

## Import Data

In [7]:
# Mount google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
path = '/content/drive/MyDrive/MyColabProject/Data'
data_df = pd.read_csv(path+'/raw_nsepy_inp512_differencedVal_fourierTransform.csv')
data_df.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,512
0,-4.45,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,...,-3.25,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35
1,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,...,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05
2,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,1.5,...,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05,1.7


In [9]:
data_df.shape

(41412, 513)

## Train Test Split
* symbol =
           'BPCL','POWERGRID','NTPC','SUNPHARMA','TATACONSUM','ONGC','HINDALCO','ICICIBANK','SBIN','BHARTIARTL',
           'WIPRO','ITC','AXISBANK','JSWSTEEL','COALINDIA','HDFCLIFE','TATAMOTORS'
* Open, high, low, close  = 609
* Complete set = 2436, 4872, 9744

In [283]:
data_df.iloc[:2436,256:].shape

(2436, 257)

In [284]:
data_df.iloc[:2436,256:].head(3)

Unnamed: 0,256,257,258,259,260,261,262,263,264,265,...,503,504,505,506,507,508,509,510,511,512
0,2.75,-0.4,-1.65,-0.85,-1.0,0.3,3.9,7.85,2.75,4.3,...,-3.25,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35
1,-0.4,-1.65,-0.85,-1.0,0.3,3.9,7.85,2.75,4.3,-5.35,...,5.7,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05
2,-1.65,-0.85,-1.0,0.3,3.9,7.85,2.75,4.3,-5.35,-0.25,...,-5.45,3.2,-4.45,-2.15,-1.05,-1.1,3.05,4.35,2.05,1.7


In [285]:
data_q1 = data_df.iloc[:17052,:129].copy()
data_q2 = data_df.iloc[:17052,128:257].copy()
data_q3 = data_df.iloc[:17052,256:385].copy()
data_q4 = data_df.iloc[:17052,384:].copy()

data_df_merged = pd.DataFrame()
data_df_merged = pd.concat([pd.DataFrame(data_q1.values),pd.DataFrame(data_q2.values),pd.DataFrame(data_q3.values),pd.DataFrame(data_q4.values)],
                           ignore_index=True)
print(data_df_merged.shape)
data_df_merged.head(3)

(68208, 129)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,128
0,-4.45,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,...,-7.65,-1.25,0.0,8.0,-6.05,1.8,-0.75,-0.9,-0.1,-2.8
1,6.45,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,...,-1.25,0.0,8.0,-6.05,1.8,-0.75,-0.9,-0.1,-2.8,-1.5
2,-3.1,3.0,3.1,-2.55,-3.25,9.45,1.55,-5.2,-4.0,1.5,...,0.0,8.0,-6.05,1.8,-0.75,-0.9,-0.1,-2.8,-1.5,0.5


In [286]:
#data_128.head(3)

In [287]:
data_df_merged.tail(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,128
68205,3.4,6.6,1.55,7.45,0.55,-8.2,6.95,11.85,13.3,-5.95,...,7.5,-6.75,30.95,-1.7,17.7,9.65,10.35,5.0,-14.3,1.35
68206,6.6,1.55,7.45,0.55,-8.2,6.95,11.85,13.3,-5.95,17.05,...,-6.75,30.95,-1.7,17.7,9.65,10.35,5.0,-14.3,1.35,-15.05
68207,1.55,7.45,0.55,-8.2,6.95,11.85,13.3,-5.95,17.05,-2.5,...,30.95,-1.7,17.7,9.65,10.35,5.0,-14.3,1.35,-15.05,-10.0


In [288]:
inp_len = 128 # Length of the input
out_len = 1 # Length of Output and Context

In [289]:
prediction_percentage = 0.1 #0.025
train_df, test_df = train_test_split(data_df_merged, test_size=prediction_percentage, random_state=1, shuffle=True)

In [290]:
print('Train shape',train_df.shape)
print('Test shape',test_df.shape)

Train shape (61387, 129)
Test shape (6821, 129)


In [291]:
train_df, val_df = train_test_split(train_df, test_size=0.25, random_state=1, shuffle=True)

In [292]:
print('Train shape',train_df.shape)
print('Validation shape',val_df.shape)
print('Test shape',test_df.shape)

Train shape (46040, 129)
Validation shape (15347, 129)
Test shape (6821, 129)


Keras Model.fit training expects (inputs, labels) pairs. The inputs is tokenized sequences. The labels are the same sequences shifted by 1. This shift is so that at each location input sequence, the label in the next token.

In [293]:
X_train = train_df.drop(columns=train_df.columns[inp_len:].values).values
y_train = train_df.drop(columns=train_df.columns[:inp_len].values).values

X_val = val_df.drop(columns=val_df.columns[inp_len:].values).values
y_val = val_df.drop(columns=val_df.columns[:inp_len].values).values

X_test = test_df.drop(columns=test_df.columns[inp_len:].values).values
y_test = test_df.drop(columns=test_df.columns[:inp_len].values).values


In [294]:
print('X Train shape',X_train.shape)
print('Y Train shape',y_train.shape)

print('X Validation shape',X_val.shape)
print('Y Validation shape',y_val.shape)

print('X Test shape',X_test.shape)
print('Y Test shape',y_test.shape)

X Train shape (46040, 128)
Y Train shape (46040, 1)
X Validation shape (15347, 128)
Y Validation shape (15347, 1)
X Test shape (6821, 128)
Y Test shape (6821, 1)


In [295]:
X_train_f = X_train.astype(np.float32)
y_train_t = tf.convert_to_tensor(y_train)

X_val_f = X_val.astype(np.float32)
y_val_t = tf.convert_to_tensor(y_val)

X_test_f = X_test.astype(np.float32)
y_test_t = tf.convert_to_tensor(y_test)

In [296]:
X_train_f[0]

array([ 4.05,  1.35, -3.  ,  1.9 , -0.5 ,  5.2 , -1.2 ,  1.3 ,  4.2 ,
        0.3 , -1.9 , -1.6 ,  1.45, -1.7 ,  0.6 , -0.65,  1.25, -0.6 ,
       -3.3 , -1.9 ,  3.75,  2.05, -1.7 , -2.45, -3.  ,  1.05,  3.85,
       -1.4 ,  1.2 , -0.75, -0.9 , -0.65, -0.5 , -2.1 , -3.55,  1.5 ,
        0.3 ,  1.  , -3.1 , -0.45,  0.25,  4.  , -0.35, -0.65,  1.75,
       -0.65, -0.55,  1.9 , -0.45, -0.05, -0.7 ,  2.  ,  2.6 ,  5.25,
        0.3 , -2.1 ,  0.  ,  0.8 ,  0.  , -0.25,  0.95,  0.  ,  0.55,
       -2.05, -1.35, -0.65,  1.45, -0.25,  0.75,  0.65, -0.1 ,  4.8 ,
       -0.05,  1.6 ,  0.35,  0.25, -0.7 , -0.2 ,  1.3 , -2.  , -3.3 ,
        4.35,  0.05, -0.9 ,  0.95, -2.05,  0.05,  0.6 , -0.5 ,  1.2 ,
        0.4 ,  0.7 , -0.9 , -0.3 , -0.1 ,  0.75, -0.25,  2.  ,  0.95,
       -0.6 , -0.7 ,  2.1 , -3.25,  0.75, -0.35,  0.65, -0.6 , -2.15,
        0.1 ,  2.35, -1.1 , -0.35, -0.1 , -0.9 ,  1.3 , -0.95, -2.5 ,
        2.7 ,  1.8 , -0.45,  0.35, -1.6 , -0.5 , -2.35, -0.85,  0.55,
        0.05, -1.5 ]

In [297]:
y_train[0]

array([0.65])

In [298]:
y_train_t[0]

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([0.65])>

## Testing tf.signal.stft

In [None]:
spectrogram1 = tf.signal.stft(signals=X_train_f[0],
                              frame_length=512,
                              frame_step=1)

print(X_train[0].shape[0])
print(spectrogram1.shape)
print(spectrogram1)

In [None]:
spectrogram2 = tf.signal.stft(X_train_f[0], frame_length=512, frame_step=256)

print(X_train[0].shape[0])
print(spectrogram2.shape)
print(spectrogram2)

In [None]:
spectrogram3 = tf.signal.stft(X_train_f[0], frame_length=256, frame_step=128)

print(X_train[0].shape[0])
print(spectrogram3.shape)
print(spectrogram3)

In [None]:
spectrogram4 = tf.signal.stft(X_train_f[0], frame_length=16, frame_step=8)

print(X_train[0].shape[0])
print(spectrogram4.shape)
print(spectrogram4)

# Model Architecture <br>
* The querys is what you're trying to find.
* The keys what sort of information the dictionary has.
* The value is that information.


<b>fft_length and frame_length</b>
* The number of frequency bins depends on the FFT size (fft_length), which is often set to the next power of 2 that is greater than or equal to frame_length
* example, if frame_lenght = 39, Next power of 2 greater than 39 is 64. Setting fft_length = 64 would allow the FFT algorithm to work more efficiently by padding the input to a length of 64.
* or choose frame_lenght any of 2, 4, 8, 16, 32, 64, ... <br>

<b>frame_step</b>
* To choose frame_step, 50% Overlap (a common choice): To achieve 50% overlap, set frame_step to half the frame_length: frame_step = frame_length/2
* example, if frame_length=8, then frame_step=8/2 = 4. This will give you overlapping frames, which improves frequency resolution.<br>

<b>Frequency_bins v/s time_bins</b>
*  have more Frequency_bins if identifying a long-term uptrend or downtrend based on weekly or monthly stock price data, or detecting cyclic behavior like seasonality.
* have more time_bins if Detecting intraday trends or anomalies (like a sudden price surge due to a news release).

In [199]:
# Hyperparameters for Fourier Transform
sample_signal_length = [128, 64, 32, 16] # Length of the input time series
sample_frame_length = [64, 32, 16, 8] # window size for Fourier Transform
sample_frame_size = len(sample_frame_length)
sample_frame_step = [32, 16, 8, 4] # Hop size for Fourier Transform
sample_fft_length = sample_frame_length
MAX_INPUT_SIZE = 128

# Hyperparameters for Attention Layer and DNN Layer
sample_num_layers = [1, 2, 4, 6] # number of TransformerEncoderLayer layers (Original paper = 6)
sample_num_heads = [1, 2, 4, 6] # number of self-attention heads in the MultiheadAttention layer (Original paper = 8)
sample_dropout_rate = 0.2 # Dropout rate
sample_regularizer_rate = 0.005 # Use to regularizer the weights in attention model

# Extra Parameters
sample_index_for_testing = 0

In [200]:
int(tf.math.floor((sample_fft_length[sample_index_for_testing]/2) +1))

33

In [201]:
sample_fft_length[sample_index_for_testing]

64

In [202]:
# The STFT output shape can be defined as:
# Output Shape = (batch_size, 𝐹, 𝑇)
sample_time_bins = list()
sample_frequency_bins = list()
for i in np.arange(sample_frame_size):
  print(i)
  # Number of Time Frames (T)
  sample_time_bins.append(int(((sample_signal_length[i] - sample_fft_length[i])/sample_frame_step[i] )+1 ))
  print('Time Bins =',sample_time_bins[i])

  # Number of Frequency Bins (F)
  sample_frequency_bins.append(int(tf.math.floor((sample_fft_length[i]/2) +1)))
  print('Frequency Bins =',sample_frequency_bins[i])

  print('output_shape=[batch_size, time_bins, frequency_bins]')
  print('Output shape = (batch_size,',sample_time_bins[i],',',sample_frequency_bins[i],')\n')

0
Time Bins = 3
Frequency Bins = 33
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 3 , 33 )

1
Time Bins = 3
Frequency Bins = 17
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 3 , 17 )

2
Time Bins = 3
Frequency Bins = 9
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 3 , 9 )

3
Time Bins = 3
Frequency Bins = 5
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 3 , 5 )



In [203]:
tf.math.sqrt(tf.cast(65, dtype=tf.bfloat16))*3

<tf.Tensor: shape=(), dtype=bfloat16, numpy=24.25>

In [204]:
tf.math.sqrt(tf.cast(33, dtype=tf.bfloat16))*3

<tf.Tensor: shape=(), dtype=bfloat16, numpy=17.25>

In [205]:
tf.math.sqrt(tf.cast(17, dtype=tf.bfloat16))*3

<tf.Tensor: shape=(), dtype=bfloat16, numpy=12.375>

In [206]:
tf.math.sqrt(tf.cast(9, dtype=tf.bfloat16))*3

<tf.Tensor: shape=(), dtype=bfloat16, numpy=9>

In [207]:
tf.math.sqrt(tf.cast(5, dtype=tf.bfloat16))*3

<tf.Tensor: shape=(), dtype=bfloat16, numpy=6.6875>

In [208]:
data_512_vals = data_df.iloc[:,:-1].values
data_512_vals

array([[ -4.45,   6.45,  -3.1 , ...,  -1.05,  -1.1 ,   3.05],
       [  6.45,  -3.1 ,   3.  , ...,  -1.1 ,   3.05,   4.35],
       [ -3.1 ,   3.  ,   3.1 , ...,   3.05,   4.35,   2.05],
       ...,
       [  4.4 ,   6.  ,  -6.1 , ..., -14.1 ,  -9.4 , -37.6 ],
       [  6.  ,  -6.1 ,  -2.55, ...,  -9.4 , -37.6 ,   4.75],
       [ -6.1 ,  -2.55, -15.25, ..., -37.6 ,   4.75,  -5.25]])

In [209]:
data_512_vals[:,-sample_frame_length[sample_index_for_testing]:].shape

(41412, 64)

In [210]:
data_512_vals[:,-sample_frame_length[sample_index_for_testing]:]

array([[  1.1 ,   6.8 ,   2.5 , ...,  -1.05,  -1.1 ,   3.05],
       [  6.8 ,   2.5 ,  -2.9 , ...,  -1.1 ,   3.05,   4.35],
       [  2.5 ,  -2.9 ,   4.5 , ...,   3.05,   4.35,   2.05],
       ...,
       [ -5.25,  25.25,  -9.6 , ..., -14.1 ,  -9.4 , -37.6 ],
       [ 25.25,  -9.6 ,  10.  , ...,  -9.4 , -37.6 ,   4.75],
       [ -9.6 ,  10.  ,  11.25, ..., -37.6 ,   4.75,  -5.25]])

In [211]:
data_512_vals[:,-sample_frame_length[1]:].shape

(41412, 32)

In [212]:
data_512_vals[:,-sample_frame_length[1]:]

array([[  1.15,  -4.15,  -1.7 , ...,  -1.05,  -1.1 ,   3.05],
       [ -4.15,  -1.7 ,   3.15, ...,  -1.1 ,   3.05,   4.35],
       [ -1.7 ,   3.15,   0.75, ...,   3.05,   4.35,   2.05],
       ...,
       [ -3.  ,  -0.15, -14.85, ..., -14.1 ,  -9.4 , -37.6 ],
       [ -0.15, -14.85,  13.35, ...,  -9.4 , -37.6 ,   4.75],
       [-14.85,  13.35,   9.05, ..., -37.6 ,   4.75,  -5.25]])

## The Fourier Transform layer

The output is a 2D array of shape (n_freqs, n_times), where n_freqs is the number of frequency bins (same as the length of f), and n_times is the number of time segments (same as the length of t).
* Rows of Zxx correspond to different frequencies (i.e., values in the f array).
* Columns of Zxx correspond to different time segments (i.e., values in the t array).
* The magnitude of the complex numbers in Zxx (np.abs(Zxx)) represents the strength or amplitude of each frequency component at that time segment.
* The phase of the complex numbers in Zxx (np.angle(Zxx)) represents the phase information of the signal at that frequency and time.
*  A larger window (windows_size, hops) gives better frequency resolution but poorer time resolution. You may need to experiment with different values based on your data.

* tf.py_function allows TensorFlow to execute scipy_stft_fn, passing a tensor as input, converting it to a NumPy array internally, and returning a tensor that is compatible with TensorFlow.
* In the build method of the FourierTransform class, the shape of self.kernel should be a tuple, but it is set as a single value (self.windows_size). self.windows_size should be wrapped in parentheses to specify the shape correctly, assuming it's a 1D kernel.

In [213]:
class FourierTransform(Layer):
  def __init__(self, signal_length, frame_length, frame_step):
    super(FourierTransform, self).__init__()
    self.signal_length = signal_length
    self.frame_length = frame_length
    self.frame_step = frame_step

  def build(self, input_shape):
    # Define weights
    self.kernel = self.add_weight(
        shape=(self.signal_length,), # the use of ',' after self.signal_len is a must, read the above mentioned comments point-2
        initializer=initializer_for_relu,
        trainable=False
    )

  def call(self, x):
    #window_gen = hann_window(self.windows_size)  # symmetric Gaussian window
    # Convert the waveform to a spectrogram via a STFT.
    spectrogram = tf.signal.stft(signals=x, frame_length=self.frame_length, fft_length=self.frame_length, frame_step=self.frame_step)
    magnitude_x = tf.math.abs(spectrogram)
    angle_x = tf.math.angle(spectrogram) # Disable it if using only magnitude as output
    magnitude_x = tf_round(magnitude_x, 4)
    angle_x = tf_round(angle_x, 4) # Disable it if using only magnitude as output
    return magnitude_x, angle_x

In [214]:
# Create an Embedding Object
sft_layer = FourierTransform(signal_length=sample_signal_length[sample_index_for_testing],
                             frame_length=sample_frame_length[sample_index_for_testing],
                             frame_step=sample_frame_step[sample_index_for_testing])
sft_layer.build(X_train_f[:3].shape)
# Calling the function
out_sft_mag, out_sft_ang = sft_layer(X_train_f[:3])

In [215]:
print(out_sft_mag.shape)

(3, 3, 33)


In [216]:
X_train_f[0,:]

array([ 14.4 ,   9.6 ,   2.55, -19.9 , -11.85, -13.8 ,  14.  ,  -3.8 ,
        -1.2 ,  -1.9 ,  -6.2 ,   4.  ,   8.75,   0.9 ,   6.65,   5.8 ,
         0.  ,  -2.5 ,  -6.  ,  -2.9 , -31.75,   9.9 , -12.25,   6.5 ,
        -3.2 ,   1.2 ,  12.45,  -0.25,   3.55,  -4.25,   2.45,   8.05,
         1.4 ,   2.8 ,   3.8 ,  -2.95,  -0.05,  -6.4 ,  14.9 ,   0.8 ,
        -4.9 ,   1.85, -11.25,  -6.25,   8.25,  -2.5 ,   2.7 ,   8.7 ,
         2.25,  -1.4 ,   0.15,  -1.9 ,  23.5 , -15.45,  -4.4 ,   9.9 ,
       -21.55,   4.95,  -4.7 ,   4.25,  19.  ,  -0.25,   2.65,  -0.45,
        21.35,  11.05,  -4.85,   3.15,  -5.4 ,  -7.85,  -0.8 ,  -1.7 ,
         7.6 ,  20.5 , -43.  ,  17.  ,  13.95,   5.05,  -1.55,  -0.1 ,
        12.25,  -2.45,   0.35,   1.9 ,  -9.9 ,   4.  ,  -3.1 ,  -5.95,
        -4.65,  -0.55,  -1.25,   5.5 ,   1.05,   4.85,   5.6 ,   4.9 ,
         1.4 ,   2.2 ,  -0.25,  -2.6 ,  10.9 ,  -4.2 ,   0.  ,  15.4 ,
        -7.9 , -16.85,   4.  ,  -6.3 ,   8.3 ,   2.5 ,  11.95,   3.5 ,
      

In [217]:
# Inspecting the time-component when frequency-component = 3
#out_sft_mag[0,1,:]

In [218]:
# Inspecting the time-component when frequency-component = 12
out_sft_mag[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([ 9.7705, 20.6572, 28.1555, 50.889 , 38.6572, 37.8122, 41.6774,
       16.2969, 35.6998, 43.1729, 35.1532, 23.718 , 24.1899, 18.4097,
       15.9575, 41.4464, 29.4592, 25.7323, 19.8003, 38.6601, 35.0415,
        3.0897, 38.4422, 40.8258, 37.0218, 65.795 , 63.6118, 64.0544,
       27.4887, 53.2495, 50.4445, 47.0059,  6.1176], dtype=float32)>

In [219]:
# Inspecting the frequency-component when time-component = 3
out_sft_mag[0,:,1]

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([20.6572, 39.3889, 20.9196], dtype=float32)>

In [220]:
# Inspecting the frequency-component when time-component = 12
out_sft_mag[0,:,4]

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([38.6572, 71.1966, 53.685 ], dtype=float32)>

In [221]:
print(out_sft_ang.shape)

(3, 3, 33)


In [222]:
# Inspecting the time-component when frequency-component = 3
out_sft_ang[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([ 0.    ,  2.3238, -0.1959, -2.6284,  1.0852,  0.1216, -2.4624,
        2.3168,  0.7266, -2.5798, -0.2437,  2.1116, -0.7534, -3.0232,
        2.383 , -0.7338,  2.4438,  0.7582, -2.9673, -1.6526,  1.4178,
       -1.4689, -0.4826,  2.0093,  3.0367, -0.902 ,  2.9747,  0.349 ,
       -1.1963,  2.8863,  0.8667, -1.6162,  3.1416], dtype=float32)>

In [223]:
# Inspecting the time-component when frequency-component = 12
#out_sft_ang[0,6,:]

In [224]:
# Inspecting the frequency-component when time-component = 3
out_sft_ang[0,:,1]

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([2.3238, 3.127 , 2.6034], dtype=float32)>

In [225]:
# Inspecting the frequency-component when time-component = 12
out_sft_ang[0,:,4]

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([1.0852, 0.5318, 0.4572], dtype=float32)>

## Custom Attention Block

In [226]:
class BaseAttention(Layer):
  def __init__(self, frequency_bins, time_bins, **kwargs):
    super().__init__()
    self.mha = MultiHeadAttention(key_dim = frequency_bins,
                                  kernel_initializer = initializer_for_relu,
                                  **kwargs)
    self.layernorm = LayerNormalization()
    self.add = Add()
    self.frequency_bins = frequency_bins
    self.time_bins = time_bins

  def build(self, input_shape):
    # Define weights
    self.kernel = self.add_weight(
        shape=(self.time_bins, self.frequency_bins),
        initializer=initializer_for_relu,
        trainable=True
    )

## self-attention layer - Magnitude and Angle

* tf.ensure_shape Updates the shape of a tensor and checks at runtime that the shape holds.
* When executed, this operation asserts that the input tensor x's shape is compatible with the shape argument.

In [227]:
class SelfAttention(BaseAttention):
  def __init__(self, frequency_bins, time_bins, **kwargs):
      # Call the parent class (BaseAttention) constructor
      super().__init__(frequency_bins, time_bins, **kwargs)

  def call(self, magnitude):
    magnitude = tf.ensure_shape(magnitude, [None, self.time_bins, self.frequency_bins])
    attn_output = self.mha(
        query=magnitude,  # The querys is what you're trying to find.
        key=magnitude,  # The keys what sort of information the dictionary has.
        value=magnitude # The value is that information.
        )
    # Cache the attention scores for plotting later.
    #self.last_attn_scores = attn_scores

    x = self.add([magnitude, attn_output])
    x = self.layernorm(x)
    x = tf_round(x, 4)
    return x

In [228]:
sample_lsa = SelfAttention(frequency_bins=sample_frequency_bins[sample_index_for_testing],
                           time_bins=sample_time_bins[sample_index_for_testing],
                           num_heads=sample_num_heads[sample_index_for_testing],
                           dropout=sample_dropout_rate,
                           kernel_regularizer=l2(sample_regularizer_rate),
                           bias_regularizer=l2(sample_regularizer_rate),
                           activity_regularizer=l2(sample_regularizer_rate)
                                    )

In [229]:
#sample_lsa.build(out_sft_mag.shape)
out_lsa_mag = sample_lsa(out_sft_mag)
print(out_sft_mag.shape)
print(out_lsa_mag.shape)

(3, 3, 33)
(3, 3, 33)


In [230]:
out_lsa_mag[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([-0.2925, -0.3304, -1.691 , -0.5044,  0.9516,  0.4992, -1.6858,
        0.9333,  0.4588,  1.2579,  1.413 , -1.2895,  0.3402, -0.7383,
        1.2459,  0.2688, -1.7336,  0.1832, -0.005 , -0.7046,  0.4651,
       -0.9777,  2.1608,  0.0628, -0.5986, -1.0667,  0.6255,  1.5333,
       -0.7126, -0.6018, -0.7066, -0.1952,  1.4351], dtype=float32)>

## The global cross-attention layer

The similarity (or dot product) between the Query and each Key is computed to determine an attention score. These scores measure how relevant each item (Key) is to the current item (Query).

In [231]:
class GlobalCrossAttention(BaseAttention):
  def __init__(self, frequency_bins, time_bins, **kwargs):
      # Call the parent class (BaseAttention) constructor
      super().__init__(frequency_bins, time_bins, **kwargs)

  def call(self, magnitude, angle):
    magnitude = tf.ensure_shape(angle, [None, self.time_bins, self.frequency_bins])
    angle = tf.ensure_shape(angle, [None, self.time_bins, self.frequency_bins])
    attn_output, attn_scores = self.mha(
        query=magnitude,  # The querys is what you're trying to find.
        key=angle,  # The keys what sort of information the dictionary has.
        value=angle, # The value is that information.
        return_attention_scores=True
        )

    # Cache the attention scores for plotting later.
    self.last_attn_scores = attn_scores

    x = self.add([magnitude, attn_output])
    x = self.layernorm(x)
    x = tf_round(x, 4)
    return x

In [232]:
sample_gca = GlobalCrossAttention(frequency_bins=sample_frequency_bins[sample_index_for_testing],
                                  time_bins=sample_time_bins[sample_index_for_testing],
                                  num_heads=sample_num_heads[sample_index_for_testing],
                                  kernel_regularizer=l2(sample_regularizer_rate),
                                  bias_regularizer=l2(sample_regularizer_rate),
                                  activity_regularizer=l2(sample_regularizer_rate)
                                  )
#sample_gca.build(out_lsa_ang.shape)

In [233]:
out_gca_mag = sample_gca(magnitude=out_lsa_mag, angle=out_lsa_mag)
print(out_lsa_mag.shape)
print(out_gca_mag.shape)

(3, 3, 33)
(3, 3, 33)


In [234]:
out_gca_mag[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([ 0.8373, -0.9443, -1.4611,  0.1424,  0.3965, -0.3603, -1.0156,
        0.9383, -0.4016, -0.8531,  0.6377,  1.0105, -0.0267,  0.8164,
        0.9347,  1.1039, -1.6128,  0.752 ,  0.5584, -0.9602, -1.0135,
        0.3422,  1.3949, -1.9537,  0.4286, -1.1298,  0.7892,  2.2719,
        0.9016, -1.1277, -0.9899, -0.5692,  0.1635], dtype=float32)>

## Feed Forward Network Layer

In [235]:
class FeedForward(Layer):
  # dff - dence feed forward neurons
  # sft_len - output shape Fourier Transform
  def __init__(self, frequency_bins, time_bins, regularizer_rate, dropout_rate=0.1):
    super().__init__()
    self.seq = Sequential([
        #Dense(tf.get_static_value(tf.cast(tf.math.sqrt(tf.cast(frequency_bins, dtype=tf.bfloat16))*time_bins, dtype=tf.int32)),
        #Dense(tf.get_static_value(tf.cast(frequency_bins*time_bins, dtype=tf.int32)),
        #      kernel_regularizer=l2(regularizer_rate),
        #      bias_regularizer=l2(regularizer_rate),
        #      activity_regularizer=l2(regularizer_rate),
        #      activation='relu', kernel_initializer=initializer_for_relu),
        #Dropout(dropout_rate),

        #Dense(tf.get_static_value(tf.cast(frequency_bins*time_bins, dtype=tf.int32)),
              #kernel_regularizer=l2(regularizer_rate),
              #bias_regularizer=l2(regularizer_rate),
              #activity_regularizer=l2(regularizer_rate),
        #      activation='relu', kernel_initializer=initializer_for_relu),
        #Dropout(dropout_rate),

        Dense(tf.get_static_value(tf.cast(frequency_bins, dtype=tf.int32)),
              #kernel_regularizer=l2(regularizer_rate),
              #bias_regularizer=l2(regularizer_rate),
              #activity_regularizer=l2(regularizer_rate),
              activation='relu', kernel_initializer=initializer_for_relu),
        Dropout(dropout_rate),

        Dense(frequency_bins
              #kernel_regularizer=l2(regularizer_rate),
              #bias_regularizer=l2(regularizer_rate),
              #activity_regularizer=l2(regularizer_rate)
              ) # NO Activation Function, to predict linear values as given in original paper
    ])
    self.add = Add()
    self.layer_norm = LayerNormalization()

  def call(self, x):
    x = self.add([x, self.seq(x)])
    x = self.layer_norm(x)
    return x

In [236]:
sample_ffn = FeedForward(frequency_bins=sample_frequency_bins[sample_index_for_testing],
                         time_bins=sample_time_bins[sample_index_for_testing],
                         regularizer_rate=sample_regularizer_rate)
sample_ffn_out = sample_ffn(out_gca_mag)
print(out_gca_mag.shape)
print(sample_ffn_out.shape)

(3, 3, 33)
(3, 3, 33)


In [237]:
sample_ffn_out[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([ 0.83900315, -1.148834  , -0.5942258 ,  0.1771884 , -0.8024908 ,
        0.73828274, -1.5770444 ,  1.5407318 , -1.7301832 ,  0.17747828,
        1.2425725 ,  0.5957787 , -1.3207335 ,  1.5138462 , -0.26855853,
        0.21270603, -0.66509116,  0.9423366 ,  0.5890055 , -0.7863967 ,
        0.1938309 ,  0.38184282,  1.04177   , -1.9637449 , -0.00738339,
       -1.4108367 ,  1.4866147 ,  1.3431258 ,  0.9751418 , -0.66946715,
       -0.69446313, -0.38266706,  0.03086461], dtype=float32)>

## Complete Encoder Layer

In [238]:
class EncoderLayer(Layer):
  def __init__(self,*, frequency_bins, time_bins, num_heads, dropout_rate, regularizer_rate):
    super().__init__()

    self.self_attention = SelfAttention(frequency_bins=frequency_bins,
                                        time_bins=time_bins,
                                        num_heads=num_heads,
                                        dropout=dropout_rate,
                                        kernel_regularizer=l2(regularizer_rate),
                                        bias_regularizer=l2(regularizer_rate),
                                        activity_regularizer=l2(regularizer_rate)
                                             )

    #self.ffn = FeedForward(frequency_bins=frequency_bins, time_bins=time_bins, regularizer_rate=regularizer_rate, dropout_rate=dropout_rate)

  def call(self, angle):
    angle = self.self_attention(angle)
    #angle = self.ffn(angle)
    return angle

In [239]:
sample_encoder_layer = EncoderLayer(frequency_bins=sample_frequency_bins[sample_index_for_testing],
                                    time_bins=sample_time_bins[sample_index_for_testing],
                                    num_heads=sample_num_heads[sample_index_for_testing],
                                    dropout_rate=sample_dropout_rate,
                                    regularizer_rate=sample_regularizer_rate)
#sample_encoder_layer.build(out_sft_ang.shape)

In [240]:
sample_encoder_output = sample_encoder_layer(out_sft_ang, training=False)
# Print the shape.
print(out_sft_ang.shape)
print(sample_encoder_output.shape)

(3, 3, 33)
(3, 3, 33)


In [241]:
class Encoder(Layer):
  def __init__(self, *, frequency_bins, time_bins, num_layers, num_heads, dropout_rate, regularizer_rate):
    super().__init__()

    self.num_layers=num_layers

    self.enc_layers = [ EncoderLayer(frequency_bins=frequency_bins,
                                     time_bins=time_bins,
                                     num_heads=num_heads,
                                     dropout_rate=dropout_rate,
                                     regularizer_rate=regularizer_rate
                                     ) for _ in range(num_layers)]

  def call(self, angle):
    for i in range(self.num_layers):
      angle = self.enc_layers[i](angle)

    #self.last_attn_scores = self.enc_layers[-1].last_attn_scores
    return angle

In [242]:
# Testing the encoder

# Instantiate the encoder.
sample_encoder = Encoder(frequency_bins=sample_frequency_bins[sample_index_for_testing],
                         time_bins=sample_time_bins[sample_index_for_testing],
                         num_layers=sample_num_layers[sample_index_for_testing],
                         num_heads=sample_num_heads[sample_index_for_testing],
                         dropout_rate=sample_dropout_rate,
                         regularizer_rate=sample_regularizer_rate)
#sample_encoder.build(out_sft_ang.shape)

In [243]:
sample_encoder_output = sample_encoder(out_sft_ang, training=False)
# Print the shape.
print(out_sft_ang.shape)
print(sample_encoder_output.shape)

(3, 3, 33)
(3, 3, 33)


In [244]:
sample_encoder_output[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([-0.8524,  1.4569, -1.4672, -1.2711, -0.3202,  0.8219, -2.5074,
       -0.3202, -0.2242, -0.025 ,  1.1535, -0.4706,  1.5152,  0.2639,
        0.3599, -0.7566,  1.6819, -0.1221,  1.1399, -1.3601,  0.3897,
       -0.9401,  0.3852, -0.1876,  1.7202, -0.0955, -0.358 ,  0.0739,
        1.1337,  0.9129, -1.2822, -0.6512,  0.2032], dtype=float32)>

## Complete Decoder Layer

Each DecoderLayer containing a CausalSelfAttention, a CrossAttention, and a FeedForward layer:

In [245]:
class DecoderLayer(Layer):
  def __init__(self, *, frequency_bins, time_bins, num_heads, dropout_rate, regularizer_rate):
    super().__init__()

    self.local_self_att = SelfAttention(
        frequency_bins=frequency_bins,
        time_bins=time_bins,
        num_heads=num_heads,
        dropout=dropout_rate,
        kernel_regularizer=l2(regularizer_rate),
        bias_regularizer=l2(regularizer_rate),
        activity_regularizer=l2(regularizer_rate)
    )

    self.global_cross_att = GlobalCrossAttention(
        frequency_bins=frequency_bins,
        time_bins=time_bins,
        num_heads=num_heads,
        dropout=dropout_rate,
        kernel_regularizer=l2(regularizer_rate),
        bias_regularizer=l2(regularizer_rate),
        activity_regularizer=l2(regularizer_rate)
    )

    self.ffn = FeedForward(frequency_bins=frequency_bins, time_bins=time_bins, regularizer_rate=regularizer_rate, dropout_rate=dropout_rate)

  def call(self, magnitude, angle):
    self_att_out = self.local_self_att(magnitude)
    cross_att_out = self.global_cross_att(magnitude=self_att_out, angle=angle)

    # Cache the last attention scores for plotting later
    self.last_attn_scores = self.global_cross_att.last_attn_scores

    fnn_out = self.ffn(cross_att_out)

    return fnn_out

In [246]:
# Testing decoder layer
sample_decoder_layer = DecoderLayer(frequency_bins=sample_frequency_bins[sample_index_for_testing],
                                    time_bins=sample_time_bins[sample_index_for_testing],
                                    num_heads=sample_num_heads[sample_index_for_testing],
                                    dropout_rate=sample_dropout_rate,
                                    regularizer_rate=sample_regularizer_rate)
#sample_decoder_layer.build(out_sft_mag.shape)
sample_decoder_layer_output = sample_decoder_layer(magnitude=out_sft_mag, angle=sample_encoder_output)

In [247]:
print(out_sft_mag.shape)
print(sample_decoder_layer_output.shape)

(3, 3, 33)
(3, 3, 33)


In [248]:
sample_decoder_layer_output[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([-1.0832957 , -0.48015666, -1.7773705 , -0.8542696 ,  0.73512083,
        1.2747847 , -1.2799056 ,  0.4416815 ,  0.337475  ,  0.05064895,
        1.8690975 , -1.1236825 ,  1.7024299 ,  0.34599602,  0.19735035,
       -0.33404619,  1.858329  , -0.00996634,  2.436176  , -1.3924024 ,
        0.2705691 , -0.7671325 , -0.5271646 ,  0.41469282, -0.28482014,
       -0.65390813,  0.5975116 , -0.00272809, -0.2870841 , -0.8012094 ,
        0.45629263, -1.1756694 , -0.15334451], dtype=float32)>

In [249]:
class Decoder(Layer):
  def __init__(self, *, frequency_bins, time_bins, num_heads, num_layers, regularizer_rate, dropout_rate=0.1):
    super().__init__()

    self.num_layers=num_layers

    self.decoder_layer = [ DecoderLayer(frequency_bins=frequency_bins,
                                        time_bins=time_bins,
                                        num_heads=num_heads,
                                        dropout_rate=dropout_rate,
                                        regularizer_rate=regularizer_rate
                                        ) for _ in range(num_layers)]

  def call(self, magnitude, angle):
    for i in range(self.num_layers):
      magnitude  = self.decoder_layer[i](magnitude, angle)

    self.last_attn_scores = self.decoder_layer[-1].last_attn_scores

    return magnitude

In [250]:
# Test the decoder

# Instantiate the decoder.
sample_decoder = Decoder(frequency_bins=sample_frequency_bins[sample_index_for_testing],
                         time_bins=sample_time_bins[sample_index_for_testing],
                         num_layers=sample_num_layers[sample_index_for_testing],
                         num_heads=sample_num_heads[sample_index_for_testing],
                         dropout_rate=sample_dropout_rate,
                         regularizer_rate=sample_regularizer_rate)
#sample_decoder.build(out_sft_mag.shape)
output = sample_decoder(magnitude=out_sft_mag, angle=sample_encoder_output)

In [251]:
output[0,0,:]

<tf.Tensor: shape=(33,), dtype=float32, numpy=
array([ 0.33691075,  1.659502  , -1.4174569 , -2.4530344 ,  1.5540025 ,
       -0.8381461 , -0.12992863,  0.6152983 ,  0.70704776,  0.09831578,
       -0.39514607, -0.34276944, -0.19086851, -1.5986297 ,  1.0888466 ,
       -0.26272422,  1.1952305 , -0.49649405,  0.9377551 , -0.16314377,
       -0.82847005,  0.35413635,  0.07650726, -1.2396945 ,  1.2003206 ,
        0.14341435, -0.79909825, -0.10054268,  0.5674903 ,  0.5947427 ,
       -1.9386165 ,  0.63120055,  1.434043  ], dtype=float32)>

In [252]:
print(out_sft_mag.shape)
print(sample_encoder_output.shape)
print(output.shape)

(3, 3, 33)
(3, 3, 33)
(3, 3, 33)


## Transformer

In [253]:
class TransformerLayer(Layer):
  def __init__(self, *, signal_length, max_signal_length, frame_length, frame_step,
               #frequency_bins, time_bins,
               num_heads, num_layers, dropout_rate=0.1, regularizer_rate):
    super().__init__()

    self.time_bins = tf.cast(((signal_length - frame_length)/frame_step)+1 , dtype=tf.int32)

    self.frequency_bins = tf.cast(tf.math.floor((frame_length/2) +1), dtype=tf.int32)

    self.sft_layer = FourierTransform(signal_length=signal_length,
                                      frame_length=frame_length,
                                      frame_step=frame_step)

    self.encoder = Encoder(frequency_bins=tf.get_static_value(self.frequency_bins),
                           time_bins=tf.get_static_value(self.time_bins),
                           num_layers=num_layers,
                           num_heads=num_heads,
                           dropout_rate=dropout_rate,
                           regularizer_rate=regularizer_rate)

    self.decoder = Decoder(frequency_bins=tf.get_static_value(self.frequency_bins),
                           time_bins=tf.get_static_value(self.time_bins),
                           num_layers=num_layers,
                           num_heads=num_heads,
                           dropout_rate=dropout_rate,
                           regularizer_rate=regularizer_rate)

    self.flatten_layer = Flatten()

    self.dense_layer = Dense(signal_length,
                              activation='relu', kernel_initializer=initializer_for_relu
                              #kernel_regularizer=l2(regularizer_rate),
                              #bias_regularizer=l2(regularizer_rate),
                              #activity_regularizer=l2(regularizer_rate)
                             )

    #self.conv_layer = Conv1D(filters=self.filter_size,
    #                         kernel_size=[tf.get_static_value(self.time_bins)],
    #                         strides=[tf.get_static_value(self.time_bins)],
    #                         #activation='relu', kernel_initializer=initializer_for_relu,
    #                         input_shape = (tf.get_static_value(self.time_bins),tf.get_static_value(self.frequency_bins)))

  def call(self, inputs):
    # To use a Keras model with `.fit` you must pass all your inputs in the
    # first argument.
    magnitude_x, angle_x = self.sft_layer(inputs)

    enc_out = self.encoder(angle_x)  # (batch_size, time_bins, frequency_bins)

    dec_out = self.decoder(magnitude_x, enc_out)  # (batch_size, time_bins, frequency_bins)

    # Reduce the dimentionality.
    #dec_out = self.conv_layer(dec_out)

    # Final linear layer output.
    dec_out = self.flatten_layer(dec_out) # (batch_size, time_bins * frequency_bins)

    # Reduce the dimentionality.
    dec_out = self.dense_layer(dec_out)
    # Return the output and the attention weights.
    return dec_out


In [254]:
sample_transformer_layer = TransformerLayer(signal_length=sample_signal_length[sample_index_for_testing],
                                            max_signal_length = MAX_INPUT_SIZE,
                                            frame_length=sample_frame_length[sample_index_for_testing],
                                            frame_step=sample_frame_step[sample_index_for_testing],
                                            num_layers=sample_num_layers[sample_index_for_testing],
                                            num_heads=sample_num_heads[sample_index_for_testing],
                                            dropout_rate=sample_dropout_rate,
                                            regularizer_rate=sample_regularizer_rate)

sample_transformer_layer_output = sample_transformer_layer(X_train_f[:3])

In [255]:
sample_transformer_layer_output.shape

TensorShape([3, 128])

## Inception

In [256]:
class Inception(Model):
  def __init__(self, *, signal_length, max_signal_length, frame_length, frame_step,
              #frequency_bins, time_bins,
              num_heads, num_layers, dropout_rate=0.1, regularizer_rate):
    super().__init__()
    self.iteration_len = tf.cast(tf.get_static_value(tf.size(frame_length)), dtype=tf.int32)
    self.signal_length = signal_length

    self.transformer = [ TransformerLayer(signal_length=signal_length[i], max_signal_length=max_signal_length,
                                          frame_length=frame_length[i], frame_step=frame_step[i],
                                        num_heads=num_heads[i],num_layers=num_layers[i], regularizer_rate=regularizer_rate,
                                        dropout_rate=dropout_rate
                                     ) for i in range(self.iteration_len)]

    self.final_layer = Dense(1) # STUPID !!!!!! - using activation='relu' will limit the output between 0 and infinity, it won't give -ve outputs

  def call(self, inputs):
  # To use a Keras model with `.fit` you must pass all your inputs in the first argument.
    concat_out = None
    for i in range(self.iteration_len):
      inputs = inputs[:,-self.signal_length[i]:]
      transformer_out = self.transformer[i](inputs)
      if i == 0:
        concat_out = transformer_out
      else:
        concat_out = tf.concat([concat_out, transformer_out], 1)

    logits = self.final_layer(concat_out)  # (batch_size, target_len)

    try:
      # Drop the keras mask, so it doesn't scale the losses/metrics.
      del logits._keras_mask
    except AttributeError:
      pass

    # Return the final output and the attention weights.
    return logits

In [257]:

sample_transformer = Inception(signal_length=sample_signal_length,
                               max_signal_length = MAX_INPUT_SIZE,
                               frame_length=sample_frame_length,
                               frame_step=sample_frame_step,
                               num_layers=sample_num_layers,
                               num_heads=sample_num_heads,
                               dropout_rate=sample_dropout_rate,
                               regularizer_rate=sample_regularizer_rate)

sample_transformer_output = sample_transformer(X_train_f[:3])
sample_transformer_output


<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[-0.13646135],
       [ 1.2114104 ],
       [ 0.39865   ]], dtype=float32)>

# Model 1

## Model building

In [258]:
#Define a model with Transformer layer
tf.keras.backend.clear_session()
#model = tf.keras.Sequential()

In [259]:
# Hyperparameters for Fourier Transform
signal_length = [128, 128, 64, 64, 64, 32] # Length of the input time series
frame_length = [16, 16, 16, 16, 8, 16] # Hop size for Fourier Transform
frame_size = len(frame_length)
frame_step = [8, 4, 4, 2, 4, 2] # Hop size for Fourier Transform
fft_length = frame_length
MAX_INPUT_SIZE = 128

# Hyperparameters for Attention Layer and DNN Layer
num_layers = [1, 1, 1, 1, 1, 1] # number of TransformerEncoderLayer layers (Original paper = 6)
num_heads = [1, 1, 1, 1, 2, 2] # number of self-attention heads in the MultiheadAttention layer (Original paper = 8)
dropout_rate = 0.3 # Dropout rate
regularizer_rate = 0.0001  # Use to regularizer the weights in attention model

In [260]:
# The STFT output shape can be defined as:
# Output Shape = (batch_size, 𝐹, 𝑇)
time_bins = list()
frequency_bins = list()
for i in np.arange(frame_size):
  print(i)
  # Number of Time Frames (T)
  time_bins.append(int(((signal_length[i] - fft_length[i])/frame_step[i] )+1 ))
  print('Time Bins =',time_bins[i])

  # Number of Frequency Bins (F)
  frequency_bins.append(int(tf.math.floor((fft_length[i]/2) +1)))
  print('Frequency Bins =',frequency_bins[i])

  print('output_shape=[batch_size, time_bins, frequency_bins]')
  print('Output shape = (batch_size,',time_bins[i],',',frequency_bins[i],')\n')

0
Time Bins = 15
Frequency Bins = 9
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 15 , 9 )

1
Time Bins = 29
Frequency Bins = 9
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 29 , 9 )

2
Time Bins = 13
Frequency Bins = 9
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 13 , 9 )

3
Time Bins = 25
Frequency Bins = 9
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 25 , 9 )

4
Time Bins = 15
Frequency Bins = 5
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 15 , 5 )

5
Time Bins = 9
Frequency Bins = 9
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 9 , 9 )



In [261]:
transformer = Inception(signal_length=signal_length,
                        max_signal_length=MAX_INPUT_SIZE,
                        frame_length=frame_length,
                        frame_step=frame_step,
                        num_layers=num_layers,
                        num_heads=num_heads,
                        dropout_rate=dropout_rate,
                        regularizer_rate=regularizer_rate)

In [262]:
#transformer.build(X_train_f.shape)

In [263]:
transformer_output = transformer(X_train_f[:3])

print(X_train_f[:3].shape)
print(transformer_output.shape)

(3, 128)
(3, 1)


In [264]:
transformer.summary()

## Training

In [None]:
# Define the learning rate variable
optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.98, epsilon=1e-8, clipnorm=1.0)

In [None]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001, verbose=2)

In [None]:
transformer.compile(
    loss='mean_squared_error',#'mean_absolute_error',
    optimizer= optimizer,
    metrics=['R2Score'])# 'mean_squared_error'

In [None]:
path_model = '/content/drive/MyDrive/hobby_project/Models'
checkpoint_filepath = (path_model+'/Idea2_1SF_nsepy_Model_inp128_out1_fourierTransform_withInception.weights.h5')
print(checkpoint_filepath)

/content/drive/MyDrive/hobby_project/Models/Idea2_1SF_nsepy_Model_inp128_out1_fourierTransform_withInception.weights.h5


In [None]:
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True)

early_stopping = EarlyStopping(monitor="val_loss",
                               patience=10,
                               mode="min",
                               restore_best_weights=True)

In [None]:
history_1 = transformer.fit(X_train_f,y_train,
                            epochs=25, batch_size=1024,
                            shuffle = True,
                            validation_data=[X_val_f,y_val],
                            callbacks=[model_checkpoint_callback, early_stopping, reduce_lr])

Epoch 1/25
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 402ms/step - R2Score: 0.0064 - loss: 287989.4375 - val_R2Score: 0.0263 - val_loss: 199261.7344 - learning_rate: 0.0010
Epoch 2/25
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 243ms/step - R2Score: 0.0519 - loss: 183044.7500 - val_R2Score: 0.0569 - val_loss: 136909.7344 - learning_rate: 0.0010
Epoch 3/25
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 247ms/step - R2Score: 0.1049 - loss: 126496.7188 - val_R2Score: 0.0964 - val_loss: 96794.3672 - learning_rate: 0.0010
Epoch 4/25
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 242ms/step - R2Score: 0.1734 - loss: 89958.9609 - val_R2Score: 0.1488 - val_loss: 69798.5234 - learning_rate: 0.0010
Epoch 5/25
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 232ms/step - R2Score: 0.2522 - loss: 65316.0000 - val_R2Score: 0.2075 - val_loss: 51180.0156 - learning_rate: 0.0010
Epoch 6/25
[1m45/45[0m [32m

In [None]:
history_2 = transformer.fit(X_train_f,y_train,
                            initial_epoch=25, epochs=50,
                            batch_size=1024, shuffle = True,
                            validation_data=[X_val_f,y_val],
                            callbacks=[model_checkpoint_callback, early_stopping, reduce_lr])

Epoch 26/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 244ms/step - R2Score: 0.9941 - loss: 42.8880 - val_R2Score: 0.7428 - val_loss: 44.8525 - learning_rate: 0.0010
Epoch 27/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 232ms/step - R2Score: 0.9940 - loss: 25.7096 - val_R2Score: 0.7437 - val_loss: 32.6721 - learning_rate: 0.0010
Epoch 28/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 239ms/step - R2Score: 0.9944 - loss: 14.8663 - val_R2Score: 0.7438 - val_loss: 25.7032 - learning_rate: 0.0010
Epoch 29/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 245ms/step - R2Score: 0.9942 - loss: 8.8780 - val_R2Score: 0.7446 - val_loss: 22.0206 - learning_rate: 0.0010
Epoch 30/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 242ms/step - R2Score: 0.9947 - loss: 5.5812 - val_R2Score: 0.7446 - val_loss: 19.6033 - learning_rate: 0.0010
Epoch 31/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [None]:
history_3 = transformer.fit(X_train_f,y_train,
                            initial_epoch=50, epochs=100,
                            batch_size=1024, shuffle = True,
                            validation_data=[X_val_f,y_val],
                            callbacks=[model_checkpoint_callback, early_stopping, reduce_lr])

Epoch 51/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 250ms/step - R2Score: 0.9965 - loss: 0.2432 - val_R2Score: 0.7510 - val_loss: 15.7563 - learning_rate: 5.0000e-04
Epoch 52/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 244ms/step - R2Score: 0.9987 - loss: 0.0869 - val_R2Score: 0.7522 - val_loss: 15.6796 - learning_rate: 5.0000e-04
Epoch 53/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 216ms/step - R2Score: 0.9990 - loss: 0.0697 - val_R2Score: 0.7513 - val_loss: 15.7328 - learning_rate: 5.0000e-04
Epoch 54/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 214ms/step - R2Score: 0.9989 - loss: 0.0755 - val_R2Score: 0.7513 - val_loss: 15.7320 - learning_rate: 5.0000e-04
Epoch 55/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 220ms/step - R2Score: 0.9987 - loss: 0.0910 - val_R2Score: 0.7513 - val_loss: 15.7387 - learning_rate: 5.0000e-04
Epoch 56/100
[1m45/45[0m [32m━━━━━━━━

In [None]:
# Train Loss
train_loss = list(history_1.history['loss'])
train_loss.extend(history_2.history['loss'])
#train_loss.extend(history_3.history['loss'])
#train_loss.extend(history_4.history['loss'])

# Validation Loss
val_loss = list(history_1.history['val_loss'])
val_loss.extend(history_2.history['val_loss'])
#val_loss.extend(history_3.history['val_loss'])
#val_loss.extend(history_4.history['val_loss'])

# plot history
plt.plot(np.array(train_loss[5:]), label='train loss')
plt.plot(np.array(val_loss[5:]), label='validation loss')
plt.xlabel("Number of Epochs ")
plt.ylabel("Loss")
plt.grid()
plt.legend()
plt.show()

## Prediction - from Transformer

In [None]:
predictions = transformer((X_test_f), training=False)

In [None]:
predictions.shape

TensorShape([6821, 1])

In [None]:
predictions[0]

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([17.246437], dtype=float32)>

In [None]:
i = 12
print('Predictions = \n',predictions[i])
print('Actuals = \n',y_test[i])

Predictions = 
 tf.Tensor([-1.7586663], shape=(1,), dtype=float32)
Actuals = 
 -1.75


### Validating the results

In [None]:
y_pred = predictions.numpy().reshape(-1)
y_test = y_test.reshape(-1)
print(y_pred.shape)
print(y_test.shape)

(6821,)
(6821,)


In [None]:
score_mae = mean_absolute_error(y_test, y_pred)
print("The Mean Absolute Error of our Model is {}".format(round(score_mae, 2)))
score_rmse = mean_squared_error(y_test, y_pred)
print("The Root Mean Squared Error of our Model is {}".format(round(score_rmse, 2)))
score_r2 = r2_score(y_test, y_pred)
print("The accuracy of our model is {}%".format(round(score_r2, 2) *100))

The Mean Absolute Error of our Model is 1.43
The Root Mean Squared Error of our Model is 16.91
The accuracy of our model is 75.0%


In [None]:
fig = go.Figure(data=[go.Ohlc(x=np.arange(0,10000,1),
                    open=y_pred,
                    high=y_pred,
                    low=y_test,
                    close=y_test
                              )])
fig.show()

In [None]:
'''
fig = go.Figure(data=[go.Ohlc(x=np.arange(0,1000,1),
                    open=y_pred,
                    high=y_pred,
                    low=y_test,
                    close=y_test
                              )])
fig.show()
'''

'\nfig = go.Figure(data=[go.Ohlc(x=np.arange(0,1000,1),\n                    open=y_pred,\n                    high=y_pred,\n                    low=y_test,\n                    close=y_test\n                              )])\nfig.show()\n'

 ## Prediction - from Saved model

In [269]:
path_model = '/content/drive/MyDrive/MyColabProject/Models/Success_Model'
checkpoint_filepath = (path_model+'/Idea2_1SF_nsepy_Model_inp128_out1_FourierTransform_withInception_moreTimeBin_bestSoFar.weights.h5')
print(checkpoint_filepath)

/content/drive/MyDrive/MyColabProject/Models/Success_Model/Idea2_1SF_nsepy_Model_inp128_out1_FourierTransform_withInception_moreTimeBin_bestSoFar.weights.h5


In [270]:
transformer.load_weights(checkpoint_filepath)

### Prediction from Pre-trained loaded transformer

In [308]:
predictions_reconstructed_model = transformer((X_train_f), training=False)
y_test_redused = y_train

In [309]:
predictions_reconstructed_model.shape

TensorShape([46040, 1])

In [310]:
i = 32
print('Predictions = \n',predictions_reconstructed_model[i])
print('Actuals = \n',y_test_redused[i])

Predictions = 
 tf.Tensor([-2.411502], shape=(1,), dtype=float32)
Actuals = 
 [-2.35]


### Compare saved model with Actuals

In [311]:
y_pred = predictions_reconstructed_model.numpy().reshape(-1)
y_test = y_test_redused.reshape(-1)
print(y_pred.shape)
print(y_test.shape)

(46040,)
(46040,)


In [312]:
score_mae = mean_absolute_error(y_test, y_pred)
print("The Mean Absolute Error of our Model is {}".format(round(score_mae, 2)))
score_rmse = mean_squared_error(y_test, y_pred)
print("The Root Mean Squared Error of our Model is {}".format(round(score_rmse, 2)))
score_r2 = r2_score(y_test, y_pred)
print("The accuracy of our model is {}%".format(round(score_r2, 2) *100))

The Mean Absolute Error of our Model is 3.48
The Root Mean Squared Error of our Model is 46.59
The accuracy of our model is 28.999999999999996%


In [304]:
fig = go.Figure(data=[go.Ohlc(x=np.arange(0,10000,1),
                    open=y_pred,
                    high=y_pred,
                    low=y_test,
                    close=y_test
                              )])
fig.show()

### Exporting the outputs to csv

In [None]:
data_df = pd.read_csv(path+'/OSF_results.csv')
data_df.columns

Index(['Actuals', 'OSF_Model_1', 'OSF_2'], dtype='object')

In [None]:
# Uncomment only if you wish to compare the data in excel sheet
data_df = pd.read_csv(path+'/OSF_results.csv')
#data_df.drop(columns=['Unnamed: 0'], axis=1, inplace=True)

# Using DataFrame.insert() to add a column
#data_df.insert(2, "OSF_2", y_reconstructed_model, True)
data_df.insert(2, "OSF_2_better", y_pred, True)

data_df

Unnamed: 0,Actuals,OSF_Model_1,OSF_2_better,OSF_2
0,542,542,542,542
1,509,509,509,509
2,722,562,586,588
3,517,517,517,517
4,525,525,525,525
...,...,...,...,...
2518,520,520,520,520
2519,500,500,500,500
2520,542,542,542,542
2521,523,523,523,523


In [None]:
data_df.to_csv(path+'/OSF_results.csv',index=False)

# Failed Models

# Success Models

In [None]:
# 1st Best model
# Input formation
'''
data_q1 = data_df.iloc[:1200,:129].copy()
data_q2 = data_df.iloc[:1200,128:257].copy()
data_q3 = data_df.iloc[:1200,256:385].copy()
data_q4 = data_df.iloc[:1200,384:].copy()
data_df_merged = pd.DataFrame()
data_df_merged = pd.concat([pd.DataFrame(data_q1.values),pd.DataFrame(data_q2.values),pd.DataFrame(data_q3.values),pd.DataFrame(data_q4.values)],
                           ignore_index=True)
'''
# Inputs
'''
X Train shape (3240, 128)
Context Train shape (3240, 1)
Y Train shape (3240, 1)
X Validation shape (1080, 128)
Context Validation shape (1080, 1)
Y Validation shape (1080, 1)
X Test shape (480, 128)
Context Test shape (480, 1)
Y Test shape (480, 1)
'''
# Hyperparameters
'''
# Hyperparameters for Fourier Transform
signal_len = X_train.shape[1] # Length of the input time series
frame_length = 128 # window size for Fourier Transform
frame_step = 64 # Hop size for Fourier Transform
fft_length = frame_length

# Hyperparameters for Attention Layer and DNN Layer
num_layers = 2 # number of TransformerEncoderLayer layers (Original paper = 6)
num_heads = 1 # number of self-attention heads in the MultiheadAttention layer (Original paper = 8)
dropout_rate = 0.2 # Dropout rate

Time Bins = 1
Frequency Bins = 65
output_shape=[batch_size, time_bins, frequency_bins]
Output shape = (batch_size, 1 , 65 )
'''
# Weights
'''
Model: "transformer"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ fourier_transform (FourierTransform) │ ?                           │             128 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ encoder (Encoder)                    │ ?                           │          69,160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ decoder (Decoder)                    │ ?                           │         103,870 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten (Flatten)                    │ (3, 65)                     │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_8 (Dense)                      │ (3, 1)                      │              66 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 173,224 (676.66 KB)
 Trainable params: 173,096 (676.16 KB)
 Non-trainable params: 128 (512.00 B)
'''

# Output
'''
Test
The Mean Absolute Error of our Model is 1.47
The Root Mean Squared Error of our Model is 20.26
The accuracy of our model is 32.0%