In [None]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Bidirectional, Layer, GlobalMaxPool1D, GlobalMaxPooling1D, GlobalAveragePooling1D, Concatenate, Dropout, Add, Attention

from tensorflow.keras.optimizers.legacy import Adam, SGD, RMSprop
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError

from tensorflow.keras.regularizers import L1L2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Layer, Dense, Dot, Activation
from tensorflow.keras.regularizers import l1_l2

from attention import Attention

from tensorflow.keras.callbacks import Callback


from tensorflow.keras import backend as K

from matplotlib import pyplot as plt

# Reset Keras' global state so that re-running this cell starts from a clean
# slate instead of stacking new layers/metrics on top of earlier runs.
tf.keras.backend.clear_session()

# Custom callback to stop training when AUC exceeds 70%
class AUCThresholdCallback(Callback):
    """Stop training once a monitored metric rises strictly above a threshold.

    Args:
        threshold: Value the monitored metric must exceed for training to
            be stopped.
        monitor: Key looked up in the ``logs`` dict passed to
            ``on_epoch_end``. Defaults to ``'val_auc'``; if that key is not
            present in ``logs`` the callback silently does nothing.
    """

    def __init__(self, threshold=0.7, monitor='val_auc'):
        super().__init__()
        self.threshold = threshold
        self.monitor = monitor

    def on_epoch_end(self, epoch, logs=None):
        """Check the monitored metric and request a stop if it is high enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict for the epoch; may be ``None``.
        """
        # The signature allows logs=None, so guard before calling .get().
        logs = logs or {}
        auc = logs.get(self.monitor)
        if auc is not None and auc > self.threshold:
            print(f"\nStopping training as AUC reached {auc:.2f}, which is above the threshold of {self.threshold}.")
            self.model.stop_training = True


auc_callback = AUCThresholdCallback(threshold=0.7)

class AttentionLayer(Layer):
    """Attention pooling that reduces axis 1 of the input to a weighted sum.

    A tanh projection of the input is scored against a learned vector, the
    scores are softmax-normalised along axis 1, and the input is summed over
    axis 1 using those weights.
    Assumes inputs are shaped (batch, timesteps, features) — TODO confirm.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection matrix, bias and scoring vector."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, feature_dim),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        self.u = self.add_weight(
            name='attention_u',
            shape=(feature_dim,),
            initializer='random_normal',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs`` over axis 1."""
        # Project every position, then squash with tanh.
        projected = tf.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        # One scalar score per position, normalised along axis 1.
        scores = tf.tensordot(projected, self.u, axes=1)
        weights = tf.nn.softmax(scores, axis=1)
        # Broadcast the weights over the last axis and collapse axis 1.
        weighted_inputs = inputs * tf.expand_dims(weights, -1)
        return tf.reduce_sum(weighted_inputs, axis=1)

# Settings for the multi-head attention experiment (SelfAttention_head).
num_heads, key_dim = 20, 8

class SelfAttention_head(tf.keras.layers.Layer):
    """Multi-head self-attention: the input attends to itself.

    Thin wrapper around ``tf.keras.layers.MultiHeadAttention`` that passes
    the same tensor as both query and value.

    Args:
        num_heads: Number of attention heads.
        key_dim: Size of each attention head for query and key.
        **kwargs: Standard Keras ``Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, num_heads, key_dim, **kwargs):
        # Zero-argument super(): the previous super(SelfAttention, self)
        # referenced an unrelated class and raised TypeError on construction.
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.attention = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)

    def call(self, inputs):
        """Apply self-attention; ``inputs`` is used as query and as value."""
        return self.attention(inputs, inputs)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({'num_heads': self.num_heads, 'key_dim': self.key_dim})
        return config
    

# class SelfAttention(Layer):
#     def __init__(self, units):
#         super(SelfAttention, self).__init__()
#         self.units = units
#         self.W = Dense(units)
#         self.U = Dense(units)
#         self.V = Dense(1)
    
#     def call(self, inputs):
#         # inputs.shape = (batch_size, timesteps, hidden_size)
#         score = tf.nn.tanh(self.W(inputs) + self.U(inputs))
#         attention_weights = tf.nn.softmax(self.V(score), axis=1)
#         context_vector = attention_weights * inputs
#         context_vector = tf.reduce_sum(context_vector, axis=1)  # Weighted sum of the inputs
#         return context_vector, attention_weights
    

class SelfAttention(Layer):
    """Dot-product self-attention with tanh-activated scores.

    Query, key and value are dense projections of the same input. Scores are
    the query/key dot products over the last axis, passed through tanh, and
    then combined with the value projection.
    Assumes inputs are shaped (batch, timesteps, features) — TODO confirm.

    Args:
        units: Output width of the query, key and value projections.
        l1: L1 penalty applied to the query and key projection kernels.
        l2: L2 penalty applied to the query and key projection kernels.
        **kwargs: Standard Keras ``Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, units, l1=1e-5, l2=1e-4, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.l1 = l1
        self.l2 = l2
        self.query_dense = Dense(units, kernel_regularizer=L1L2(l1=self.l1, l2=self.l2))
        self.key_dense = Dense(units, kernel_regularizer=L1L2(l1=self.l1, l2=self.l2))
        self.value_dense = Dense(units)  # No regularization on value, as it's just a projection
        # Weight-free helper layers are built once here instead of being
        # re-instantiated on every call().
        self.score_dot = Dot(axes=-1)
        self.score_activation = Activation('tanh')
        self.context_dot = Dot(axes=1)

    def call(self, inputs):
        """Return the attention-mixed value projection of ``inputs``."""
        # Query, Key, Value transformations
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)

        # Multiplicative self-attention (dot-product) over the last axis.
        attention_scores = self.score_dot([query, key])
        # NOTE(review): tanh (not softmax) is applied, so the weights are not
        # normalised to sum to 1 — confirm this is intended.
        attention_weights = self.score_activation(attention_scores)

        # Apply the attention weights to the value vectors.
        # NOTE(review): Dot(axes=1) contracts axis 1 of BOTH tensors, i.e. the
        # first (query-side) axis of the score matrix — confirm this is the
        # intended axis rather than the key-side one.
        context_vector = self.context_dot([attention_weights, value])

        return context_vector

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({'units': self.units, 'l1': self.l1, 'l2': self.l2})
        return config
    
# ---- Hyper-parameters for this run ----

# Kernel regularisation strengths (previously tried: l1 = 0.0001, l2 = 0.0001).
# NOTE(review): both are None here; presumably L1L2 treats None as "no
# penalty" — confirm against the installed Keras version.
l1 = l2 = None
regularization_criteria = L1L2(l1=l1, l2=l2)
# regularization_criteria = None

# Dropout rates: LSTM inputs, LSTM recurrent state, and the dense head.
lstm_dropout, recurrent_dropout, dense_dropout = 0.2, 0.0, 0.3

# Training loss (alternative tried: 'binary_crossentropy').
loss = 'mse'

# Target dimensions taken from the training labels.
n_target = Y_train.shape[-2:]   # last two axes of Y_train
n_target_y = Y_train.shape[-1]  # size of the last axis; width of the output layer

def build_model(input_shape):
    """Build the (uncompiled) LSTM -> Flatten -> Dense network.

    Reads these module-level settings: ``regularization_criteria``,
    ``lstm_dropout``, ``recurrent_dropout``, ``dense_dropout`` and
    ``n_target_y``.

    Args:
        input_shape: Per-sample shape handed to ``Input(shape=...)``.

    Returns:
        A ``Model`` mapping the input to ``n_target_y`` tanh outputs.
    """
    inputs = Input(shape=input_shape, name='Input_1')

    # Recurrent encoder: one LSTM that returns the full output sequence.
    x = LSTM(
        50,
        activation='tanh',
        recurrent_activation='tanh',
        return_sequences=True,
        kernel_regularizer=regularization_criteria,
        dropout=lstm_dropout,
        recurrent_dropout=recurrent_dropout,
        name="LSTM-1",
    )(inputs)

    # --- Disabled experiments, kept for reference ---
    # LSTM_1 = x
    # LSTM_2 = LSTM(50, return_sequences = True, activation = 'tanh', name="LSTM-2", kernel_regularizer = regularization_criteria, dropout=lstm_dropout, recurrent_dropout=recurrent_dropout)(LSTM_1)
    # x = Bidirectional(LSTM(50, return_sequences=True, recurrent_activation = 'tanh', activation = 'tanh', kernel_regularizer = regularization_criteria, dropout=lstm_dropout, recurrent_dropout=recurrent_dropout), name="BiLSTM-1")(x)
    # x = Bidirectional(LSTM(25, return_sequences=True, activation = 'tanh', kernel_regularizer = regularization_criteria, dropout=lstm_dropout, recurrent_dropout=recurrent_dropout), name="BiLSTM-2")(x)
    # LSTM_3 = LSTM(200, return_sequences = True, activation = 'tanh', name="LSTM-3", kernel_regularizer = regularization_criteria, dropout=lstm_dropout, recurrent_dropout=recurrent_dropout)(BiLSTM_2)
    # x = LSTM(200, return_sequences = True, activation = 'tanh', name="LSTM-4", kernel_regularizer = regularization_criteria, dropout=lstm_dropout, recurrent_dropout=recurrent_dropout)(LSTM_3)
    # x = BatchNormalization()(x)
    # x = Add( name='Con_1')([x,LSTM_1])
    # x = BatchNormalization()(x)
    # x = Attention(units=10, score='luong')(x)
    # x = Add( name='Con_1')([x,x])
    # x = BatchNormalization()(x)
    # x = SelfAttention(100, l1=l1, l2=l2)(x)
    # x = SelfAttention_head(num_heads, key_dim)(x)
    # # x = AttentionLayer()(x)
    # x = BatchNormalization()(x)

    # Collapse the LSTM output sequence into one vector per sample.
    x = Flatten()(x)

    # Dense head.
    x = Dense(20, activation='tanh', name='Dense_1')(x)
    x = Dropout(dense_dropout, name='Dropout_1')(x)
    x = Dense(10, activation='tanh', name='Dense_2')(x)
    # x = Dropout(dense_dropout, name='Dropout_2')(x)
    # x = Dense(10, activation = 'tanh', name='Dense_3')(x)
    # x = Dropout(dense_dropout, name='Dropout_3')(x)

    # Output layer: one tanh unit per target column.
    outputs = Dense(n_target_y, activation='tanh', name='Dense_4')(x)
    # outputs = tf.keras.layers.Activation("softmax")(outputs)
    # outputs = Lambda(lambda x: (x + 1) / 2) (outputs)

    return Model(inputs=inputs, outputs=outputs)

# Per-sample input shape: `look_back` steps by the size of X_train's third axis.
# (earlier attempt: input_shape = (len(X_train), 8))
input_shape = (look_back, X_train.shape[2])

# Unused sizing knobs from earlier experiments:
# lstm_units = 50
# dense_units = 10

optimizer = Adam(learning_rate=1e-3)

# Build, compile and print the architecture.
model = build_model(input_shape)
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=METRICS,
)
model.summary()

import time

# Time the training run with perf_counter(): it is monotonic and
# high-resolution, so the measured duration cannot be skewed by system clock
# adjustments the way time.time() differences can.
start_time = time.perf_counter()

# NOTE(review): the X_test/Y_test split is passed as validation data — confirm
# that no model selection is done on it if it is also the final hold-out set.
history = model.fit(
    X_train,
    Y_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_test, Y_test),
    # callbacks=[auc_callback],
    # class_weight=class_weight_dict
)

end_time = time.perf_counter()
difference_in_seconds = end_time - start_time
print(f"The difference in seconds is: {difference_in_seconds}")