# In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, BatchNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import Precision, Recall

# SelfAttention Layer
class SelfAttention(Layer):
    """Single-head dot-product self-attention with a residual connection.

    The input is projected into query, key and value tensors by three
    bias-free ``Dense`` layers. Attention weights are the softmax (over the
    last axis) of ``q @ k^T``; the scores are *not* scaled by ``sqrt(units)``.
    The weighted values are added back onto the input.

    Args:
        units: Output width of the query/key/value projections. Because the
            result is added to the input, ``units`` has to match (or
            broadcast against) the size of the input's last dimension.
        **kwargs: Standard Keras ``Layer`` keyword arguments such as ``name``,
            ``trainable`` or ``dtype``. They are forwarded to the base class
            so the layer can be re-created from its saved config.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can report the constructor argument.
        self.units = units
        self.Wq = Dense(units, use_bias=False)
        self.Wk = Dense(units, use_bias=False)
        self.Wv = Dense(units, use_bias=False)

    def call(self, x):
        """Apply self-attention to ``x`` and return ``attention(x) + x``."""
        q = self.Wq(x)
        k = self.Wk(x)
        v = self.Wv(x)
        # Pairwise similarity between positions: q times k transposed.
        attention_scores = tf.matmul(q, k, transpose_b=True)
        attention_weights = tf.nn.softmax(attention_scores)
        attention_output = tf.matmul(attention_weights, v)
        # Residual connection back onto the layer input.
        return attention_output + x

    def get_config(self):
        """Return the layer config, including ``units``, for serialization.

        Without this override a model containing the layer cannot be saved
        and restored as a whole (architecture plus weights), because the
        required ``units`` constructor argument would be missing.
        """
        config = super().get_config()
        config.update({"units": self.units})
        return config

def build_optimized_model(
    input_shape=(10, 256, 256, 1),
    num_classes=3,
    filters=64,
    kernel_size=5,
    lstm_units=32,
    dropout_rate=0.3,
    dense_units=64,
    learning_rate=0.00020063,
    l2_factor=0.01,
):
    """Build and compile the CNN + LSTM + self-attention classifier.

    Every argument defaults to the value that was previously hard-coded, so
    calling the function with no arguments produces the same model as before.

    Args:
        input_shape: Per-sample input shape (without the batch dimension).
            The ``TimeDistributed`` convolution stack treats the first axis
            as the sequence axis and the remaining three as the 2D image
            fed to ``Conv2D``.
        num_classes: Number of units in the softmax output layer.
        filters: Filters in the first convolution; the second uses twice
            as many.
        kernel_size: Side length of the square convolution kernels.
        lstm_units: Width of the LSTM output; also used as the projection
            width of the ``SelfAttention`` layer so its residual addition
            lines up.
        dropout_rate: Dropout rate used after the LSTM and after the dense
            layer.
        dense_units: Units in the hidden dense layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        l2_factor: L2 kernel-regularization factor of the hidden dense layer.

    Returns:
        A compiled ``Model`` using categorical cross-entropy loss and the
        metrics accuracy, precision and recall (in that order).
    """
    inputs = Input(shape=input_shape)

    # Convolution and Pooling Layers (applied independently to every step
    # of the sequence axis via TimeDistributed).
    x = TimeDistributed(Conv2D(filters, (kernel_size, kernel_size), activation='relu'))(inputs)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    x = TimeDistributed(Conv2D(filters * 2, (kernel_size, kernel_size), activation='relu'))(x)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    # One feature vector per step, ready for the recurrent layer.
    x = TimeDistributed(Flatten())(x)

    # LSTM Layer (keeps the full sequence so attention can see every step).
    x = LSTM(lstm_units, return_sequences=True)(x)
    x = Dropout(dropout_rate)(x)
    x = BatchNormalization()(x)

    # Self-Attention Layer
    x = SelfAttention(lstm_units)(x)
    x = Flatten()(x)

    # Dense Layer
    x = Dense(dense_units, activation='relu', kernel_regularizer=l2(l2_factor))(x)
    x = Dropout(dropout_rate)(x)

    # Output Layer
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
    )

    return model

# Build the network. 'train_dataset', 'val_dataset', and 'test_dataset' are
# assumed to be defined before this point.
model = build_optimized_model()

# Callbacks: stop once validation loss has not improved for 10 epochs (and
# roll back to the best weights), and keep the checkpoint with the best
# validation accuracy on disk.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
model_checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
)

# Train for at most 100 epochs, validating after each one.
history = model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=100,
    callbacks=[early_stopping, model_checkpoint],
)

# Score the held-out test set. The returned list holds the loss followed by
# the compiled metrics in the order they were given to compile().
results = model.evaluate(x=test_dataset)
print(f"Test Loss: {results[0]}")
print(f"Test Accuracy: {results[1]}")
print(f"Test Precision: {results[2]}")
print(f"Test Recall: {results[3]}")
