In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model

In [2]:
# Transformer components
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one pre-norm Transformer encoder block to ``inputs``.

    The block has two stages, each wrapped in a residual (skip) connection:

    1. LayerNormalization -> multi-head self-attention -> Dropout, then the
       block input is added back.
    2. LayerNormalization -> Conv1D(ff_dim, kernel_size=1, relu) -> Dropout
       -> Conv1D(kernel_size=1) back to the input's last-axis size, then the
       stage-1 result is added back.

    Args:
        inputs: Symbolic Keras tensor. Only ``inputs.shape[-1]`` is read
            here; presumably laid out as (batch, timesteps, channels) since
            it is fed to Conv1D -- confirm against the caller.
        head_size: Passed to MultiHeadAttention as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first feed-forward convolution.
        dropout: Rate used inside the attention layer and by both Dropout
            layers of this block.

    Returns:
        The block output; its last-axis size equals ``inputs.shape[-1]``.
    """
    # The second feed-forward convolution must restore this width so the
    # final residual addition is shape-compatible.
    channels = inputs.shape[-1]

    # ---- Stage 1: self-attention on the normalised input ----
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )
    # Query and value are the same tensor, i.e. self-attention.
    attended = self_attention(normed, normed)
    attended = layers.Dropout(dropout)(attended)
    attention_out = attended + inputs  # first skip connection

    # ---- Stage 2: feed-forward network built from kernel_size=1 convs ----
    hidden = layers.LayerNormalization(epsilon=1e-6)(attention_out)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=channels, kernel_size=1)(hidden)
    return projected + attention_out  # second skip connection

# Build the model
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
    num_classes=2,
):
    """Assemble an (uncompiled) Transformer-encoder sequence classifier.

    Pipeline: Input -> Conv1D(head_size, kernel_size=1) -> a stack of
    ``transformer_encoder`` blocks -> GlobalAveragePooling1D -> a stack of
    Dense(relu) + Dropout layers -> Dense(num_classes, softmax).

    Args:
        input_shape: Per-sample shape handed to ``layers.Input``; the
            notebook uses (timesteps, features).
        head_size: Used twice: as the filter count of the initial Conv1D and
            as the ``head_size`` forwarded to every encoder block.
        num_heads: Attention heads per encoder block.
        ff_dim: Feed-forward width forwarded to every encoder block.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable of hidden sizes for the classification head; one
            Dense + Dropout pair is created per entry.
        dropout: Dropout rate forwarded to the encoder blocks.
        mlp_dropout: Dropout rate applied after each hidden Dense layer.
        num_classes: Number of units of the final softmax layer.

    Returns:
        A ``Model`` mapping the input tensor to class probabilities. The
        caller is responsible for compiling it.
    """
    model_input = layers.Input(shape=input_shape)

    # Pointwise (kernel_size=1) convolution: projects the features of every
    # timestep to `head_size` channels.
    # NOTE(review): this mixes channels within a single timestep only; no
    # explicit positional encoding is added anywhere in this function.
    features = layers.Conv1D(filters=head_size, kernel_size=1)(model_input)

    # Encoder stack; each block preserves the channel count of its input.
    for _block in range(num_transformer_blocks):
        features = transformer_encoder(features, head_size, num_heads, ff_dim, dropout)

    # Average over the time axis, then run the MLP classification head.
    pooled = layers.GlobalAveragePooling1D()(features)
    for units in mlp_units:
        pooled = layers.Dense(units, activation="relu")(pooled)
        pooled = layers.Dropout(mlp_dropout)(pooled)

    class_probs = layers.Dense(num_classes, activation="softmax")(pooled)
    return Model(model_input, class_probs)

# ---- Hyperparameters -------------------------------------------------------
# Data / task dimensions
input_shape = (128, 12)  # (timesteps, features)
num_classes = 10  # Number of output classes

# Encoder stack
head_size = 256  # initial projection width and attention key_dim (see build_model)
num_heads = 4
ff_dim = 256
num_transformer_blocks = 4
dropout = 0.1

# Classification head
mlp_units = [128]
mlp_dropout = 0.2

# ---- Build -----------------------------------------------------------------
model = build_model(
    input_shape=input_shape,
    head_size=head_size,
    num_heads=num_heads,
    ff_dim=ff_dim,
    num_transformer_blocks=num_transformer_blocks,
    mlp_units=mlp_units,
    dropout=dropout,
    mlp_dropout=mlp_dropout,
    num_classes=num_classes,
)

# ---- Compile ---------------------------------------------------------------
# The "sparse_" loss and metric take integer class ids as labels rather than
# one-hot vectors.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

# ---- Inspect ---------------------------------------------------------------
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 128, 12)]    0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 128, 256)     3328        ['input_1[0][0]']                
                                                                                                  
 layer_normalization (LayerNorm  (None, 128, 256)    512         ['conv1d[0][0]']                 
 alization)                                                                                       
                                                                                                  
 multi_head_attention (MultiHea  (None, 128, 256)    1051904     ['layer_normalization[0][0]',