In [1]:
import tensorflow as tf
from tensorflow.keras import layers

# Define some hyperparameters
vocab_size = 10000  # Size of the vocabulary
embedding_dim = 768  # Dimensionality of the embeddings
# Maximum length of the input sequence.
# NOTE(review): not referenced anywhere in this cell -- the input below accepts
# variable-length sequences. Presumably intended for a positional embedding; confirm.
max_length = 50
num_heads = 8  # Number of attention heads in the multi-head attention mechanism
ffn_units = 512  # Number of units in the feed-forward neural network

# Define the input layer: a batch of variable-length sequences of integer token ids
inputs = tf.keras.Input(shape=(None,), dtype=tf.int32)

# Define the embedding layer: token ids -> (batch, seq_len, embedding_dim)
# NOTE(review): no positional information is added to the embeddings, so the
# attention layer has no notion of token order -- confirm whether that is intended.
embedding_layer = layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim)
x = embedding_layer(inputs)

# Define the transformer layer
# 1. Multi-Head Self Attention Mechanism
# key_dim is the per-head size, so the heads together span embedding_dim.
multi_head_attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim // num_heads)
attention_output = multi_head_attention(query=x, value=x, key=x)

# Residual connection + layer normalization around the attention sub-layer.
# The original code replaced x with the attention output, dropping the skip
# connection; this now mirrors the Add + LayerNormalization used for the
# feed-forward sub-layer below. The attention output keeps the embedding
# width, so the two tensors can be summed directly.
x = layers.Add()([x, attention_output])
x = layers.LayerNormalization()(x)

# 2. Position-wise Feed-Forward Neural Network
# Expands to ffn_units, then projects back to embedding_dim so the residual sum is valid.
ffn = tf.keras.Sequential([
    layers.Dense(ffn_units, activation='relu'),
    layers.Dense(embedding_dim)
])
ffn_output = ffn(x)

# Residual connection + layer normalization around the feed-forward sub-layer
x = layers.Add()([x, ffn_output])
x = layers.LayerNormalization()(x)

# Define the output layer (for a binary classification task as an example)
# Dense acts on the last axis, so this yields one sigmoid score per sequence
# position: output shape (batch, seq_len, 1).
# NOTE(review): for one label per sequence, a pooling step would be needed
# before this layer -- confirm the intended task.
outputs = layers.Dense(1, activation='sigmoid')(x)

# Build the model
model = tf.keras.Model(inputs, outputs)

# Display the model summary
model.summary()


2023-09-25 10:55:16.518478: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-09-25 10:55:16.518889: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 768)    7680000     ['input_1[0][0]']                
                                                                                                  
 multi_head_attention (MultiHea  (None, None, 768)   2362368     ['embedding[0][0]',              
 dAttention)                                                      'embedding[0][0]',              
                                                                  'embedding[0][0]']              
             