In [26]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, MultiHeadAttention


In [27]:
# Generate synthetic data
# Replace this with your actual data preparation
num_samples = 1000
sequence_length = 10
embedding_dim = 32

input_data = tf.random.normal((num_samples, sequence_length, embedding_dim))
output_labels = tf.random.normal((num_samples, 1))

In [28]:
def self_attention_model(input_shape):
    inputs = Input(shape=input_shape)
    
    # Self-Attention layer
    attention_output = MultiHeadAttention(num_heads=1, key_dim=embedding_dim)(inputs, inputs)
    
    # Output layer
    output = Dense(1, activation='linear')(attention_output)
    
    model = tf.keras.Model(inputs=inputs, outputs=output)
    return model

In [29]:
# Multi-Head Attention Model
def multi_head_attention_model(input_shape):
    inputs = Input(shape=input_shape)
    
    # Multi-Head Attention layer
    attention_output = MultiHeadAttention(num_heads=4, key_dim=embedding_dim)(inputs, inputs)
    
    # Output layer
    output = Dense(1, activation='linear')(attention_output)
    
    model = tf.keras.Model(inputs=inputs, outputs=output)
    return model


In [30]:
# Create and compile models
self_attention_model = self_attention_model((sequence_length, embedding_dim))
self_attention_model.compile(optimizer='adam', loss='mse')

multi_head_attention_model = multi_head_attention_model((sequence_length, embedding_dim))
multi_head_attention_model.compile(optimizer='adam', loss='mse')

In [31]:
# Generate synthetic test data (replace this with your actual data loading)
num_test_samples = 100
test_input_data = tf.random.normal((num_test_samples, sequence_length, embedding_dim))
test_output_labels = tf.random.normal((num_test_samples, 1))

# Self-Attention Model
self_attention_model.compile(optimizer='adam', loss='mse')
self_attention_mse = self_attention_model.evaluate(test_input_data, test_output_labels)
print("Self-Attention MSE:", self_attention_mse)

# Multi-Head Attention Model
multi_head_attention_model.compile(optimizer='adam', loss='mse')
multi_head_attention_mse = multi_head_attention_model.evaluate(test_input_data, test_output_labels)
print("Multi-Head Attention MSE:", multi_head_attention_mse)


Self-Attention MSE: 1.217741847038269
Multi-Head Attention MSE: 1.1947734355926514


In [32]:
self_attention_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
self_attention_accuracy = self_attention_model.evaluate(test_input_data, test_output_labels)
print("Self-Attention Accuracy:", self_attention_accuracy)

multi_head_attention_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
multi_head_attention_accuracy = multi_head_attention_model.evaluate(test_input_data, test_output_labels)
print("Multi-Head Attention Accuracy:", multi_head_attention_accuracy)


Self-Attention Accuracy: [-1.6928701400756836, 0.0]
Multi-Head Attention Accuracy: [-2.2005224227905273, 0.0]


In [36]:
from nltk.translate.bleu_score import corpus_bleu

# Assuming reference translations and model predictions
reference_translations = [['reference sentence 1', 'reference sentence 2', ...]]
model_predictions_self_attention = self_attention_model.predict(test_input_data)

# Debugging: Print lengths
print("Length of reference_translations:", len(reference_translations))
print("Length of model_predictions_self_attention:", len(model_predictions_self_attention))

# Debugging: Print content
print("Reference Translations:", reference_translations)
print("Model Predictions Self Attention:", model_predictions_self_attention)

# Ensure lengths match
assert len(reference_translations) == len(model_predictions_self_attention), "Mismatch in lengths."

# Assuming you have a function to convert indices to words
def indices_to_words(indices, vocabulary):
    return [vocabulary[idx] for idx in indices]

# Convert model predictions to sentences
def predictions_to_sentences(predictions, vocabulary):
    sentences = []
    for prediction in predictions:
        # Assuming each prediction is a sequence of word indices
        words = indices_to_words(prediction, vocabulary)
        sentence = ' '.join(words)
        sentences.append(sentence)
    return sentences

# Replace this with your actual vocabulary (mapping from indices to words)
# Example vocabulary
vocab = {0: 'hello', 1: 'world', 2: 'example', 3: 'another', 4: 'word'}


# Convert model predictions to sentences
predicted_sentences_self_attention = predictions_to_sentences(model_predictions_self_attention, vocab)

# Debugging: Print the predicted sentences
print("Predicted Sentences (Self Attention):", predicted_sentences_self_attention)

# Compute BLEU scores
bleu_self_attention = corpus_bleu(reference_translations, predicted_sentences_self_attention)
print("Self-Attention BLEU Score:", bleu_self_attention)


Length of reference_translations: 1
Length of model_predictions_self_attention: 100
Reference Translations: [['reference sentence 1', 'reference sentence 2', Ellipsis]]
Model Predictions Self Attention: [[[ 3.90645117e-02]
  [ 2.46397238e-02]
  [ 3.29668336e-02]
  [ 3.44508775e-02]
  [ 2.10584365e-02]
  [ 3.84328291e-02]
  [-5.40573150e-04]
  [ 3.17621902e-02]
  [ 1.33423582e-02]
  [ 4.75334376e-03]]

 [[-5.33390790e-04]
  [ 1.77176110e-02]
  [-1.08453874e-02]
  [ 5.38647175e-04]
  [ 4.39145602e-03]
  [ 4.81742620e-03]
  [-1.82201751e-02]
  [-3.47811170e-03]
  [ 4.05419990e-03]
  [-7.19079934e-03]]

 [[ 1.25713751e-01]
  [ 1.27052501e-01]
  [ 1.29180193e-01]
  [ 1.29016027e-01]
  [ 1.28474250e-01]
  [ 1.32035583e-01]
  [ 1.26247242e-01]
  [ 1.29582152e-01]
  [ 1.24318123e-01]
  [ 1.34755805e-01]]

 [[ 1.09360695e-01]
  [ 1.05911732e-01]
  [ 1.19810462e-01]
  [ 1.08492665e-01]
  [ 1.12857416e-01]
  [ 1.02907568e-01]
  [ 1.08929083e-01]
  [ 1.06982067e-01]
  [ 1.09494701e-01]
  [ 1.10546

AssertionError: Mismatch in lengths.