In [39]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Toy parallel corpus: each tuple is (English sentence, French translation).
# Keeping the pairs side by side makes it obvious which sentences are aligned.
parallel_corpus = [
    ("i love you", "je t'aime"),
    ("how are you", "comment ça va"),
    ("good morning", "bonjour"),
    ("thank you", "merci"),
    ("see you soon", "à bientôt"),
]
english_texts = [source for source, _ in parallel_corpus]
french_texts = [target for _, target in parallel_corpus]

In [40]:
def _fit_tokenizer(texts):
    """Return a Keras ``Tokenizer`` whose vocabulary has been fitted on ``texts``."""
    fitted = tf.keras.preprocessing.text.Tokenizer()
    fitted.fit_on_texts(texts)
    return fitted


def _pad_post(sequences):
    """Pad integer sequences at the end so they form one rectangular array."""
    return tf.keras.preprocessing.sequence.pad_sequences(sequences, padding='post')


# One tokenizer per language: the two vocabularies are independent.
tokenizer_en = _fit_tokenizer(english_texts)
tokenizer_fr = _fit_tokenizer(french_texts)

# Sentences -> lists of integer word ids.
sequences_en = tokenizer_en.texts_to_sequences(english_texts)
sequences_fr = tokenizer_fr.texts_to_sequences(french_texts)

# Ragged id lists -> padded 2-D arrays (model input X, targets y).
X = _pad_post(sequences_en)
y = _pad_post(sequences_fr)

# The +1 leaves room for id 0, which is what pad_sequences fills with and
# which the tokenizer never assigns to a word.
vocab_inp = len(tokenizer_en.word_index) + 1
vocab_tar = len(tokenizer_fr.word_index) + 1

# Model hyperparameters: embedding width, attention heads, feed-forward width.
embed_dim, num_heads, ff_dim = 64, 2, 128

In [41]:
class TransformerBlock(layers.Layer):
    """Single self-attention encoder block.

    The block applies multi-head self-attention followed by a two-layer
    feed-forward network. Each sub-layer is wrapped in a residual connection
    and followed by layer normalization.

    Args:
        embed_dim: Size of the last axis of the input; also the output width
            of the feed-forward network, so the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``,
            ``dtype``, ``trainable``, ...). Forwarding them is what allows the
            layer to be named and to be rebuilt from its config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim

        # NOTE(review): key_dim is the per-head projection size; passing
        # embed_dim gives every head a full-width projection rather than
        # embed_dim // num_heads. Kept as in the original - confirm intent.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run self-attention and the feed-forward network on ``inputs``.

        Returns a tensor produced by ``layernorm2``; the residual additions
        require it to have the same shape as ``inputs``.
        """
        # Self-attention: the same tensor serves as query and value.
        attn_output = self.att(inputs, inputs)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
        })
        return config

In [42]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# re-running this cell (and the training cell after it) gives the same model.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Input is one padded row of English word ids (length = padded width of X).
inputs = layers.Input(shape=(X.shape[1],))
# NOTE(review): mask_zero is not enabled, so padding id 0 is embedded,
# attended to and averaged like a real token - confirm this is acceptable.
embedding = layers.Embedding(vocab_inp, embed_dim)(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)(embedding)
# Average over the sequence axis to get one vector per sentence.
flatten = layers.GlobalAveragePooling1D()(transformer_block)
# Softmax over the French vocabulary: the model emits a single word id per
# input sentence, which matches the sparse (integer-label) loss below.
outputs = layers.Dense(vocab_tar, activation="softmax")(flatten)

model = models.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [34]:

# Only the first French token of every sentence is used as the label, so the
# network is trained as a sentence -> first-word classifier.
first_target_tokens = y[:, 0]
model.fit(x=X, y=first_target_tokens, epochs=50, verbose=0)

<keras.src.callbacks.history.History at 0x19a52350050>

In [38]:
# Encode the query with the same tokenizer and padded width used for X.
test_input = tokenizer_en.texts_to_sequences(["Thank you"])
test_input = tf.keras.preprocessing.sequence.pad_sequences(test_input, maxlen=X.shape[1], padding='post')
pred = model.predict(test_input)
# Most probable class id for the single sentence in the batch.
pred_word_index = int(tf.argmax(pred[0]).numpy())

# index_word is the tokenizer's own id -> word mapping (the inverse of
# word_index), so no manual scan is needed. Id 0 is never assigned to a word;
# report that case instead of printing nothing.
pred_word = tokenizer_fr.index_word.get(pred_word_index)
if pred_word is None:
    print("No French word for predicted index:", pred_word_index)
else:
    print("Predicted French word:", pred_word)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Predicted French word: merci
