In [18]:
import numpy as np
from tensorflow.keras.layers import Dense, Input, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed

In [11]:
# Toy parallel corpus: english_sentences[i] is translated by french_sentences[i].
english_sentences = [
    "hello how are you",
    "i love programming",
    "good morning",
    "thank you",
]
french_sentences = [
    "bonjour comment allez vous",
    "j'aime la programmation",
    "bonjour",
    "merci",
]


In [12]:
# Build one word -> integer-id vocabulary per language, each fitted only on
# its own side of the corpus.
eng_tokenizer = Tokenizer()
eng_tokenizer.fit_on_texts(english_sentences)

fra_tokenizer = Tokenizer()
fra_tokenizer.fit_on_texts(french_sentences)

In [13]:
# Convert every sentence to a list of integer word ids.
eng_sequences = eng_tokenizer.texts_to_sequences(english_sentences)
fra_sequences = fra_tokenizer.texts_to_sequences(french_sentences)

# The model below emits one prediction per input timestep, so source and
# target must be padded to the SAME length. Padding each side to its own
# longest sentence only works when those lengths happen to coincide.
max_len = max(len(seq) for seq in eng_sequences + fra_sequences)

# pad_sequences left-pads with 0 by default; id 0 is never assigned to a word.
X = pad_sequences(eng_sequences, maxlen=max_len)
y = pad_sequences(fra_sequences, maxlen=max_len)

In [14]:
# The recurrent layer is fed (samples, timesteps, features): add a trailing
# axis of size 1 so each token id is the single feature of its timestep.
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

In [19]:
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation -- and
# therefore the training curve and the translations -- are reproducible under
# Restart & Run All.
set_random_seed(42)

# +1 because word ids start at 1; class 0 is the padding id.
vocab_size = len(fra_tokenizer.word_index) + 1

# Per-timestep classifier: the RNN returns its hidden state at every step and
# the Dense layer turns each state into a distribution over French word ids.
model = Sequential([
    # Explicit Input layer instead of the deprecated `input_shape=` layer kwarg.
    Input(shape=(X.shape[1], 1)),
    SimpleRNN(32, return_sequences=True),
    Dense(vocab_size, activation='softmax'),
])
model.compile(
    optimizer=Adam(learning_rate=0.01),
    # Targets are integer ids (not one-hot), hence the sparse loss.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Keep the History object (per-epoch loss/accuracy) instead of letting its
# repr leak out as the cell's output.
history = model.fit(X, y, epochs=20)

Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3125 - loss: 2.3484
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.4375 - loss: 2.1500
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.5000 - loss: 1.9995
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.5625 - loss: 1.8734
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.6250 - loss: 1.7585
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.6875 - loss: 1.6505
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.7500 - loss: 1.5477
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.7500 - loss: 1.4486
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x790eac0d3580>

In [20]:
def translate(text):
    """Translate an English sentence to French with the trained model.

    The sentence is tokenised with ``eng_tokenizer``, padded to the training
    sequence length (``X.shape[1]``), run through ``model``, and the most
    probable French word id at each timestep is mapped back to a word.

    Args:
        text: English sentence as a single string.

    Returns:
        The predicted French words joined by single spaces. Timesteps whose
        predicted id is 0 (padding) are omitted, so the result may be an
        empty string.

    Note:
        The tokenizer was created without an OOV token, so words not seen
        during fitting are dropped. With ``pad_sequences`` defaults, an input
        longer than the training length is truncated from the front.
    """
    n_steps = X.shape[1]
    token_ids = eng_tokenizer.texts_to_sequences([text])
    sequence = pad_sequences(token_ids, maxlen=n_steps)

    # verbose=0 keeps Keras' progress bar out of the printed translations.
    prediction = model.predict(sequence.reshape(1, n_steps, 1), verbose=0)[0]

    # prediction is (timesteps, vocab): take the best word id per timestep.
    word_ids = np.argmax(prediction, axis=-1)
    return ' '.join(fra_tokenizer.index_word[i] for i in word_ids if i != 0)

# Smoke test: translate two sentences that were part of the training corpus
# and print each source sentence followed by the model's output.
print("\nTranslations:")
for text in ("hello how are you", "good morning"):
    print(f"English: {text}")
    print(f"French: {translate(text)}\n")


Translations:
English: hello how are you
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
French: comment allez vous

English: good morning
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
French: bonjour

