In [1]:
import tensorflow as tf
import numpy as np




In [2]:
source_texts = ['hello','how are you','goodbye']
target_texts = ['bonjour','comment ca va','au revoir']

In [3]:
source_vocab = set(' '.join(source_texts))
target_vocab = set(' '.join(target_texts))
source_vocab_size = len(source_vocab)
target_vocab_size = len(target_vocab)
print(source_vocab)

{'w', 'g', 'b', 'y', 'u', 'r', ' ', 'l', 'o', 'd', 'a', 'h', 'e'}


In [4]:
source_char_to_int = {char:idx for idx,char in enumerate(source_vocab)}
target_char_to_int = {char:idx for idx,char in enumerate(target_vocab)}
source_int_to_char = {idx:char for char,idx in source_char_to_int.items()}
target_int_to_char = {idx:char for char,idx in target_char_to_int.items()}

In [5]:
source_sequences = [[source_char_to_int[char] for char in text] for text in source_texts]
print(source_sequences)
target_sequences = [[target_char_to_int[char] for char in text] for text in target_texts]

[[11, 12, 7, 7, 8], [11, 8, 0, 6, 10, 5, 12, 6, 3, 8, 4], [1, 8, 8, 9, 2, 3, 12]]


max_sequences_length = max(len(seq) for seq in source_sequences)

In [6]:
max_sequence_length = max(len(seq) for seq in source_sequences)
print(max_sequence_length)
source_sequences = tf.keras.preprocessing.sequence.pad_sequences(source_sequences,maxlen=max_sequence_length,padding='post')
print(source_sequences)
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(target_sequences,maxlen=max_sequence_length,padding='post')

11
[[11 12  7  7  8  0  0  0  0  0  0]
 [11  8  0  6 10  5 12  6  3  8  4]
 [ 1  8  8  9  2  3 12  0  0  0  0]]


In [7]:
input_shape =(max_sequence_length,source_vocab_size)
output_shape =(max_sequence_length,target_vocab_size)

model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(source_vocab_size,64,input_length=max_sequence_length),
    tf.keras.layers.SimpleRNN(128,return_sequences=True),
    tf.keras.layers.Dense(target_vocab_size,activation='softmax')
])

model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])





In [8]:
target_sequences_one_hot = np.array([tf.keras.utils.to_categorical(seq,num_classes=target_vocab_size) for seq in target_sequences])
print(target_sequences_one_hot)

[[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.

In [9]:
model.fit(source_sequences,target_sequences_one_hot,epochs=500)

Epoch 1/500


Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 

<keras.src.callbacks.History at 0x20d7ac6fc90>

In [10]:
input_sequence = "how are you"
input_sequence = [source_char_to_int[char] for char in input_sequence]
input_sequence = tf.keras.preprocessing.sequence.pad_sequences([input_sequence],maxlen=max_sequence_length,padding='post')
output_sequence = model.predict(input_sequence)[0]



In [11]:
output_sequence = [target_int_to_char[np.argmax(char)] for char in output_sequence]

In [12]:
print("Input Sequence: how are you ")
print("Translated Sequence:",''.join(output_sequence))

Input Sequence: how are you 
Translated Sequence: mment ca va
