In [1]:
import tensorflow as tf

In [2]:
from tensorflow.keras import Model

In [3]:
from tensorflow.keras.layers import Input,Embedding,LSTM,Dense

In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [5]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [6]:
import numpy as np

In [7]:
english_texts = [
    "I love natural language processing.",
    "TensorFlow is a powerful framework.",
    "Machine learning is fascinating.",
    "This is an example text.",
    "Neural networks are amazing.",
    "AI is the future of technology."
]

# French example sentences (translations of English sentences)
french_texts = [
    "J'adore le traitement du langage naturel.",
    "TensorFlow est un cadre puissant.",
    "L'apprentissage automatique est fascinant.",
    "Ceci est un exemple de texte.",
    "Les réseaux neuronaux sont incroyables.",
    "L'IA est l'avenir de la technologie."
]

In [8]:
french_texts=["<start>"+" "+sentence+" "+"<end>" for sentence in french_texts]

In [9]:
EnglishTokenizer=Tokenizer(oov_token="<UNK>")
EnglishTokenizer.fit_on_texts(english_texts)
src_sentences=EnglishTokenizer.texts_to_sequences(english_texts)
src_max_len=max(len(i) for i in src_sentences)
print(src_max_len)
src_input_sent=pad_sequences(src_sentences,maxlen=src_max_len,padding='post')

6


In [10]:
FrenchTokenizer=Tokenizer(oov_token="<UNK>")
FrenchTokenizer.fit_on_texts(french_texts)
trg_sentences=FrenchTokenizer.texts_to_sequences(french_texts)
trg_max_len=max(len(i) for i in trg_sentences)
print(trg_max_len)
trg_input_sent=pad_sequences(trg_sentences,maxlen=trg_max_len,padding='post')

8


In [11]:
trg_input_sent.shape,src_input_sent.shape

((6, 8), (6, 6))

In [16]:
English_word2text=EnglishTokenizer.word_index
French_word2text=FrenchTokenizer.word_index

In [17]:
src_vocab_size=len(English_word2text)+1
trg_vocab_size=len(French_word2text)+1

In [18]:
dim=10
lstm_units=5

In [2]:
import tensorflow as tf

In [None]:
encoder_input=Input(shape=(src_max_len,))
encoder_embedded=Embedding(src_vocab_size,dim,trainable=True)
encoder_embed=encoder_embedded(encoder_input)
encoder_lstm=LSTM(lstm_units,return_sequences=True,return_state=True)
encoder_output,state_h,state_c=encoder_lstm(encoder_embed)

decoder_input=Input(shape=(trg_max_len,))
decoder_embedded=Embedding(trg_vocab_size,dim,trainable=True)
decoder_embed=decoder_embedded(decoder_input)
decoder_lstm=LSTM(lstm_units,return_sequences=True,return_state=True)
decoder_output,state_h1,state_c1=decoder_lstm(decoder_embed,initial_state=[state_h,state_c])
decoder_dense=Dense(trg_vocab_size,activation="softmax")
decoder_output=decoder_dense(decoder_output)

model=Model([encoder_input,decoder_input],[decoder_output])

In [53]:
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_11 (InputLayer)       [(None, 6)]                  0         []                            
                                                                                                  
 input_12 (InputLayer)       [(None, 8)]                  0         []                            
                                                                                                  
 embedding_8 (Embedding)     (None, 6, 10)                280       ['input_11[0][0]']            
                                                                                                  
 embedding_9 (Embedding)     (None, 8, 10)                310       ['input_12[0][0]']            
                                                                                            

In [54]:
optimizer=tf.keras.optimizers.Adam(learning_rate=0.7,clipnorm=5)

In [55]:
model.compile(loss='categorical_crossentropy',optimizer=optimizer,metrics=['accuracy'])

In [56]:
trg_one_hot=tf.one_hot(trg_input_sent,depth=trg_vocab_size)

In [57]:
trg_one_hot.shape

TensorShape([6, 8, 31])

## Training(Teacher-Forcing)

In [102]:
loss_function=tf.keras.losses.SparseCategoricalCrossentropy()

In [103]:
def train_step(inputs, targets):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_function(targets, predictions)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss

In [104]:
batch_size=3
epochs=10
for epoch in range(epochs):
    total_loss = 0.0
    steps_per_epoch = src_max_len // batch_size

    for step in range(steps_per_epoch):
        start = step * batch_size
        end = (step + 1) * batch_size

        batch_inputs = src_input_sent[start:end]
        batch_targets = trg_input_sent[start:end]
        loss = train_step([batch_inputs,batch_targets],batch_targets)
        total_loss += loss

    average_loss = total_loss / steps_per_epoch
    print(f"Epoch {epoch + 1}/{epochs} - Loss: {average_loss:.4f}")

Epoch 1/10 - Loss: 0.5395
Epoch 2/10 - Loss: 0.5833
Epoch 3/10 - Loss: 0.6348
Epoch 4/10 - Loss: 0.6084
Epoch 5/10 - Loss: 1.1494
Epoch 6/10 - Loss: 0.8982
Epoch 7/10 - Loss: 0.7278
Epoch 8/10 - Loss: 0.6401
Epoch 9/10 - Loss: 1.4008
Epoch 10/10 - Loss: 0.8916


## fit method 

In [66]:
model.fit([src_input_sent,trg_input_sent],trg_one_hot,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2b7e14f4a30>

## Evaluation

In [26]:
test_english_texts = [
    "I like deep learning.",
    "AI is changing the world.",
    "natural language processing is exciting."
]

test_french_texts = [
    "J'aime l'apprentissage profond.",
    "L'IA change le monde.",
    "Le traitement du langage naturel est passionnant."
]

In [27]:
test_src_sentences=EnglishTokenizer.texts_to_sequences([test_english_texts])
test_trg_sentences=FrenchTokenizer.texts_to_sequences([test_french_texts])

test_src_padding=pad_sequences(test_src_sentences,maxlen=src_max_len,padding="post")
test_tar_padding=pad_sequences(test_trg_sentences,maxlen=trg_max_len,padding="post")

In [28]:
loss,accuracy=model.evaluate([test_src_padding,test_tar_padding],tf.one_hot(test_tar_padding,depth=trg_vocab_size))



## Inference(Prediction) 

In [29]:
src="natural language processing"
trg=np.zeros((1,1))
trg[0,0]=French_word2text["start"]

In [30]:
new=EnglishTokenizer.texts_to_sequences([src])

In [31]:
new_src_padding=pad_sequences(new,maxlen=src_max_len,padding="post")

In [32]:
new_src_padding

array([[5, 6, 7, 0, 0, 0]])

In [33]:
output_sent=model.predict([new_src_padding,trg])



In [34]:
output_sent

array([[[2.22221070e-06, 6.04132083e-06, 9.02282774e-01, 3.21875305e-05,
         1.94454137e-02, 1.72299275e-03, 1.66824926e-02, 8.45050719e-03,
         2.53484264e-04, 3.57941724e-03, 1.78835558e-06, 2.16635417e-05,
         2.81908922e-03, 1.16851786e-03, 6.72145747e-04, 6.60289970e-06,
         5.49640926e-03, 4.64118831e-03, 4.88242926e-03, 5.69077255e-03,
         2.40993500e-03, 2.35546287e-03, 3.81117105e-04, 8.97551712e-04,
         3.36478843e-06, 5.80939988e-04, 1.40836276e-03, 3.90062225e-04,
         7.30626334e-06, 2.41539720e-03, 1.12923635e-02]]], dtype=float32)

In [35]:
translated_french_text = FrenchTokenizer.sequences_to_texts(output_sent[0])

In [36]:
translated_french_text

['<UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK> <UNK>']

In [37]:
encoder_model=Model(encoder_input,[encoder_output,state_h,state_c])

In [38]:
encoder_model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 6)]               0         
                                                                 
 embedding_2 (Embedding)     (None, 6, 10)             280       
                                                                 
 lstm_2 (LSTM)               [(None, 6, 5),            320       
                              (None, 5),                         
                              (None, 5)]                         
                                                                 
Total params: 600 (2.34 KB)
Trainable params: 600 (2.34 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [68]:
decoder_model=Model([decoder_input,[state_h,state_c]],[decoder_output,state_h1,state_c1])

In [69]:
decoder_model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 8)]                  0         []                            
                                                                                                  
 embedding_9 (Embedding)     (None, 8, 10)                310       ['input_12[0][0]']            
                                                                                                  
 input_15 (InputLayer)       [(None, 5)]                  0         []                            
                                                                                                  
 input_16 (InputLayer)       [(None, 5)]                  0         []                            
                                                                                            

## Greedy decoding

In [82]:
def greedy_decode_sequence(input_seq):
    input_seq=np.expand_dims(input_seq,axis=0)
    encoder_output, state_h, state_c = encoder_model.predict(input_seq)
    target_seq = np.array([[French_word2text['start']]])
    output_seq = []
    stop_condition=False
    while not stop_condition:
        decoder_output, state_h1, state_c1 = decoder_model.predict([target_seq,[state_h, state_c]])

        # Get the next token index (greedy decoding)
        sampled_token_index = np.argmax(decoder_output[0, 0, :])
        if sampled_token_index ==0:
            break
        if sampled_token_index == French_word2text['end'] or len(output_seq) >= trg_max_len- 1:
            stop_condition=True

        output_seq.append(sampled_token_index)
        target_seq = np.array([[sampled_token_index]])
        state_h,state_c=state_h1,state_c1

    return output_seq


In [96]:
test_english_texts = [
    "I like deep learning.",
    "AI is changing the world.",
    "natural language processing is exciting."
]

test_french_texts = [
    "J'aime l'apprentissage profond.",
    "L'IA change le monde.",
    "Le traitement du langage naturel est passionnant."
]

test_english_sequences = EnglishTokenizer.texts_to_sequences(test_english_texts)
test_french_sequences = FrenchTokenizer.texts_to_sequences(test_french_texts)

test_english_padded_sequences = pad_sequences(test_english_sequences, maxlen=src_max_len)
test_french_padded_sequences = pad_sequences(test_french_sequences, maxlen=trg_max_len)

In [84]:
seq_gred=greedy_decode_sequence(test_english_padded_sequences[1])



In [85]:
seq_gred

[2, 25, 25, 25, 25, 25, 25, 25]

In [90]:
French_text2word=FrenchTokenizer.index_word

In [92]:
m=""
for i in seq_gred:
    m+=French_text2word[i]+" "
m

'start sont sont sont sont sont sont sont '

In [95]:
test_french_texts[1]

"L'IA change le monde."