In [2]:
import tensorflow as tf

In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
import numpy
import  numpy as np

In [5]:
# English example sentences
english_texts = [
    "I love natural language processing.",
    "TensorFlow is a powerful framework.",
    "Machine learning is fascinating.",
    "This is an example text.",
    "Neural networks are amazing.",
    "AI is the future of technology."
]

# French example sentences (translations of English sentences)
french_texts = [
    "J'adore le traitement du langage naturel.",
    "TensorFlow est un cadre puissant.",
    "L'apprentissage automatique est fascinant.",
    "Ceci est un exemple de texte.",
    "Les réseaux neuronaux sont incroyables.",
    "L'IA est l'avenir de la technologie."
]

In [6]:
# Tokenize the English and French sentences separately with OOV token
english_tokenizer = Tokenizer(oov_token="UNK")
english_tokenizer.fit_on_texts(english_texts)
english_word_index = english_tokenizer.word_index

french_tokenizer = Tokenizer(oov_token="UNK")
french_tokenizer.fit_on_texts(french_texts)
french_word_index = french_tokenizer.word_index

# Convert text data to sequences of integers
english_sequences = english_tokenizer.texts_to_sequences(english_texts)
french_sequences = french_tokenizer.texts_to_sequences(french_texts)

# Pad sequences to have the same length for modeling
max_sequence_length = 8
english_padded_sequences = pad_sequences(english_sequences, maxlen=max_sequence_length)
french_padded_sequences = pad_sequences(french_sequences, maxlen=max_sequence_length)


In [7]:
french_word_index["start"]=len(french_word_index)+1

In [8]:
from keras.layers import LSTM,Input

In [19]:
class TextClassificationModel(tf.keras.Model):
    def __init__(self,i_vocab_size,t_vocab_size,embedding_dim,max_sequence_length):
        super(TextClassificationModel, self).__init__()
        self.embedding_layer1 = tf.keras.layers.Embedding(i_vocab_size, embedding_dim, input_length=max_sequence_length)
        self.lstm1 = LSTM(10,return_sequences=True,return_state=True)
        self.embedding_layer2 = tf.keras.layers.Embedding(t_vocab_size, embedding_dim, input_length=max_sequence_length)
        self.lstm2 = LSTM(10,return_sequences=True,return_state=True)
        self.dense_layer = tf.keras.layers.Dense(t_vocab_size, activation='softmax')

    def call(self, inputs):
        e_inp,d_inp=inputs
        x = self.embedding_layer1(e_inp)
        e_op,h,c= self.lstm1(x)
        y = self.embedding_layer2(d_inp)
        d_op,h1,c1= self.lstm2(y,initial_state=[h,c])
        output = self.dense_layer(d_op)
        return output


embedding_dim = 8
t_vocab_size = len(french_word_index) + 1
i_vocab_size=len(english_word_index)+1

model = TextClassificationModel(i_vocab_size,t_vocab_size,embedding_dim,max_sequence_length)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
french_one_hot=tf.one_hot(french_padded_sequences,depth=t_vocab_size)

In [21]:
model.fit([english_padded_sequences, french_padded_sequences],french_one_hot,
          epochs=7)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.src.callbacks.History at 0x217a3adea70>

In [22]:
model.summary()

Model: "text_classification_model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     multiple                  224       
                                                                 
 lstm_6 (LSTM)               multiple                  760       
                                                                 
 embedding_7 (Embedding)     multiple                  240       
                                                                 
 lstm_7 (LSTM)               multiple                  760       
                                                                 
 dense_3 (Dense)             multiple                  330       
                                                                 
Total params: 2314 (9.04 KB)
Trainable params: 2314 (9.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
index_french_word = french_tokenizer.index_word

In [24]:
index_english_word = english_tokenizer.index_word

In [25]:
new_english_text = "The application of AI in robotics is fascinating."

In [26]:
new_english_sequence = english_tokenizer.texts_to_sequences([new_english_text])

In [27]:
new_english_padded_sequences = pad_sequences(new_english_sequence, maxlen=max_sequence_length)

In [28]:
tar_seq=np.zeros((1,1))

In [29]:
tar_seq[0,0]=french_word_index["start"]

In [35]:
tar_seq

array([[29.]])

In [30]:
translated_french_sequences = model.predict([new_english_padded_sequences,tar_seq])



In [31]:
translated_french_sequences

array([[[0.03434602, 0.03346131, 0.03403141, 0.03376238, 0.03365267,
         0.03312952, 0.03314159, 0.03334253, 0.03353604, 0.03329704,
         0.03338773, 0.03333544, 0.03325678, 0.03292664, 0.03323413,
         0.03326814, 0.0330774 , 0.03296291, 0.03292364, 0.03338081,
         0.033292  , 0.03309099, 0.03320865, 0.03360784, 0.03321854,
         0.03340982, 0.03349654, 0.03279203, 0.03320983, 0.03321964]]],
      dtype=float32)

In [32]:
translated_french_sequences[0]

array([[0.03434602, 0.03346131, 0.03403141, 0.03376238, 0.03365267,
        0.03312952, 0.03314159, 0.03334253, 0.03353604, 0.03329704,
        0.03338773, 0.03333544, 0.03325678, 0.03292664, 0.03323413,
        0.03326814, 0.0330774 , 0.03296291, 0.03292364, 0.03338081,
        0.033292  , 0.03309099, 0.03320865, 0.03360784, 0.03321854,
        0.03340982, 0.03349654, 0.03279203, 0.03320983, 0.03321964]],
      dtype=float32)

In [33]:
translated_french_text = french_tokenizer.sequences_to_texts(translated_french_sequences[0])

In [34]:
translated_french_text

['UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK']