In [1]:
import tensorflow as tf

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
import numpy
import  numpy as np

In [32]:
english_texts = [
    "I love natural language processing.",
    "TensorFlow is a powerful framework.",
    "Machine learning is fascinating.",
    "This is an example text.",
    "Neural networks are amazing.",
    "AI is the future of technology."
]

# French example sentences (translations of English sentences)
french_texts = [
    "J'adore le traitement du langage naturel.",
    "TensorFlow est un cadre puissant.",
    "L'apprentissage automatique est fascinant.",
    "Ceci est un exemple de texte.",
    "Les r√©seaux neuronaux sont incroyables.",
    "L'IA est l'avenir de la technologie."
]

In [33]:
french_texts=["start"+" "+sentence+" "+"end" for sentence in french_texts]

In [34]:
french_texts[0]

"start J'adore le traitement du langage naturel. end"

In [42]:
# Tokenize the English and French sentences separately with OOV token
english_tokenizer = Tokenizer(oov_token="UNK")
english_tokenizer.fit_on_texts(english_texts)
english_word_index = english_tokenizer.word_index

french_tokenizer = Tokenizer(oov_token="UNK")
french_tokenizer.fit_on_texts(french_texts)
french_word_index = french_tokenizer.word_index

# Convert text data to sequences of integers
english_sequences = english_tokenizer.texts_to_sequences(english_texts)
french_sequences = french_tokenizer.texts_to_sequences(french_texts)

In [43]:
english_sequences=[sequence[::-1] for sequence in english_sequences]

In [48]:
eng_max_len=max(len(i) for i in english_sequences)

In [49]:
fre_max_len=max(len(i) for i in french_sequences)

In [46]:
eng_max_len,fre_max_len

(6, 8)

In [50]:
english_padded_sequences = pad_sequences(english_sequences, maxlen=eng_max_len)
french_padded_sequences = pad_sequences(french_sequences, maxlen=fre_max_len)

In [51]:
from keras.layers import LSTM,Input

In [140]:
class TextClassificationModel(tf.keras.Model):
    def __init__(self,i_vocab_size,t_vocab_size,embedding_dim,eng_max_len,fre_max_len):
        super(TextClassificationModel, self).__init__()
        self.embedding_layer1 = tf.keras.layers.Embedding(i_vocab_size, embedding_dim, input_length=eng_max_len,trainable=True)
        self.lstm1 = LSTM(10,kernel_initializer=tf.keras.initializers.RandomUniform(minval=-0.08, maxval=0.08),
                          return_sequences=True,return_state=True)
        self.embedding_layer2 = tf.keras.layers.Embedding(t_vocab_size, embedding_dim, input_length=fre_max_len,trainable=True)
        self.lstm2 = LSTM(10,kernel_initializer=tf.keras.initializers.RandomUniform(minval=-0.08, maxval=0.08)
                          ,return_sequences=True,return_state=True)
        self.dense_layer = tf.keras.layers.Dense(t_vocab_size, activation='softmax')

    def call(self, inputs):
        e_inp,d_inp=inputs
        x = self.embedding_layer1(e_inp)
        e_op,h,c= self.lstm1(x)
        y = self.embedding_layer2(d_inp)
        d_op,h1,c1= self.lstm2(y,initial_state=[h,c])
        output = self.dense_layer(d_op)
        return output


embedding_dim = 8
t_vocab_size = len(french_word_index) + 1
i_vocab_size=len(english_word_index)+1

model = TextClassificationModel(i_vocab_size,t_vocab_size,embedding_dim,eng_max_len,fre_max_len)

In [141]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.7,clipnorm=0.1)

In [142]:
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [190]:
french_one_hot=tf.one_hot(french_padded_sequences,depth=t_vocab_size)

In [191]:
model.fit([english_padded_sequences, french_padded_sequences],french_one_hot,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x22c7b4fb460>

In [192]:
model.summary()

Model: "text_classification_model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     multiple                  224       
                                                                 
 lstm_4 (LSTM)               multiple                  760       
                                                                 
 embedding_5 (Embedding)     multiple                  248       
                                                                 
 lstm_5 (LSTM)               multiple                  760       
                                                                 
 dense_2 (Dense)             multiple                  341       
                                                                 
Total params: 2333 (9.11 KB)
Trainable params: 2333 (9.11 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [175]:
test_english_texts = [
    "I like deep learning.",
    "AI is changing the world.",
    "Natural language processing is exciting."
]

test_french_texts = [
    "J'aime l'apprentissage profond.",
    "L'IA change le monde.",
    "Le traitement du langage naturel est passionnant."
]

test_english_sequences = english_tokenizer.texts_to_sequences(test_english_texts)
test_french_sequences = french_tokenizer.texts_to_sequences(test_french_texts)

test_english_sequences=[sequence[::-1] for sequence in test_english_sequences]

test_english_padded_sequences = pad_sequences(test_english_sequences, maxlen=eng_max_len)
test_french_padded_sequences = pad_sequences(test_french_sequences, maxlen=fre_max_len)


In [178]:
tets_french_one_hot=tf.one_hot(test_french_padded_sequences,depth=t_vocab_size)

In [180]:
test_loss, test_accuracy = model.evaluate([test_english_padded_sequences,
                                           test_french_padded_sequences], tets_french_one_hot)




In [146]:
index_french_word = french_tokenizer.index_word

In [147]:
index_english_word = english_tokenizer.index_word

In [148]:
new_english_text = "I love natural language processing."

In [149]:
new_english_text=new_english_text+" "+"end"

In [150]:
new_english_text

'I love natural language processing. end'

In [151]:
new_english_sequence = english_tokenizer.texts_to_sequences([new_english_text])

In [152]:
new_reversed_sequence = new_english_sequence[0][::-1]

In [153]:
new_reversed_sequence

[1, 7, 6, 5, 4, 3]

In [154]:
eng_max_len

6

In [155]:
new_padded_sequence = pad_sequences([new_reversed_sequence], maxlen=eng_max_len)

In [156]:
new_padded_sequence 

array([[1, 7, 6, 5, 4, 3]])

In [157]:
tar_seq=np.zeros((1,1))

In [158]:
tar_seq[0,0]=french_word_index["start"]

In [159]:
translated_french_sequences = model.predict([new_padded_sequence ,tar_seq])



In [160]:
translated_french_sequences

array([[[2.6552029e-06, 2.6170757e-10, 9.9983954e-01, 2.5502958e-17,
         5.3306226e-10, 3.6464646e-15, 5.4196370e-15, 6.2110836e-08,
         8.6082549e-14, 4.7857489e-16, 4.4316062e-14, 3.4292993e-09,
         1.8900453e-14, 3.8516665e-09, 4.4798339e-11, 4.4302642e-17,
         1.5669240e-04, 1.7168793e-07, 8.2010611e-14, 2.1472220e-08,
         2.9008165e-12, 6.3158164e-12, 3.6721698e-08, 4.2745532e-07,
         3.5540418e-07, 1.0523217e-14, 7.5257016e-18, 1.8769233e-09,
         7.0212679e-15, 1.9555265e-12, 5.1139431e-12]]], dtype=float32)

In [171]:
translated_french_sequences[0][0][2]=10

In [172]:
translated_french_sequences

array([[[2.6552029e-06, 2.6170757e-10, 1.0000000e+01, 2.5502958e-17,
         5.3306226e-10, 3.6464646e-15, 5.4196370e-15, 6.2110836e-08,
         8.6082549e-14, 4.7857489e-16, 4.4316062e-14, 3.4292993e-09,
         1.8900453e-14, 3.8516665e-09, 4.4798339e-11, 4.4302642e-17,
         1.5669240e-04, 1.7168793e-07, 8.2010611e-14, 2.1472220e-08,
         2.9008165e-12, 6.3158164e-12, 3.6721698e-08, 4.2745532e-07,
         3.5540418e-07, 1.0523217e-14, 7.5257016e-18, 1.8769233e-09,
         7.0212679e-15, 1.9555265e-12, 5.1139431e-12]]], dtype=float32)

In [173]:
translated_french_text = french_tokenizer.sequences_to_texts(translated_french_sequences[0])

In [174]:
translated_french_text

['UNK UNK du UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK']

In [127]:
english_word_index

{'UNK': 1,
 'is': 2,
 'i': 3,
 'love': 4,
 'natural': 5,
 'language': 6,
 'processing': 7,
 'tensorflow': 8,
 'a': 9,
 'powerful': 10,
 'framework': 11,
 'machine': 12,
 'learning': 13,
 'fascinating': 14,
 'this': 15,
 'an': 16,
 'example': 17,
 'text': 18,
 'neural': 19,
 'networks': 20,
 'are': 21,
 'amazing': 22,
 'ai': 23,
 'the': 24,
 'future': 25,
 'of': 26,
 'technology': 27}