In [1]:
import keras

In [2]:
import tensorflow as tf

In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
import numpy
import  numpy as np

In [5]:
english_texts = [
    "I love natural language processing.",
    "TensorFlow is a powerful framework.",
    "Machine learning is fascinating.",
    "This is an example text.",
    "Neural networks are amazing.",
    "AI is the future of technology.",
    "I love natural language processing.",
    "TensorFlow is a powerful framework.",
    "Machine learning is fascinating.",
    "This is an example text.",
    "Neural networks are amazing.",
    "AI is the future of technology.",
    "I love natural language processing.",
    "TensorFlow is a powerful framework.",
    "Machine learning is fascinating.",
    "This is an example text.",
    "Neural networks are amazing.",
    "AI is the future of technology."
    
]

# French example sentences (translations of English sentences)
french_texts = [
    "J'adore le traitement du langage naturel.",
    "TensorFlow est un cadre puissant.",
    "L'apprentissage automatique est fascinant.",
    "Ceci est un exemple de texte.",
    "Les réseaux neuronaux sont incroyables.",
    "L'IA est l'avenir de la technologie.",
    "J'adore le traitement du langage naturel.",
    "TensorFlow est un cadre puissant.",
    "L'apprentissage automatique est fascinant.",
    "Ceci est un exemple de texte.",
    "Les réseaux neuronaux sont incroyables.",
    "L'IA est l'avenir de la technologie.",
    "J'adore le traitement du langage naturel.",
    "TensorFlow est un cadre puissant.",
    "L'apprentissage automatique est fascinant.",
    "Ceci est un exemple de texte.",
    "Les réseaux neuronaux sont incroyables.",
    "L'IA est l'avenir de la technologie."
]

In [6]:
english_tokenizer = Tokenizer(oov_token="UNK")
english_tokenizer.fit_on_texts(english_texts)
english_word_index = english_tokenizer.word_index

french_tokenizer = Tokenizer(oov_token="UNK")
french_tokenizer.fit_on_texts(french_texts)
french_word_index = french_tokenizer.word_index

# Convert text data to sequences of integers
english_sequences = english_tokenizer.texts_to_sequences(english_texts)
french_sequences = french_tokenizer.texts_to_sequences(french_texts)

# Pad sequences to have the same length for modeling
max_sequence_length = 8
english_padded_sequences = pad_sequences(english_sequences, maxlen=max_sequence_length)
french_padded_sequences = pad_sequences(french_sequences, maxlen=max_sequence_length)


In [7]:
french_word_index["start"]=len(french_word_index)+1

In [8]:
french_word_index["end"]=len(french_word_index)+1

In [9]:
from keras.layers import LSTM,Input

In [10]:
class TextClassificationModel(tf.keras.Model):
    def __init__(self,i_vocab_size,t_vocab_size,embedding_dim,max_sequence_length):
        super(TextClassificationModel, self).__init__()
        self.embedding_layer1 = tf.keras.layers.Embedding(i_vocab_size, embedding_dim, input_length=max_sequence_length)
        self.lstm1 = LSTM(10,return_sequences=True,return_state=True)
        self.embedding_layer2 = tf.keras.layers.Embedding(t_vocab_size, embedding_dim, input_length=max_sequence_length)
        self.lstm2 = LSTM(10,return_sequences=True,return_state=True)
        self.dense_layer = tf.keras.layers.Dense(t_vocab_size, activation='softmax')

    def call(self, inputs):
        e_inp,d_inp=inputs
        x = self.embedding_layer1(e_inp)
        e_op,h,c= self.lstm1(x)
        y = self.embedding_layer2(d_inp)
        d_op,h1,c1= self.lstm2(y,initial_state=[h,c])
        output = self.dense_layer(d_op)
        return output


embedding_dim = 100
t_vocab_size = len(french_word_index) + 1
i_vocab_size=len(english_word_index)+1

model = TextClassificationModel(i_vocab_size,t_vocab_size,embedding_dim,max_sequence_length)

In [11]:
loss_function=tf.keras.losses.SparseCategoricalCrossentropy()

In [12]:
optimizer=tf.keras.optimizers.Adam(learning_rate=0.7,clipnorm=5)

## Training

In [13]:
def train_step(inputs, targets):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_function(targets, predictions)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss

In [14]:
batch_size=3

In [15]:
epochs=10
for epoch in range(epochs):
    total_loss = 0.0
    steps_per_epoch = len(english_padded_sequences) // batch_size

    for step in range(steps_per_epoch):
        start = step * batch_size
        end = (step + 1) * batch_size

        batch_inputs = english_padded_sequences[start:end]
        batch_targets = french_padded_sequences[start:end]
        loss = train_step([batch_inputs,batch_targets],batch_targets)
        total_loss += loss

    average_loss = total_loss / steps_per_epoch
    print(f"Epoch {epoch + 1}/{epochs} - Loss: {average_loss:.4f}")

Epoch 1/10 - Loss: 3.6079
Epoch 2/10 - Loss: 2.3262
Epoch 3/10 - Loss: 1.5869
Epoch 4/10 - Loss: 1.3139
Epoch 5/10 - Loss: 0.9162
Epoch 6/10 - Loss: 0.5964
Epoch 7/10 - Loss: 0.6393
Epoch 8/10 - Loss: 0.5795
Epoch 9/10 - Loss: 0.5765
Epoch 10/10 - Loss: 0.8834


In [16]:
model.summary()

Model: "text_classification_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  2800      
                                                                 
 lstm (LSTM)                 multiple                  4440      
                                                                 
 embedding_1 (Embedding)     multiple                  3100      
                                                                 
 lstm_1 (LSTM)               multiple                  4440      
                                                                 
 dense (Dense)               multiple                  341       
                                                                 
Total params: 15121 (59.07 KB)
Trainable params: 15121 (59.07 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [17]:
test_english_texts = [
    "I like deep learning.",
    "AI is changing the world.",
    "natural language processing is exciting."
]

test_french_texts = [
    "J'aime l'apprentissage profond.",
    "L'IA change le monde.",
    "Le traitement du langage naturel est passionnant."
]

test_english_sequences = english_tokenizer.texts_to_sequences(test_english_texts)
test_french_sequences = french_tokenizer.texts_to_sequences(test_french_texts)

test_english_padded_sequences = pad_sequences(test_english_sequences, maxlen=max_sequence_length)
test_french_padded_sequences = pad_sequences(test_french_sequences, maxlen=max_sequence_length)


In [18]:
encoder_inputs = tf.keras.layers.Input(shape=(max_sequence_length,))
encoder_embedding = model.layers[0](encoder_inputs)
encoder_outputs, state_h, state_c = model.layers[1](encoder_embedding)
encoder_model = tf.keras.Model(encoder_inputs, [encoder_outputs,state_h,state_c])

In [19]:
encoder_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 8)]               0         
                                                                 
 embedding (Embedding)       (None, 8, 100)            2800      
                                                                 
 lstm (LSTM)                 [(None, 8, 10),           4440      
                              (None, 10),                        
                              (None, 10)]                        
                                                                 
Total params: 7240 (28.28 KB)
Trainable params: 7240 (28.28 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [20]:
decoder_inputs = tf.keras.layers.Input(shape=(max_sequence_length,))
decoder_state_input_h = tf.keras.layers.Input(shape=(None,))
decoder_state_input_c = tf.keras.layers.Input(shape=(None,))
decoder_state_inputs = tf.keras.layers.Input(shape=(max_sequence_length,None))
decoder_embedding = model.layers[2](decoder_inputs)
decoder_outputs, state_h, state_c = model.layers[3](decoder_embedding, 
                                                    initial_state=[decoder_state_input_h, decoder_state_input_c])
decoder_outputs = model.layers[4](decoder_outputs)
decoder_model = tf.keras.Model([decoder_inputs] + [decoder_state_inputs,decoder_state_input_h, decoder_state_input_c], 
                               [decoder_outputs]+[state_h, state_c])

In [21]:
decoder_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 8)]                  0         []                            
                                                                                                  
 embedding_1 (Embedding)     (None, 8, 100)               3100      ['input_2[0][0]']             
                                                                                                  
 input_3 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, None)]               0         []                            
                                                                                            

In [22]:
def greedy_decode_sequence(input_seq):
    input_seq=np.expand_dims(input_seq,axis=0)
    encoder_output, state_h, state_c = encoder_model.predict(input_seq)
    target_seq = np.array([[french_word_index['start']]])
    output_seq = []
    stop_condition=False
    while not stop_condition:
        decoder_output, state_h1, state_c1 = decoder_model.predict([target_seq]+[encoder_output,state_h, state_c])

        # Get the next token index (greedy decoding)
        sampled_token_index = np.argmax(decoder_output[0, 0, :])
        if sampled_token_index ==0:
            break
        if sampled_token_index == french_word_index['end'] or len(output_seq) >= max_sequence_length - 1:
            stop_condition=True

        output_seq.append(sampled_token_index)
        target_seq = np.array([[sampled_token_index]])
        state_h,state_c=state_h1,state_c1

    return output_seq


In [23]:
english_index_word = english_tokenizer.index_word
french_index_word = french_tokenizer.index_word

In [55]:
test_english_padded_sequences[2]

array([0, 0, 0, 5, 6, 7, 2, 1])

In [56]:
seq_gred=greedy_decode_sequence(test_english_padded_sequences[2])



In [57]:
seq_gred

[16, 16, 16, 16, 16, 16, 16, 16]

In [58]:
l=""
for i in seq_gred:
    l+=french_index_word[i]

In [59]:
l

'fascinantfascinantfascinantfascinantfascinantfascinantfascinantfascinant'

In [65]:
def beam_search_infer(input_seq, beam_width=3):
    input_seq=np.expand_dims(input_seq,axis=0)
    encoder_output, state_h, state_c = encoder_model.predict(input_seq)
    target_seq = np.array([[french_word_index['start']]])

    sequences = [{'seq': target_seq, 'prob': 1.0, 'state': [state_h, state_c]}]

    for _ in range(max_sequence_length):
        all_candidates = []

        for seq in sequences:
            target_seq, state = seq['seq'], seq['state']
            decoder_output, state_h1, state_c1 = decoder_model.predict([target_seq]+[encoder_output,state])
            top_k_indices = np.argsort(decoder_output[0, 0, :])[-beam_width:]

            for idx in top_k_indices:
                candidate_seq = np.array([[idx]])
                candidate_prob = seq['prob'] * decoder_output[0, 0, idx]
                all_candidates.append({'seq': candidate_seq, 'prob': candidate_prob, 'state': [state_h1, state_c1]})

        ordered = sorted(all_candidates, key=lambda x: x['prob'], reverse=True)
        sequences = ordered[:beam_width]

    output_seq = sequences[0]['seq'][0]

    return output_seq

In [77]:
seq=beam_search_infer(test_english_padded_sequences[2],2)



In [72]:
seq

array([16], dtype=int64)

In [73]:
m=""
for i in seq:
    m+=french_index_word[i]
m

'fascinant'

In [79]:
import tensorflow as tf

def beam_search_infer2(input_seq, beam_width=3):
    input_seq = np.expand_dims(input_seq, axis=0)
    encoder_output, state_h, state_c = encoder_model.predict(input_seq)
    target_seq = np.array([[french_word_index['start']]])

    sequences = [{'seq': target_seq, 'prob': 1.0, 'state': [state_h, state_c]}]

    for _ in range(max_sequence_length):
        all_candidates = []

        for seq in sequences:
            target_seq, state = seq['seq'], seq['state']
            decoder_output, state_h1, state_c1 = decoder_model.predict([target_seq] + [encoder_output, state])
            log_probs = tf.math.log(decoder_output)  # Using log probabilities
            top_k_values, top_k_indices = tf.math.top_k(log_probs[0, 0, :], k=beam_width)  # Get top-k indices

            for idx in top_k_indices.numpy():  # Convert TensorFlow tensor to NumPy array
                candidate_seq = np.array([[idx]])
                candidate_prob = seq['prob'] * tf.math.exp(log_probs[0, 0, idx])  # Convert log prob back to normal prob
                all_candidates.append({'seq': candidate_seq, 'prob': candidate_prob, 'state': [state_h1, state_c1]})

        ordered = sorted(all_candidates, key=lambda x: x['prob'], reverse=True)
        sequences = ordered[:beam_width]

    output_seq = sequences[0]['seq'][0]

    return output_seq


In [80]:
seq2=beam_search_infer2(test_english_padded_sequences[2],2)



In [81]:
seq2

array([16])