# Aim:

To implement and train Sequence-to-Sequence Encoder-Decoder models for neural language translation (English-Hindi, English-Spanish), and apply encoder-decoder architectures to real-world problem formulations such as text summarization and image captioning

In [None]:
import csv
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense

df = pd.read_csv('Hindi_English_Truncated_Corpus.csv').sample(2000, random_state=42)
df

Unnamed: 0,source,english_sentence,hindi_sentence
3556,tides,He declares the result and reports it to the E...,वही परिणाम की घोषणा करता है और निर्वाचन आयोग क...
25899,ted,was a little uncomfortable for them.,थोडा कठिन था।
89038,indic2012,But Mulla Assamudin was proved to be not eligi...,मगर मुल्ला असमुद्दीन अक्षम सिद्ध हुए।
78212,ted,I would never have to make a book and then pre...,मुझे कभी भी किताब बना कर किसी प्रदर्शन-स्थल को...
96955,indic2012,headind kaun banega crorepati,शीर्षक कौन बनेगा करोड़पति (Kaun Banega Crorepa...
...,...,...,...
15990,indic2012,positive feedback which is co2 and ch4 is form...,सकरामातक पुननिर्वेशन (Positive feedback) जो की...
20160,ted,"It's still heavy now, and it was heavy before ...","अभी भी ये बड़ी बात है, उससे पहले भी यह बड़ी बा..."
97112,ted,(Applause),(तालियाँ)
111071,tides,I am more accustomed to the various dramas tha...,मैं अमेरिका और ब्रिटेन में बनने वाली फिल्मों क...


In [67]:
english_sentences = df['english_sentence'].astype(str).values
hindi_sentences = [f'<start> {sent} <end>' for sent in df['hindi_sentence'].astype(str).values]

In [None]:
eng_tokenizer = Tokenizer(filters='')
eng_tokenizer.fit_on_texts(english_sentences)
eng_sequences = eng_tokenizer.texts_to_sequences(english_sentences)
eng_vocab_size = len(eng_tokenizer.word_index) + 1
max_eng_len = max(len(seq) for seq in eng_sequences)

hin_tokenizer = Tokenizer(filters='')
hin_tokenizer.fit_on_texts(hindi_sentences)
hin_sequences = hin_tokenizer.texts_to_sequences(hindi_sentences)
hin_vocab_size = len(hin_tokenizer.word_index) + 1
max_hin_len = max(len(seq) for seq in hin_sequences)

In [69]:
eng_padded = pad_sequences(eng_sequences, maxlen=max_eng_len, padding='post')
hin_padded = pad_sequences(hin_sequences, maxlen=max_hin_len, padding='post')

In [None]:
decoder_target_data = np.zeros_like(hin_padded)

In [71]:
for i, seq in enumerate(hin_padded):
    decoder_target_data[i, :-1] = seq[1:]

In [None]:
embedding_dim = 256
latent_dim = 512

In [None]:
encoder_inputs = Input(shape=(max_eng_len,))
enc_emb = Embedding(eng_vocab_size, embedding_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
_, state_h, state_c = encoder_lstm(enc_emb)
encoder_states = [state_h, state_c]

In [None]:
decoder_inputs = Input(shape=(max_hin_len,))
dec_emb = Embedding(hin_vocab_size, embedding_dim, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)
decoder_dense = Dense(hin_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [None]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()

In [None]:
model.fit(
    [eng_padded, hin_padded],
    decoder_target_data,
    batch_size=64,
    epochs=10,
    validation_split=0.2
)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 3s/step - loss: 8.3916 - val_loss: 7.2945
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 3s/step - loss: 6.8078 - val_loss: 7.0820
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 3s/step - loss: 6.4633 - val_loss: 6.9587
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 3s/step - loss: 6.2954 - val_loss: 6.9481
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 3s/step - loss: 6.1920 - val_loss: 6.9208
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 3s/step - loss: 5.9207 - val_loss: 6.9172
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 3s/step - loss: 5.7565 - val_loss: 6.9202
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 3s/step - loss: 5.6566 - val_loss: 6.9433
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x36004fdc0>

In [None]:
encoder_model = Model(encoder_inputs, encoder_states)

In [None]:
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(dec_emb, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)

decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

In [None]:
def translate_sentence(input_text):
    input_seq = eng_tokenizer.texts_to_sequences([input_text])
    input_seq = pad_sequences(input_seq, maxlen=max_eng_len, padding='post')

    states_value = encoder_model.predict(input_seq)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = hin_tokenizer.word_index.get('<start>', 0)

    decoded_sentence = ''
    stop_condition = False
    step_count = 0

    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = hin_tokenizer.index_word.get(sampled_token_index, '')

        print(f"Step {step_count}: Predicted Token: {sampled_word} (Index: {sampled_token_index})")

        if sampled_word == '<end>' or len(decoded_sentence) > max_hin_len:
            stop_condition = True
        else:
            decoded_sentence += ' ' + sampled_word

        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]
        step_count += 1

        if step_count > max_hin_len:
            print("Stopping due to max length reached!")
            break

    return decoded_sentence.strip()

In [81]:
while True:
    user_input = input("Enter an English sentence (or 'exit' to quit): ")
    if user_input.lower() == 'exit':
        print("Exiting the translator. Goodbye!")
        break
    hindi_translation = translate_sentence(user_input)
    print(f"Translated Hindi: {hindi_translation}\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Step 0: Predicted Token: और (Index: 8)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Step 1: Predicted Token: एक (Index: 13)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Step 2: Predicted Token: बारे (Index: 87)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Step 3: Predicted Token: के (Index: 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Step 4: Predicted Token: लिए (Index: 23)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Step 5: Predicted Token: <end> (Index: 2)
Translated Hindi: और एक बारे के लिए

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Step 0: Predicted Token: जो (Index: 20)
[1m1/1[0m [32m━━━━━━━━━━━━━━━

2. Implement and train Engish to Spanish Machine Translation for the Hugging Face opus_books dataset

In [145]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Bidirectional, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from datasets import load_dataset

In [87]:
dataset = load_dataset("opus_books", "en-es")

Generating train split: 100%|██████████| 93470/93470 [00:00<00:00, 1605333.05 examples/s]


In [96]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'translation'],
        num_rows: 93470
    })
})


In [152]:
train_data = dataset["train"].select(range(2500))
english_sentences = [item["translation"]["en"] for item in train_data]
spanish_sentences = [item["translation"]["es"] for item in train_data]

In [153]:
print(f"Sample English: {english_sentences[5]}")
print(f"Sample Spanish: {spanish_sentences[5]}")

Sample English: Their estate was large, and their residence was at Norland Park, in the centre of their property, where, for many generations, they had lived in so respectable a manner as to engage the general good opinion of their surrounding acquaintance.
Sample Spanish: Su propiedad era de buen tamaño, y en el centro de ella se encontraba la residencia, Norland Park, donde la manera tan digna en que habían vivido por muchas generaciones llegó a granjearles el respeto de todos los conocidos del lugar.


In [154]:
spanish_sentences = [f"<start> {sent} <end>" for sent in spanish_sentences]

In [None]:
eng_tokenizer = Tokenizer(filters='')
eng_tokenizer.fit_on_texts(english_sentences)
eng_sequences = eng_tokenizer.texts_to_sequences(english_sentences)
eng_vocab_size = len(eng_tokenizer.word_index) + 1
max_eng_len = max(len(seq) for seq in eng_sequences)

spa_tokenizer = Tokenizer(filters='')
spa_tokenizer.fit_on_texts(spanish_sentences)
spa_sequences = spa_tokenizer.texts_to_sequences(spanish_sentences)
spa_vocab_size = len(spa_tokenizer.word_index) + 1
max_spa_len = max(len(seq) for seq in spa_sequences)

In [156]:
eng_padded = pad_sequences(eng_sequences, maxlen=max_eng_len, padding='post')
spa_padded = pad_sequences(spa_sequences, maxlen=max_spa_len, padding='post')

In [None]:
decoder_target_data = np.zeros_like(spa_padded)
for i, seq in enumerate(spa_padded):
    decoder_target_data[i, :-1] = seq[1:]

In [None]:
embedding_dim = 300
latent_dim = 512

In [None]:
encoder_inputs = Input(shape=(max_eng_len,))
enc_emb = Embedding(eng_vocab_size, embedding_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = Bidirectional(LSTM(latent_dim, return_state=True, dropout=0.4))
encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder_lstm(enc_emb)
encoder_states = [forward_h, forward_c]

In [None]:
decoder_inputs = Input(shape=(max_spa_len,))
dec_emb = Embedding(spa_vocab_size, embedding_dim, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, dropout=0.4)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)

decoder_dense = Dense(spa_vocab_size, activation="softmax")
decoder_outputs = decoder_dense(decoder_outputs)

In [None]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()

In [163]:
model.fit(
    [eng_padded, spa_padded],
    decoder_target_data,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 3s/step - loss: 8.3869 - val_loss: 7.1385
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 3s/step - loss: 6.7024 - val_loss: 6.8713
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 3s/step - loss: 6.4239 - val_loss: 6.7449
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 3s/step - loss: 6.2329 - val_loss: 6.6733
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 3s/step - loss: 6.0672 - val_loss: 6.6022
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 3s/step - loss: 5.8860 - val_loss: 6.5360
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 3s/step - loss: 5.6927 - val_loss: 6.5221
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 3s/step - loss: 5.5531 - val_loss: 6.4922
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x367d88310>

In [None]:
encoder_model = Model(encoder_inputs, encoder_states)

In [None]:
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(dec_emb, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)

decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

In [None]:
def beam_search_decoder(input_text, beam_width=5, max_len=50, temperature=1.0):
    input_seq = eng_tokenizer.texts_to_sequences([input_text])
    input_seq = pad_sequences(input_seq, maxlen=max_eng_len, padding="post")

    states_value = encoder_model.predict(input_seq)

    start_token = spa_tokenizer.word_index["<start>"]
    end_token = spa_tokenizer.word_index["<end>"]

    sequences = [([start_token], 0.0, states_value)]

    final_translations = []

    for _ in range(max_len):
        new_sequences = []
        
        for seq, score, states in sequences:
            target_seq = np.zeros((1, 1))
            target_seq[0, 0] = seq[-1]

            output_tokens, h, c = decoder_model.predict([target_seq] + states)

            output_tokens = np.log(output_tokens[0, -1, :]) / temperature
            exp_tokens = np.exp(output_tokens)
            probabilities = exp_tokens / np.sum(exp_tokens)

            top_indices = np.argsort(probabilities)[-beam_width:]

            for idx in top_indices:
                new_seq = seq + [idx]
                new_score = score + np.log(probabilities[idx])
                new_states = [h, c]
                new_sequences.append((new_seq, new_score, new_states))

        sequences = sorted(new_sequences, key=lambda x: x[1], reverse=True)[:beam_width]

        for seq, score, _ in sequences:
            if seq[-1] == end_token:
                final_translations.append((seq, score))

        if len(final_translations) >= beam_width:
            break

    if final_translations:
        best_translation = sorted(final_translations, key=lambda x: x[1], reverse=True)[0][0]
    else:
        best_translation = sequences[0][0]

    translated_sentence = " ".join(spa_tokenizer.index_word.get(idx, "") for idx in best_translation if idx > 0)
    return translated_sentence.replace("<start>", "").replace("<end>", "").strip()

In [175]:
while True:
    user_input = input("Enter an English sentence (or 'exit' to quit): ")
    if user_input.lower() == "exit":
        print("Exiting the translator. ¡Adiós!")
        break
    spanish_translation = beam_search_decoder(user_input)
    print(f"Translated Spanish: {spanish_translation}\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms

3. Build any other real world application such as image captioning, summarization etc using Encoder-Decoder Architecture

In [176]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Model
from datasets import load_dataset
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [195]:
dataset = load_dataset("xsum", split="train")
articles = dataset["document"][:5000] 
summaries = dataset["summary"][:5000]

In [196]:
max_vocab = 5000
max_input_length = 300
max_output_length = 50

In [197]:
tokenizer_input = Tokenizer(num_words=max_vocab, oov_token="<OOV>")
tokenizer_output = Tokenizer(num_words=max_vocab, oov_token="<OOV>")

In [198]:
tokenizer_input.fit_on_texts(articles)
tokenizer_output.fit_on_texts(summaries)

In [199]:
input_sequences = tokenizer_input.texts_to_sequences(articles)
output_sequences = tokenizer_output.texts_to_sequences(summaries)

In [200]:
input_padded = pad_sequences(input_sequences, maxlen=max_input_length, padding="post")
output_padded = pad_sequences(output_sequences, maxlen=max_output_length, padding="post")

In [None]:
embedding_dim = 128
lstm_units = 256
vocab_size_input = len(tokenizer_input.word_index) + 1
vocab_size_output = len(tokenizer_output.word_index) + 1

In [None]:
class Encoder(Model):
    def __init__(self, vocab_size, embedding_dim, lstm_units):
        super(Encoder, self).__init__()
        self.embedding = Embedding(vocab_size, embedding_dim, mask_zero=True)
        self.lstm = LSTM(lstm_units, return_state=True)

    def call(self, x):
        x = self.embedding(x)
        output, state_h, state_c = self.lstm(x)
        return state_h, state_c

In [None]:
class Decoder(Model):
    def __init__(self, vocab_size, embedding_dim, lstm_units):
        super(Decoder, self).__init__()
        self.embedding = Embedding(vocab_size, embedding_dim, mask_zero=True)
        self.lstm = LSTM(lstm_units, return_sequences=True, return_state=True)
        self.fc = Dense(vocab_size, activation='softmax')

    def call(self, x, state_h, state_c):
        x = self.embedding(x)
        output, state_h, state_c = self.lstm(x, initial_state=[state_h, state_c])
        output = self.fc(output)
        return output, state_h, state_c


In [None]:
encoder = Encoder(vocab_size_input, embedding_dim, lstm_units)
decoder = Decoder(vocab_size_output, embedding_dim, lstm_units)

encoder_inputs = tf.keras.Input(shape=(max_input_length,))
decoder_inputs = tf.keras.Input(shape=(max_output_length,))

state_h, state_c = encoder(encoder_inputs)
decoder_outputs, _, _ = decoder(decoder_inputs, state_h, state_c)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

In [205]:
model.summary()

In [206]:
target_data = np.zeros_like(output_padded)
target_data[:, :-1] = output_padded[:, 1:]

In [207]:
model.fit([input_padded, output_padded], target_data, batch_size=32, epochs=10)

Epoch 1/10


[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 508ms/step - loss: 7.4542
Epoch 2/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 505ms/step - loss: 6.1717
Epoch 3/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 534ms/step - loss: 5.7966
Epoch 4/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 612ms/step - loss: 5.5586
Epoch 5/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 599ms/step - loss: 5.3566
Epoch 6/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 594ms/step - loss: 5.1721
Epoch 7/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 589ms/step - loss: 5.0407
Epoch 8/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 592ms/step - loss: 4.9301
Epoch 9/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 595ms/step - loss: 4.8028
Epoch 10/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s

<keras.src.callbacks.history.History at 0x35cab8d60>

In [None]:
def summarize(text):
    seq = tokenizer_input.texts_to_sequences([text])
    seq_padded = pad_sequences(seq, maxlen=max_input_length, padding="post")

    state_h, state_c = encoder(seq_padded)

    start_token = tokenizer_output.word_index.get('start', 1)
    dec_input = np.zeros((1, max_output_length))
    dec_input[0, 0] = start_token

    summary = []
    for i in range(max_output_length - 1):
        preds, state_h, state_c = decoder(dec_input, state_h, state_c)
        word_index = np.argmax(preds[0, i])

        if word_index == 0:
            break

        summary.append(word_index)
        dec_input[0, i + 1] = word_index

    return ' '.join([tokenizer_output.index_word.get(idx, '<UNK>') for idx in summary])

In [215]:
test_text = articles[5]
print("Original Text:", test_text)
print("Generated Summary:", summarize(test_text))

Original Text: Simone Favaro got the crucial try with the last move of the game, following earlier touchdowns by Chris Fusaro, Zander Fagerson and Junior Bulumakau.
Rynard Landman and Ashton Hewitt got a try in either half for the Dragons.
Glasgow showed far superior strength in depth as they took control of a messy match in the second period.
Home coach Gregor Townsend gave a debut to powerhouse Fijian-born Wallaby wing Taqele Naiyaravoro, and centre Alex Dunbar returned from long-term injury, while the Dragons gave first starts of the season to wing Aled Brew and hooker Elliot Dee.
Glasgow lost hooker Pat McArthur to an early shoulder injury but took advantage of their first pressure when Rory Clegg slotted over a penalty on 12 minutes.
It took 24 minutes for a disjointed game to produce a try as Sarel Pretorius sniped from close range and Landman forced his way over for Jason Tovey to convert - although it was the lock's last contribution as he departed with a chest injury shortly a