In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from nltk.corpus import reuters
import nltk

# Download the Reuters dataset
nltk.download('reuters')

# Load the Reuters dataset
docs = reuters.fileids()
train_docs_id = list(filter(lambda doc: doc.startswith("train"), docs))
test_docs_id = list(filter(lambda doc: doc.startswith("test"), docs))

train_docs = [reuters.raw(doc_id) for doc_id in train_docs_id]
test_docs = [reuters.raw(doc_id) for doc_id in test_docs_id]

# Tokenize and preprocess the text data
max_words = 10000
max_len = 100  # Adjust as needed

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_docs)
word_index = tokenizer.word_index

X_train_seq = tokenizer.texts_to_sequences(train_docs)
X_test_seq = tokenizer.texts_to_sequences(test_docs)

X_train = pad_sequences(X_train_seq, maxlen=max_len, padding='post')
X_test = pad_sequences(X_test_seq, maxlen=max_len, padding='post')

# Create target summaries
Y_train = tokenizer.texts_to_sequences([reuters.raw(doc_id).split('\n')[0] for doc_id in train_docs_id])
Y_test = tokenizer.texts_to_sequences([reuters.raw(doc_id).split('\n')[0] for doc_id in test_docs_id])

# Ensure that sequences have the same length as input sequences
Y_train = pad_sequences(Y_train, maxlen=max_len, padding='post')
Y_test = pad_sequences(Y_test, maxlen=max_len, padding='post')



[nltk_data] Downloading package reuters to
[nltk_data]     C:\Users\jassu\AppData\Roaming\nltk_data...
[nltk_data]   Package reuters is already up-to-date!


In [26]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Input, Bidirectional, Concatenate, MultiHeadAttention, TimeDistributed

latent_size_1 = 256
latent_size_2 = 256
latent_size_3 = 128
embedding_dim = 200
batch_size = 64
vocab_size = len(word_index) + 1
num_heads = 4  # Number of attention heads

# Encoder
encoder_inputs = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len)
embedded_inputs = embedding_layer(encoder_inputs)

encoder_lstm_1 = Bidirectional(LSTM(latent_size_1, return_sequences=True))(embedded_inputs)
encoder_lstm_2 = Bidirectional(LSTM(latent_size_2, return_sequences=True))(encoder_lstm_1)

# Decoder
decoder_inputs = Input(shape=(max_len,))
decoder_embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len)
decoder_embedded_inputs = decoder_embedding_layer(decoder_inputs)

decoder_lstm_1 = LSTM(latent_size_2, return_sequences=True)
decoder_lstm_2 = LSTM(latent_size_3, return_sequences=True)

decoder_lstm_1_out = decoder_lstm_1(decoder_embedded_inputs)
decoder_lstm_2_out = decoder_lstm_2(decoder_lstm_1_out)

# Multi-head attention mechanism
attention = MultiHeadAttention(num_heads=num_heads, key_dim=latent_size_3 // num_heads)(query=decoder_lstm_2_out, value=encoder_lstm_2)

# Apply attention to each time step
attention = TimeDistributed(Dense(latent_size_3))(attention)

# Concatenate attention output and decoder LSTM output
decoder_combined_context = Concatenate(axis=-1)([decoder_lstm_2_out, attention])

decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_combined_context)

# Model
seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
seq2seq_Model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training
seq2seq_Model.fit([X_train, Y_train], Y_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluation
loss, accuracy = seq2seq_Model.evaluate([X_test, Y_test], Y_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.25011926889419556, Test Accuracy: 0.9613050818443298
