In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, LSTM
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
## General Configs

max_len = 200
vocab_size = 10000
embedding_dim = 128
latent_dim = 256

In [3]:
## Importing IMDB data for the text classification problem

## Only importing the train data, igoring the test data that's why kepping
## the underscor for that.

(X_train,y_train),_ = tf.keras.datasets.imdb.load_data(num_words=vocab_size)

In [4]:
len(X_train)
len(y_train)

25000

In [5]:
## Taking the subset of data for the training

X_train = X_train[:3000]
y_train = y_train[:3000]

len(X_train)
len(y_train)

3000

In [6]:
## Define the padding. Because each sentences will have the different lengths. So we need to keep all in one length

X_train = pad_sequences(X_train,maxlen=max_len, padding='post', truncating='post')

In [7]:
## Create the LSTM Model with Keras

input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(vocab_size, embedding_dim)(input_layer)
lstm_layer, state_h, state_c = LSTM(latent_dim, return_state=True)(embedding_layer)
output_layer = Dense(1, activation='sigmoid')(lstm_layer)

classification_model = Model(input_layer, output_layer)

In [8]:
classification_model.summary()

In [9]:
## Compiling the model

classification_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
## Train the model

classification_model.fit(X_train, y_train, batch_size=64, epochs=1, validation_split=0.1)

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 1s/step - accuracy: 0.4748 - loss: 0.6950 - val_accuracy: 0.5567 - val_loss: 0.6930


<keras.src.callbacks.history.History at 0x7a1880879dc0>

In [11]:
## Code to decode the embedding into the text for testing purpose
from tensorflow.keras.datasets import imdb

def decoded_review(encoded_review):

  word_index = imdb.get_word_index()

  reverse_word_index = {index + 3: word for word, index in word_index.items()}

  reverse_word_index[0] = ""
  reverse_word_index[1] = ""
  reverse_word_index[2] = ""
  reverse_word_index[3] = ""

  # Decode the review
  decoded_review = " ".join([reverse_word_index.get(i, "") for i in encoded_review[0]])

  print("****Decoded Review:****")
  print(decoded_review)

In [12]:
## Predict the sentiment

sample_review = X_train[1].reshape(1, -1)  # 1 sample with shape (1, max_len)
print("Sample Review :", decoded_review(sample_review))
prediction = classification_model.predict(sample_review)

print("Predicted sentiment probability:", prediction[0][0])
print("Predicted Sentiment:", "Positive 😊" if prediction[0][0] > 0.5 else "Negative 😞")

****Decoded Review:****
 big hair big boobs bad music and a giant safety pin these are the words to best describe this terrible movie i love cheesy horror movies and i've seen hundreds but this had got to be on of the worst ever made the plot is paper thin and ridiculous the acting is an abomination the script is completely laughable the best is the end showdown with the cop and how he worked out who the killer is it's just so damn terribly written the clothes are sickening and funny in equal  the hair is big lots of boobs  men wear those cut  shirts that show off their  sickening that men actually wore them and the music is just  trash that plays over and over again in almost every scene there is trashy music boobs and  taking away bodies and the gym still doesn't close for  all joking aside this is a truly bad film whose only charm is to look back on the disaster that was the 80's and have a good old laugh at how bad everything was back then           
Sample Review : None
[1m1/1[0

In [13]:
## Save the model

classification_model.save("lstm_imdb_classification_model.keras")

# Retraining or Finetunning the same model with different data


In [14]:
max_len = 200
vocab_size = 1000
embedding_dim = 128
latent_dim = 256

# Load IMDB dataset
(X_train, y_train), _ = tf.keras.datasets.imdb.load_data(num_words=vocab_size)
X_train = pad_sequences(X_train, maxlen=max_len, padding='post', truncating='post')


In [15]:
X_train = X_train[:1000]
y_train = y_train[:1000]

In [16]:
len(X_train[0])

200

In [17]:
## load the model
from tensorflow.keras.models import load_model

load_classification_model = load_model("lstm_imdb_classification_model.keras")

# LSTM Limitations

In LSTM (a type of RNN), training happens sequentially — the model processes one token at a time and passes information forward through hidden states.
Because of this, LSTMs struggle to remember very long contexts: even though they have a “long-term memory,” it fades as sequences grow longer.
When retraining or fine-tuning with a lot of new data, the model can gradually overwrite or forget older information.
This makes it difficult for LSTMs to efficiently handle very large datasets or maintain knowledge across long contexts.


In [18]:
## compile and train the model

load_classification_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

load_classification_model.fit(X_train, y_train, batch_size=64, epochs=1, validation_split=0.1)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1s/step - accuracy: 0.4989 - loss: 0.6916 - val_accuracy: 0.6100 - val_loss: 0.6887


<keras.src.callbacks.history.History at 0x7a17dc5a9100>

In [19]:
## Save the model

load_classification_model.save("lstm_imdb_classification_model_updated.keras")

# Retrain the model for the Summerization Task

For a text classification task, we typically use an LSTM in a many-to-one architecture — the model reads the whole sequence and outputs a single class label.

However, for a summarization task, we need to generate another sequence as output. This requires an encoder-decoder (sequence-to-sequence) architecture.

Therefore, we can’t reuse the same LSTM model designed for classification — we must redesign and retrain a new one for summarization.

In contrast, Transformers use a single, unified architecture that can handle multiple tasks (classification, summarization, translation, etc.) simply by fine-tuning.

LSTMs are still useful for smaller, task-specific problems, but Transformers are more general and powerful for large-scale NLP.


In [20]:
updated_classification_model = load_model("lstm_imdb_classification_model_updated.keras")

In [21]:
updated_classification_model.layers

[<InputLayer name=input_layer, built=True>,
 <Embedding name=embedding, built=True>,
 <LSTM name=lstm, built=True>,
 <Dense name=dense, built=True>]

In [31]:
## Encoder

encoder_input_layer = Input(shape=(max_len,), name='encoder_input')
encoder_embedding = updated_classification_model.layers[1](encoder_input_layer)
encoder_output, state_h, state_c = updated_classification_model.layers[2](encoder_embedding)

In [34]:
encoder_input_layer.shape

(None, 200)

In [35]:
## Decoder

output_vocab_size = 8000
target_max_len = 50

decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(output_vocab_size, embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=[state_h, state_c])
decoder_dense = Dense(output_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [36]:
## Seq2Seq Model (Encoder-Decoder)

seq2seq_model = Model([encoder_input_layer, decoder_inputs], decoder_outputs) # Use the correct input layer
seq2seq_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [38]:
## Data

encoder_input_data = X_train[:1000]  # using IMDB input for now
decoder_input_data = np.random.randint(1, output_vocab_size, (1000, target_max_len))
decoder_target_data = np.random.randint(1, output_vocab_size, (1000, target_max_len, 1))


Shape of encoder_input_data: (1000, 200)
Shape of decoder_input_data: (1000, 50)
Shape of decoder_target_data: (1000, 50, 1)


In [39]:
seq2seq_model.fit(
    [encoder_input_data, decoder_input_data], # encoder_input_data should have shape (None, max_len)
    decoder_target_data,
    batch_size=32,
    epochs=1,
    validation_split=0.1
)

[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 2s/step - accuracy: 1.2760e-04 - loss: 8.9872 - val_accuracy: 0.0000e+00 - val_loss: 8.9875


<keras.src.callbacks.history.History at 0x7a17e50a82f0>