# In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

def _texts_and_labels(dataset):
    """Split a supervised text dataset into two parallel Python lists.

    Args:
        dataset: Iterable of ``(text, label)`` eager tensor pairs, such as a
            split returned by ``tfds.load(..., as_supervised=True)``.

    Returns:
        A ``(texts, labels)`` tuple where ``texts`` is a list of ``str`` and
        ``labels`` is a list holding each label's ``.numpy()`` value.
    """
    texts = []
    labels = []
    for text, label in dataset:
        # The text tensor's .numpy() value is a bytes object. Decode it
        # rather than calling str() on it: str(b"...") produces the literal
        # "b'...'" repr (prefix, quotes and backslash escapes included),
        # which would leak into the tokenizer's vocabulary.
        texts.append(text.numpy().decode("utf-8"))
        labels.append(label.numpy())
    return texts, labels


# Load the IMDB reviews dataset as (text, label) pairs plus its metadata.
imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True)
train_data, test_data = imdb['train'], imdb['test']

# Materialize both splits as plain lists for the Keras Tokenizer.
train_sentences, train_labels = _texts_and_labels(train_data)
test_sentences, test_labels = _texts_and_labels(test_data)

# Text-encoding hyperparameters shared by the tokenizer and the model.
vocab_size, max_length, embedding_dim = 10000, 200, 16
oov_tok = "<OOV>"

# Labels are consumed by Keras as NumPy arrays rather than Python lists.
train_labels = np.asarray(train_labels)
test_labels = np.asarray(test_labels)

# Build the vocabulary from the training reviews only; words outside it are
# mapped to the OOV token when texts are converted to sequences.
tokenizer = Tokenizer(oov_token=oov_tok, num_words=vocab_size)
tokenizer.fit_on_texts(train_sentences)

# Both splits are padded/truncated at the end to exactly max_length tokens.
_pad_options = {'maxlen': max_length, 'padding': 'post', 'truncating': 'post'}

train_sequences = tokenizer.texts_to_sequences(train_sentences)
test_sequences = tokenizer.texts_to_sequences(test_sentences)

train_padded = pad_sequences(train_sequences, **_pad_options)
test_padded = pad_sequences(test_sequences, **_pad_options)

# Sentiment classifier, assembled layer by layer:
# embedding -> bidirectional LSTM (full sequence output) -> average pooling
# -> small dense layer -> single sigmoid unit for the binary label.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length)
)
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(64, return_sequences=True)
    )
)
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(24, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split; the test split is used as validation data.
num_epochs = 5
model.fit(
    train_padded,
    train_labels,
    epochs=num_epochs,
    validation_data=(test_padded, test_labels),
)

# Persist the trained model in HDF5 format for the transfer-learning section.
model.save("/content/drive/MyDrive/Temp/imdb_model.h5")

"""# Use Trained Model To implement TL"""

import tensorflow.keras.backend as K
from nltk.tokenize import sent_tokenize
import tensorflow as tf
import nltk
# Fetch the NLTK "punkt" resource.
nltk.download('punkt')

# Restore the sentiment classifier that was saved after training.
imdb_model = tf.keras.models.load_model(
    '/content/drive/MyDrive/Temp/imdb_model.h5'
)

# Freeze each layer of the restored model so its weights stay fixed if a
# model built on top of it is trained.
for pretrained_layer in imdb_model.layers:
    pretrained_layer.trainable = False

# Sentences from which the summary is selected.
corpus = [
    "The movie had a very strong start.",
    "However, the plot quickly fell apart.",
    "The acting was top-notch, especially the lead actor.",
    "But the storyline was predictable and uninspiring.",
    "Overall, the movie had good moments but was disappointing.",
]

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Encode the corpus with the tokenizer fitted on the IMDB training reviews
# (`tokenizer`, created earlier in this file). Do NOT fit a fresh Tokenizer
# on `corpus`: it would assign its own word indices, which would not match
# the vocabulary the frozen Embedding layer of `imdb_model` was trained on,
# so the pretrained model would receive meaningless token ids.
sequences = tokenizer.texts_to_sequences(corpus)

# Pad/truncate exactly as the training data was, so inputs have the length
# and layout the pretrained model expects.
max_length = 200
padded_sequences = pad_sequences(
    sequences, maxlen=max_length, padding='post', truncating='post'
)

# Functional-API model: the frozen IMDB classifier followed by a new dense
# head that ends in a single sigmoid score per input sequence.
max_length = 200
input_shape = (max_length,)
inputs = tf.keras.Input(shape=input_shape)

# NOTE(review): if `imdb_model` is the classifier saved earlier in this file,
# its output is already a single sigmoid value, so the head below only sees
# one scalar per sentence. Also, no fit() call on `summarization_model` is
# visible in this file, so the head's weights appear to stay at their random
# initial values when predict() is called -- confirm this is intended.
x = imdb_model(inputs)
for head_layer in (
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
):
    x = head_layer(x)

summarization_model = tf.keras.Model(inputs=inputs, outputs=x)

summarization_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts.
summarization_model.summary()

# Score every corpus sentence with the summarization model.
predictions = summarization_model.predict(padded_sequences)

# Keep, in corpus order, the sentences whose score exceeds the cut-off.
summary = [
    text
    for text, score in zip(corpus, predictions)
    if score > 0.436
]

print("Summary:")
for sentence in summary:
    print(sentence)