<a href="https://colab.research.google.com/github/Rajaanthonysamy/my_finetune/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM , Embedding, Dense
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [54]:
# Hyperparameters used by the cells below.
max_len = 200        # sequence length: model input shape and pad_sequences maxlen
vocab_size = 10000   # passed as num_words to the IMDB loader and as the Embedding input size
embedding_dim = 128  # Embedding output size
latent_dim = 256     # number of LSTM units

In [55]:
# Load the IMDB dataset with num_words=vocab_size; only the first (train) tuple is kept,
# the second tuple returned by the loader is discarded.
(x_train, y_train),_ = tf.keras.datasets.imdb.load_data(num_words=vocab_size)

In [56]:
# Keep only the first 3000 reviews and their labels.
x_train = x_train[:3000]
y_train = y_train[:3000]

In [57]:
# Sanity check: number of reviews kept after slicing.
len(x_train)

3000

In [58]:
# Sanity check: number of labels kept after slicing (should match the reviews).
len(y_train)

3000

In [59]:
# Binary sentiment classifier: token ids -> Embedding -> LSTM -> sigmoid unit.
input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(vocab_size, embedding_dim)(input_layer)

# return_state=True makes the LSTM call return three tensors:
# its output, the final hidden state and the final cell state.
lstm_layer, state_h, state_c = LSTM(latent_dim, return_state=True)(embedding_layer)

# The classification head reads the final hidden state.
# NOTE(review): the Embedding is built without mask_zero and the inputs are
# post-padded before training, so state_h is taken after the padding steps —
# consider masking or pre-padding; confirm against the training results.
output_layer = Dense(1, activation='sigmoid')(state_h)
classification_model = Model(input_layer, output_layer)

In [60]:
# Print the layer-by-layer summary of the classifier.
classification_model.summary()

In [61]:
# Configure training: Adam optimizer, binary cross-entropy loss (single sigmoid output), accuracy metric.
classification_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [62]:
# Bring every review to exactly `max_len` tokens: longer reviews lose their
# tail, shorter ones are padded at the end.
x_train = pad_sequences(
    x_train,
    maxlen=max_len,
    padding='post',
    truncating='post',
)

# Single training epoch; the last 10% of the samples are held out for validation.
classification_model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=1,
    validation_split=0.1,
)

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 33ms/step - accuracy: 0.5194 - loss: 0.6933 - val_accuracy: 0.4367 - val_loss: 0.6977


<keras.src.callbacks.history.History at 0x7957e8852b10>

In [63]:
# Score the first training review; the model expects a batch, so the single
# sequence is reshaped to one row.
sample_review = x_train[0].reshape(1, -1)
pred = classification_model.predict(sample_review)

# Scores above 0.5 are reported as positive, everything else as negative.
score = pred[0][0]
if score > 0.5:
    print(f"Positive (prediction: {score:.4f})")
else:
    print(f"Negative (prediction: {score:.4f})")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
Positive (prediction: 0.5065)


In [64]:
# Word -> index table shipped with the IMDB dataset.
word_index = tf.keras.datasets.imdb.get_word_index()

# Invert it into index -> word, shifting every index by 3 so that ids 0-3
# stay free for the special markers assigned below.
reverse_word_index = {index + 3: word for word, index in word_index.items()}
reverse_word_index.update({
    0: "<PAD>",
    1: "<START>",
    2: "<UNK>",
    3: "<UNUSED>",
})

def decode_review(text, index_to_word=None):
    """Convert a sequence of integer token ids back into readable text.

    Args:
        text: Iterable of token ids.
        index_to_word: Optional mapping from token id to word. When omitted,
            the notebook-level ``reverse_word_index`` table is used, which
            keeps existing ``decode_review(text)`` calls working unchanged.

    Returns:
        The words joined by single spaces; ids missing from the mapping are
        rendered as ``'?'``.
    """
    if index_to_word is None:
        # Resolved at call time so the table only has to exist when it is needed.
        index_to_word = reverse_word_index
    return ' '.join(index_to_word.get(i, '?') for i in text)

# Decode the first (padded/truncated) training review back into words.
sample_review_decoded = decode_review(x_train[0])


In [65]:
# Score the first training review (reshaped to a batch of one).
sample_review = x_train[0].reshape(1,-1)
pred = classification_model.predict(sample_review)

# Resolve the label outside the f-string: reusing double quotes inside a
# double-quoted f-string only parses on Python 3.12+ and is a SyntaxError
# on earlier interpreters.
score = pred[0][0]
sentiment = "Positive" if score > 0.5 else "Negative"

sample_review_decoded = decode_review(x_train[0])
print(f"Review: {sample_review_decoded}")
print(f"Prediction: {sentiment} (score: {score:.4f})")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Review: <START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also <UNK> to the two little boy's that played the <UNK> of norman and paul they were just brilliant children are often left out of the <UNK> list i think because the stars that play them all grown up are such a big profile

In [66]:
# Persist the trained classifier; a later cell reloads this exact filename.
# NOTE(review): the .h5 extension selects the HDF5 format, which newer Keras
# releases presumably treat as legacy — confirm before switching formats.
classification_model.save("lstm_imdb_model.h5")



<h2>Why fine-tuning is not possible in an LSTM model</h2>

---



In [68]:
# Hyperparameters for the fine-tuning experiment.
# NOTE(review): vocab_size is 1000 here, whereas the classifier saved earlier
# was built with vocab_size = 10000 — confirm this difference is intentional.
max_len = 200
vocab_size = 1000
embedding_dim = 128
latent_dim = 256

# Reload the IMDB training split with the smaller vocabulary and pad/truncate
# every review at the end to max_len tokens.
(x_train, y_train), _ = tf.keras.datasets.imdb.load_data(num_words=vocab_size)
x_train = pad_sequences(x_train, maxlen=max_len, padding='post', truncating='post')

In [69]:
# Keep only the first 1000 padded reviews and their labels for fine-tuning.
x_train = x_train[:1000]
y_train = y_train[:1000]

In [70]:
# Reload the classifier that was saved to disk earlier in the notebook.
# `load_model` comes from the import cell at the top of the notebook, so all
# dependencies are declared in one place.
load_classification_model = load_model("lstm_imdb_model.h5")



In [71]:
# Compile the reloaded model with the same optimizer name, loss and metric as the original classifier.
# NOTE(review): recompiling presumably replaces any optimizer state restored from the file — confirm.
load_classification_model.compile(optimizer="adam",loss = "binary_crossentropy",metrics=['accuracy'])

In [72]:
# Continue training the reloaded model for one epoch on the 1000-review subset (10% held out for validation).
load_classification_model.fit(x_train,y_train,batch_size=64,epochs=1,validation_split=0.1)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - accuracy: 0.4810 - loss: 0.6924 - val_accuracy: 0.6100 - val_loss: 0.6851


<keras.src.callbacks.history.History at 0x7957e8855a60>

In [73]:
# Save the further-trained model under a new filename; the next cell reloads it.
load_classification_model.save("lstm_updated_imdb.h5")



In [74]:
# Reload the further-trained model from disk.
updated_classification_model = load_model("lstm_updated_imdb.h5")



In [75]:
# Print the layer-by-layer summary of the reloaded model.
updated_classification_model.summary()

In [76]:
# List the layers in order; the cells below pick the Embedding and LSTM out of this list by position.
updated_classification_model.layers

[<InputLayer name=input_layer_7, built=True>,
 <Embedding name=embedding_4, built=True>,
 <LSTM name=lstm_4, built=True>,
 <Dense name=dense_4, built=True>]

In [77]:
# Reuse the trained classifier's Embedding (position 1) and LSTM (position 2)
# as the encoder of a new model.
pretrained_embedding = updated_classification_model.layers[1]
pretrained_lstm = updated_classification_model.layers[2]

encoder_inputs = Input(shape=(max_len,))
encoder_embedding = pretrained_embedding(encoder_inputs)

# The reused LSTM call yields its output plus the hidden and cell states.
encoder_output, state_h, state_c = pretrained_lstm(encoder_embedding)

In [80]:
# Decoder-side sizes: target vocabulary and target sequence length.
# NOTE(review): `target_max_lenn` looks like a typo for `target_max_len`; it is
# left as-is because the data-generation cell below reads this exact name.
output_vocab_size = 8000
target_max_lenn = 50

# Run the reused classifier LSTM on the encoder embedding and unpack its
# output, hidden state and cell state under encoder-specific names, so the
# decoder below does not depend on any earlier `state_h` / `state_c` variable.
encoder_outputs, state_h_encoder, state_c_encoder = updated_classification_model.layers[2](encoder_embedding)

# Decoder input: token-id sequences of unspecified length.
decoder_inputs = Input(shape=(None,))
# New, explicitly named Embedding layer for the decoder vocabulary.
decoder_embedding_layer = Embedding(output_vocab_size, embedding_dim, name='decoder_embedding')
decoder_embedding = decoder_embedding_layer(decoder_inputs)
# The decoder LSTM returns the full output sequence and is initialised with the encoder's final states.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=[state_h_encoder, state_c_encoder])
# Per-timestep softmax over the decoder vocabulary.
decoder_dense = Dense(output_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [81]:
# Assemble the encoder-decoder model: two inputs (encoder and decoder token
# ids), one output (per-timestep distribution over the decoder vocabulary).
seq2seq_model = Model(
    inputs=[encoder_inputs, decoder_inputs],
    outputs=decoder_outputs,
)

# Integer targets are compared against the softmax output, hence the sparse loss.
seq2seq_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [82]:
# Placeholder data for the seq2seq experiment: real encoder inputs paired with
# random decoder token ids drawn from [1, output_vocab_size).
# A seeded generator keeps the random arrays identical across reruns.
rng = np.random.default_rng(42)

encode_input_data = x_train[:1000]

# Derive the sample count from the encoder data instead of repeating 1000, so
# the decoder arrays always match the number of encoder rows.
num_samples = len(encode_input_data)
decoder_input_data = rng.integers(1, output_vocab_size, size=(num_samples, target_max_lenn))
decoder_target_data = rng.integers(1, output_vocab_size, size=(num_samples, target_max_lenn))

In [83]:
# Train the encoder-decoder model for one epoch on the placeholder data (10% held out for validation).
seq2seq_model.fit([encode_input_data, decoder_input_data], decoder_target_data,batch_size= 32 , epochs=1, validation_split=0.1)

[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - accuracy: 1.5406e-04 - loss: 8.9872 - val_accuracy: 0.0000e+00 - val_loss: 8.9874


<keras.src.callbacks.history.History at 0x7957e082be60>