<a href="https://colab.research.google.com/github/Aathi005/Deep-Learning/blob/main/EXP_5_Addon%20%2B%20Testcase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np

# Example data (replace with your own dataset).
# texts[i] is labelled by labels[i]; the two lists must stay aligned.
texts = ["I love this movie", "This movie was terrible", "Amazing film!", "I hated this movie"]
labels = [1, 0, 1, 0]  # 1 = positive, 0 = negative
assert len(texts) == len(labels), "every text needs exactly one label"

# Tokenize the text: build the vocabulary from `texts`, then map each
# sentence to a list of integer word ids.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences so they are all length 100
X = pad_sequences(sequences, maxlen=100)

# Convert labels to numpy array
y = np.array(labels)

# Split into train and test sets.
# random_state pins the shuffle so "Restart & Run All" reproduces the same
# split (and therefore comparable training logs) on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [3]:
from keras.models import Sequential
from keras.layers import Embedding, GRU, Dense

# Small GRU sentiment classifier: word ids -> 32-d embeddings -> GRU(100)
# -> single sigmoid unit giving P(positive).
# NOTE: `input_length` is intentionally not passed to Embedding; the argument
# is deprecated in Keras 3 and the sequence length is inferred from the data
# on the first call to fit().
model = Sequential([
    Embedding(10000, 32),
    GRU(100),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Keep the History object in a variable instead of letting the cell echo its
# repr. With the tiny toy dataset the validation split holds very few
# samples, so val_accuracy is only a smoke test, not a meaningful metric.
history = model.fit(X_train, y_train,
                    epochs=5,
                    batch_size=64,
                    validation_split=0.2)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step - accuracy: 0.5000 - loss: 0.6967 - val_accuracy: 1.0000 - val_loss: 0.6899
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 414ms/step - accuracy: 0.5000 - loss: 0.6923 - val_accuracy: 1.0000 - val_loss: 0.6901
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 743ms/step - accuracy: 0.5000 - loss: 0.6879 - val_accuracy: 0.0000e+00 - val_loss: 0.6932
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step - accuracy: 1.0000 - loss: 0.6836 - val_accuracy: 0.0000e+00 - val_loss: 0.6972
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 554ms/step - accuracy: 1.0000 - loss: 0.6791 - val_accuracy: 0.0000e+00 - val_loss: 0.7009


<keras.src.callbacks.history.History at 0x79ee32024ce0>

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Embedding, GRU, LSTM, Dense, TextVectorization
import numpy as np

# -----------------------------
# 1. Load IMDB dataset
# -----------------------------
# Vocabulary is limited to the `max_features` most frequent words and every
# review is padded/truncated to exactly `max_len` tokens.
max_features, max_len = 10000, 100

imdb_train, imdb_test = tf.keras.datasets.imdb.load_data(num_words=max_features)
X_train, y_train = imdb_train
X_test, y_test = imdb_test

# Bring both splits to a fixed width so they can be batched as 2-D arrays.
X_train, X_test = (
    tf.keras.preprocessing.sequence.pad_sequences(split, maxlen=max_len)
    for split in (X_train, X_test)
)

# -----------------------------
# 2. Define GRU model
# -----------------------------
def _build_classifier(recurrent_layer):
    """Return a compiled binary sentiment classifier around `recurrent_layer`.

    Architecture: Embedding(max_features, 32) -> recurrent_layer -> Dense(1, sigmoid).
    Both models are built through this helper so they differ ONLY in the
    recurrent cell, which keeps the GRU-vs-LSTM comparison fair.

    `input_length` is deliberately not passed to Embedding: it is deprecated
    in Keras 3, and the sequence length is inferred from the training data.
    """
    classifier = Sequential([
        Embedding(max_features, 32),
        recurrent_layer,
        Dense(1, activation="sigmoid"),
    ])
    classifier.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return classifier


gru_model = _build_classifier(GRU(100))

# -----------------------------
# 3. Define LSTM model
# -----------------------------
lstm_model = _build_classifier(LSTM(100))

# -----------------------------
# 4. Train both models (5 epochs each, identical settings)
# -----------------------------
# One shared settings dict so the two runs cannot drift apart; the last 20%
# of the training data is held out for validation in both cases.
fit_kwargs = dict(epochs=5, batch_size=64, validation_split=0.2, verbose=2)

print("\nTraining GRU model...")
# Keep the History objects so the learning curves can be inspected later.
gru_history = gru_model.fit(X_train, y_train, **fit_kwargs)

print("\nTraining LSTM model...")
lstm_history = lstm_model.fit(X_train, y_train, **fit_kwargs)

# -----------------------------
# 5. Test custom reviews
# -----------------------------
# word -> frequency rank, as published with the IMDB dataset.
word_index = tf.keras.datasets.imdb.get_word_index()

# Inverse table: encoded id -> word. Ids are the rank shifted by 3 because
# ids 0, 1 and 2 are reserved for the special tokens registered below.
reverse_index = {rank + 3: token for token, rank in word_index.items()}
reverse_index.update({0: "<PAD>", 1: "<START>", 2: "<UNK>"})

def encode_review(text, vocab=None, num_words=None, index_from=3, start_char=1, oov_char=2):
    """Encode raw text with the same id scheme that ``imdb.load_data`` uses.

    ``imdb.load_data`` does not store raw word ranks: it prepends
    ``start_char`` to every review, shifts each rank by ``index_from`` and
    replaces every id outside the ``num_words`` vocabulary with ``oov_char``.
    Looking words up in the raw word index without that shift feeds the model
    ids that stand for *different* words than the ones it was trained on, and
    rare words could produce ids beyond the embedding table.

    Args:
        text: Review text; tokens are lower-cased and split on whitespace
            (punctuation is not stripped).
        vocab: Mapping word -> frequency rank. Defaults to the notebook-level
            ``word_index``.
        num_words: Vocabulary size the model was trained with. Defaults to
            the notebook-level ``max_features``.
        index_from: Offset added to every rank (``load_data`` default: 3).
        start_char: Id placed at the start of the sequence (default: 1).
        oov_char: Id used for unknown / out-of-vocabulary words (default: 2).

    Returns:
        list[int]: ``[start_char, id_1, id_2, ...]``; just ``[start_char]``
        for empty text.
    """
    # Fall back to the notebook globals only when the caller did not supply
    # explicit values, so existing `encode_review(text)` calls keep working.
    if vocab is None:
        vocab = word_index
    if num_words is None:
        num_words = max_features

    encoded = [start_char]
    for token in text.lower().split():
        rank = vocab.get(token)
        token_id = oov_char if rank is None else rank + index_from
        # Words rarer than the training vocabulary were seen as OOV in training.
        if token_id >= num_words:
            token_id = oov_char
        encoded.append(token_id)
    return encoded

# (review text, expected sentiment) pairs used as a quick sanity check.
test_cases = [
    ("An emotional and deep plot", "Positive"),
    ("The story was dull", "Negative")
]

# Encode and pad each review on its own, then stack the rows into a single
# batch with one row per review.
X_custom = np.vstack([
    tf.keras.preprocessing.sequence.pad_sequences([encode_review(review)], maxlen=max_len)
    for review, _ in test_cases
])

# Predictions: sigmoid probabilities thresholded at 0.5 -> flat 0/1 arrays.
gru_probs = gru_model.predict(X_custom)
gru_preds = (gru_probs > 0.5).astype("int32").flatten()
lstm_probs = lstm_model.predict(X_custom)
lstm_preds = (lstm_probs > 0.5).astype("int32").flatten()

def decode_label(val):
    """Map a binary class id to its display name.

    Returns "Positive" when `val` equals 1 and "Negative" for anything else.
    """
    if val == 1:
        return "Positive"
    return "Negative"

# -----------------------------
# 6. Print results
# -----------------------------
# Fixed-width comparison table: one row per custom review, showing the
# expected label next to each model's prediction and whether they agree.
print(f"\n{'Review Text':40} | {'Expected':8} | {'GRU':8} | {'LSTM':8} | Same?")
print("-"*80)
for (text, expected), gru_raw, lstm_raw in zip(test_cases, gru_preds, lstm_preds):
    gru_out, lstm_out = decode_label(gru_raw), decode_label(lstm_raw)
    if gru_out == lstm_out:
        same = "Yes"
    else:
        same = "No"
    print(f"{text:40} | {expected:8} | {gru_out:8} | {lstm_out:8} | {same}")



Training GRU model...
Epoch 1/5
313/313 - 57s - 183ms/step - accuracy: 0.7220 - loss: 0.5129 - val_accuracy: 0.8428 - val_loss: 0.3721
Epoch 2/5
313/313 - 55s - 176ms/step - accuracy: 0.8798 - loss: 0.2933 - val_accuracy: 0.8448 - val_loss: 0.3591
Epoch 3/5
313/313 - 81s - 259ms/step - accuracy: 0.9143 - loss: 0.2229 - val_accuracy: 0.8400 - val_loss: 0.4176
Epoch 4/5
313/313 - 55s - 177ms/step - accuracy: 0.9387 - loss: 0.1654 - val_accuracy: 0.8314 - val_loss: 0.4067
Epoch 5/5
313/313 - 82s - 262ms/step - accuracy: 0.9575 - loss: 0.1196 - val_accuracy: 0.8368 - val_loss: 0.4858
