<a href="https://colab.research.google.com/github/PaulNjinu254/LSTM-Series/blob/main/LSTM_Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Imports (note: no GPU check is actually performed in this cell)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import time

# Shared hyperparameters for the data-preparation helpers and models below
MAX_WORDS = 10000  # vocabulary cap: passed as num_words to load_data and as the Embedding input size
MAXLEN = 200  # length every sequence is padded to; also the model input length
EMBED_DIM = 32  # width of the Embedding output vectors
BATCH_SIZE = 64  # mini-batch size for the IMDB training runs
EPOCHS = 1  # as requested; used by every fit() call in this cell

def prepare_imdb(max_words=MAX_WORDS, maxlen=MAXLEN):
    """Load the IMDB dataset and pad its sequences to a fixed length.

    Args:
        max_words: Forwarded to ``imdb.load_data`` as ``num_words``.
        maxlen: Forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where the ``x`` arrays are
        the padded sequences and the ``y`` arrays are the labels exactly as
        returned by the loader.
    """
    pad = keras.preprocessing.sequence.pad_sequences
    train_split, test_split = keras.datasets.imdb.load_data(num_words=max_words)
    train_tokens, train_labels = train_split
    test_tokens, test_labels = test_split
    # Pad the training split first, then the test split, as two separate calls.
    padded_train = pad(train_tokens, maxlen=maxlen)
    padded_test = pad(test_tokens, maxlen=maxlen)
    return (padded_train, train_labels), (padded_test, test_labels)

def prepare_reuters(max_words=MAX_WORDS, maxlen=MAXLEN):
    """Load the Reuters dataset, pad the sequences and one-hot encode the labels.

    Args:
        max_words: Forwarded to ``reuters.load_data`` as ``num_words``.
        maxlen: Forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        ``((x_train, y_train), (x_test, y_test), num_classes)`` where the ``x``
        arrays are padded sequences, the ``y`` arrays are one-hot matrices with
        ``num_classes`` columns, and ``num_classes`` is a plain ``int``.
    """
    (x_train, y_train), (x_test, y_test) = keras.datasets.reuters.load_data(num_words=max_words)
    x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)
    # Size the label space from BOTH splits: a class id that only occurs in the
    # test split would otherwise exceed the one-hot width and make
    # to_categorical fail. Cast to int so callers get a plain Python integer
    # rather than a NumPy scalar.
    num_classes = int(max(np.max(y_train), np.max(y_test))) + 1
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return (x_train, y_train), (x_test, y_test), num_classes

def build_sequence_model(cell_type='SimpleRNN', units=32, input_shape=(MAXLEN,)):
    """Build and compile a binary sequence classifier.

    The model is Embedding -> one recurrent layer -> Dense(1, sigmoid), compiled
    with Adam, binary cross-entropy and an accuracy metric.

    Args:
        cell_type: Which recurrent layer to use: 'SimpleRNN', 'GRU' or 'LSTM'.
        units: Number of units in the recurrent layer.
        input_shape: Shape of one int32 input sample (batch axis excluded).

    Returns:
        A compiled ``keras.Model`` named ``"<cell_type>_binary_model"``.

    Raises:
        ValueError: If ``cell_type`` is not one of the supported names.
    """
    recurrent_layers = {
        'SimpleRNN': layers.SimpleRNN,
        'GRU': layers.GRU,
        'LSTM': layers.LSTM,
    }
    # Validate up front so an unsupported name fails before any layer is built.
    if cell_type not in recurrent_layers:
        raise ValueError('Unsupported cell_type')

    inputs = keras.Input(shape=input_shape, dtype='int32')
    # No input_length here: the sequence length comes from `inputs`, so the
    # embedding always agrees with `input_shape` (the previous hard-coded
    # input_length=MAXLEN could contradict a caller-supplied shape, and the
    # argument is deprecated in Keras 3).
    x = layers.Embedding(MAX_WORDS, EMBED_DIM)(inputs)
    x = recurrent_layers[cell_type](units)(x)

    outputs = layers.Dense(1, activation='sigmoid')(x)
    model = keras.Model(inputs, outputs, name=f"{cell_type}_binary_model")
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train & compare SimpleRNN, GRU, LSTM on IMDB
print('\nPreparing IMDB data...')
(imdb_x_train, imdb_y_train), (imdb_x_test, imdb_y_test) = prepare_imdb()

# Maps each cell name to its test loss, test accuracy and training time (seconds).
results = {}
for cell in ['SimpleRNN', 'GRU', 'LSTM']:
    print(f"\nBuilding and training {cell} model...")
    model = build_sequence_model(cell_type=cell, units=32)
    model.summary()
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    history = model.fit(imdb_x_train, imdb_y_train,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        validation_split=0.2,
                        verbose=2)
    # Only fit() is timed; evaluation on the test split happens afterwards.
    elapsed = time.perf_counter() - start
    loss, acc = model.evaluate(imdb_x_test, imdb_y_test, verbose=0)
    results[cell] = {'test_loss': float(loss), 'test_acc': float(acc), 'train_time_s': elapsed}
    print(f"{cell} -> test_acc: {acc:.4f}, time: {elapsed:.1f}s")

print('\nIMDB comparison results:')
for k, v in results.items():
    print(k, v)

# ConvLSTM2D minimal demo
print('\nConvLSTM2D demo (synthetic data, tiny model)...')
# Tiny synthetic spatio-temporal dataset laid out as
# (samples, frames, rows, cols, channels).
samples, frames, rows, cols, channels = 256, 8, 8, 8, 1
X = np.random.rand(samples, frames, rows, cols, channels).astype('float32')
# Targets are random 0/1 labels, so this only demonstrates that the layer
# wires up and trains; there is nothing to learn from the data.
y = np.random.randint(0, 2, size=(samples,))

inputs = keras.Input(shape=(frames, rows, cols, channels))
# return_sequences=False keeps only the final step's output before flattening.
x = layers.ConvLSTM2D(
    filters=8,
    kernel_size=(3, 3),
    padding='same',
    return_sequences=False,
)(inputs)
x = layers.Flatten()(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
conv_model = keras.Model(inputs, outputs, name='conv_lstm_demo')
conv_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
conv_model.summary()
conv_model.fit(X, y, batch_size=32, epochs=EPOCHS, verbose=2)

# Q2 - Reuters dataset with LSTM
print('\nPreparing Reuters data...')
(re_x_train, re_y_train), (re_x_test, re_y_test), num_classes = prepare_reuters()
print('Num classes (Reuters):', num_classes)

# Build a model suitable for multi-class classification:
# Embedding -> LSTM(64) -> softmax over num_classes, trained on one-hot labels.
inputs = keras.Input(shape=(MAXLEN,), dtype='int32')
# input_length is omitted: the sequence length is already fixed by `inputs`,
# and the argument is deprecated in Keras 3.
x = layers.Embedding(MAX_WORDS, EMBED_DIM)(inputs)
x = layers.LSTM(64)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)
reuters_model = keras.Model(inputs, outputs, name='reuters_lstm')
# categorical_crossentropy matches the one-hot labels from prepare_reuters().
reuters_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
reuters_model.summary()
# Use the shared BATCH_SIZE constant (64) rather than repeating the literal.
reuters_model.fit(re_x_train, re_y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.1, verbose=2)
loss, acc = reuters_model.evaluate(re_x_test, re_y_test, verbose=0)
print(f"Reuters test accuracy: {acc:.4f}")

# Q3 - Explanations and short usage demos
# The answer is kept as one triple-quoted string and printed verbatim; only the
# explanations are present here (no runnable usage demos follow in this cell).
# NOTE(review): the CuDNN bullet is worded loosely. As far as I know the
# standalone CuDNNGRU/CuDNNLSTM classes exist only under tf.compat.v1 in TF 2.x,
# and the fused cuDNN kernel is chosen when GRU/LSTM keep their default
# arguments rather than via "flags" - confirm against the TF docs before
# rewording the text.
md = """
Q3:

- RNN: a high-level wrapper that lets you define custom RNN cells and use them like other recurrent layers.

- SimpleRNNCell / GRUCell / LSTMCell: these are cell-level building blocks representing a single step's computation. Use them when you need fine-grained control or to build custom RNNs.

- StackedRNNCells: combine multiple cells into a single cell that runs stacked (like multi-layer RNN inside a single RNN wrapper). Useful for implementing stacked cells with custom behavior.

- CuDNNGRU / CuDNNLSTM: historically GPU-optimized implementations for speed on NVIDIA GPUs (pre-TF 2.0/2.x). Modern TF integrates GPU-accelerations into standard GRU/LSTM when compatible flags are used; these classes may be deprecated in current TF releases.

"""
print(md)



Preparing IMDB data...

Building and training SimpleRNN model...


313/313 - 9s - 27ms/step - accuracy: 0.6697 - loss: 0.5905 - val_accuracy: 0.7706 - val_loss: 0.4947
SimpleRNN -> test_acc: 0.7732, time: 11.6s

Building and training GRU model...


313/313 - 4s - 14ms/step - accuracy: 0.7545 - loss: 0.4740 - val_accuracy: 0.8264 - val_loss: 0.3993
GRU -> test_acc: 0.8270, time: 6.3s

Building and training LSTM model...


313/313 - 4s - 14ms/step - accuracy: 0.7725 - loss: 0.4650 - val_accuracy: 0.8088 - val_loss: 0.4521
LSTM -> test_acc: 0.8046, time: 4.5s

IMDB comparison results:
SimpleRNN {'test_loss': 0.48899170756340027, 'test_acc': 0.7731999754905701, 'train_time_s': 11.553054571151733}
GRU {'test_loss': 0.39077523350715637, 'test_acc': 0.8270000219345093, 'train_time_s': 6.262945652008057}
LSTM {'test_loss': 0.45595479011535645, 'test_acc': 0.8045600056648254, 'train_time_s': 4.515757322311401}

ConvLSTM2D demo (synthetic data, tiny model)...


8/8 - 3s - 365ms/step - accuracy: 0.4844 - loss: 0.6977

Preparing Reuters data...
Num classes (Reuters): 46


127/127 - 3s - 25ms/step - accuracy: 0.3541 - loss: 2.5526 - val_accuracy: 0.4561 - val_loss: 2.2813
Reuters test accuracy: 0.4648

Q3:

- RNN: a high-level wrapper that lets you define custom RNN cells and use them like other recurrent layers.

- SimpleRNNCell / GRUCell / LSTMCell: these are cell-level building blocks representing a single step's computation. Use them when you need fine-grained control or to build custom RNNs.

- StackedRNNCells: combine multiple cells into a single cell that runs stacked (like multi-layer RNN inside a single RNN wrapper). Useful for implementing stacked cells with custom behavior.

- CuDNNGRU / CuDNNLSTM: historically GPU-optimized implementations for speed on NVIDIA GPUs (pre-TF 2.0/2.x). Modern TF integrates GPU-accelerations into standard GRU/LSTM when compatible flags are used; these classes may be deprecated in current TF releases.


