<a href="https://colab.research.google.com/github/Remonah-3/Github_Assignment/blob/master/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, GRU, LSTM, Dense, Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import ConvLSTM2D, Conv3D, BatchNormalization
import numpy as np
import matplotlib.pyplot as plt


max_features = 20000
maxlen = 80
batch_size = 32

# Load IMDB data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

def train_rnn(model_type='SimpleRNN'):
    model = Sequential()
    model.add(Embedding(max_features, 128))

    if model_type == 'SimpleRNN':
        model.add(SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2))
    elif model_type == 'GRU':
        model.add(GRU(128, dropout=0.2, recurrent_dropout=0.2))
    elif model_type == 'LSTM':
        model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    print(f"\nTraining {model_type}...")
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=3,  # Keep low for quick testing
              validation_data=(x_test, y_test))

    score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
    print(f"{model_type} Test accuracy: {acc:.4f}")

train_rnn('SimpleRNN')
train_rnn('GRU')
train_rnn('LSTM')



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step

Training SimpleRNN...
Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 75ms/step - accuracy: 0.5197 - loss: 0.7025 - val_accuracy: 0.5882 - val_loss: 0.6533
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 73ms/step - accuracy: 0.6699 - loss: 0.5996 - val_accuracy: 0.6881 - val_loss: 0.5818
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 86ms/step - accuracy: 0.7564 - loss: 0.4922 - val_accuracy: 0.6596 - val_loss: 0.6215
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 12ms/step - accuracy: 0.6591 - loss: 0.6256
SimpleRNN Test accuracy: 0.6596

Training GRU...
Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 249ms/step - accuracy: 0.6864 - loss: 0.5602 - val_accuracy: 0.8376 - val_l

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Conv3D, BatchNormalization
import numpy as np

# Build ConvLSTM model
seq = Sequential()
seq.add(ConvLSTM2D(filters=16, kernel_size=(3, 3),
                   input_shape=(None, 40, 40, 1),
                   padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(ConvLSTM2D(filters=16, kernel_size=(3, 3),
                   padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3),
               activation='sigmoid', padding='same'))

seq.compile(loss='binary_crossentropy', optimizer='adadelta')

# Generate artificial movie data
def generate_movies(n_samples=200, n_frames=10):
    row, col = 40, 40
    X = np.zeros((n_samples, n_frames, row, col, 1))
    Y = np.zeros((n_samples, n_frames, row, col, 1))

    for i in range(n_samples):
        xstart, ystart = np.random.randint(0, 35, 2)
        directionx, directiony = np.random.randint(-1, 2, 2)
        for t in range(n_frames):
            x_shift = xstart + directionx * t
            y_shift = ystart + directiony * t
            X[i, t, x_shift:x_shift+5, y_shift:y_shift+5, 0] = 1
            Y[i, t, x_shift+directionx:x_shift+directionx+5,
                  y_shift+directiony:y_shift+directiony+5, 0] = 1
    return X, Y

# Generate data
noisy_movies, shifted_movies = generate_movies()

# Train model
seq.fit(noisy_movies, shifted_movies, batch_size=10, epochs=5, validation_split=0.1)


  super().__init__(**kwargs)


Epoch 1/5
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2s/step - loss: 0.7856 - val_loss: 0.6968
Epoch 2/5
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - loss: 0.7817 - val_loss: 0.6998
Epoch 3/5
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 2s/step - loss: 0.7803 - val_loss: 0.7028
Epoch 4/5
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 2s/step - loss: 0.7765 - val_loss: 0.7058
Epoch 5/5
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 2s/step - loss: 0.7789 - val_loss: 0.7090


<keras.src.callbacks.history.History at 0x7fed5f978470>

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, GRU, LSTM, Dense, Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import ConvLSTM2D, Conv3D, BatchNormalization
import numpy as np

# IMDB RNNs
max_features = 20000
maxlen = 80
batch_size = 32

# Load IMDB data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

def train_rnn(model_type='SimpleRNN', epochs=3):
    model = Sequential()
    model.add(Embedding(max_features, 128))

    if model_type == 'SimpleRNN':
        model.add(SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2))
    elif model_type == 'GRU':
        model.add(GRU(128, dropout=0.2, recurrent_dropout=0.2))
    elif model_type == 'LSTM':
        model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    print(f"\nTraining {model_type}...")
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_data=(x_test, y_test), verbose=2)

    _, acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    print(f"{model_type} Test Accuracy: {acc:.4f}")
    return acc

# Train all three and store accuracies
results = {}
for rnn_type in ['SimpleRNN', 'GRU', 'LSTM']:
    results[rnn_type] = train_rnn(rnn_type, epochs=3)

# Print comparison table
print("\n--- Accuracy Comparison ---")
for k, v in results.items():
    print(f"{k}: {v:.4f}")

# ConvLSTM2D
# Generate simple moving squares dataset
def generate_movies(n_samples=200, n_frames=10):
    row, col = 40, 40
    X = np.zeros((n_samples, n_frames, row, col, 1))
    Y = np.zeros((n_samples, n_frames, row, col, 1))

    for i in range(n_samples):
        xstart, ystart = np.random.randint(0, 35, 2)
        directionx, directiony = np.random.randint(-1, 2, 2)
        for t in range(n_frames):
            x_shift = xstart + directionx * t
            y_shift = ystart + directiony * t
            X[i, t, x_shift:x_shift+5, y_shift:y_shift+5, 0] = 1
            Y[i, t, x_shift+directionx:x_shift+directionx+5,
                  y_shift+directiony:y_shift+directiony+5, 0] = 1
    return X, Y

noisy_movies, shifted_movies = generate_movies()

# Build ConvLSTM2D model
seq = Sequential()
seq.add(ConvLSTM2D(filters=16, kernel_size=(3,3),
                   input_shape=(None, 40, 40, 1),
                   padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(ConvLSTM2D(filters=16, kernel_size=(3,3),
                   padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(Conv3D(filters=1, kernel_size=(3,3,3), activation='sigmoid', padding='same'))

seq.compile(loss='binary_crossentropy', optimizer='adadelta')

seq.fit(noisy_movies, shifted_movies, batch_size=10, epochs=5, validation_split=0.1, verbose=2)


Training SimpleRNN...
Epoch 1/3
782/782 - 59s - 75ms/step - accuracy: 0.5399 - loss: 0.6860 - val_accuracy: 0.6145 - val_loss: 0.6396
Epoch 2/3
782/782 - 80s - 102ms/step - accuracy: 0.7227 - loss: 0.5453 - val_accuracy: 0.7918 - val_loss: 0.4817
Epoch 3/3
782/782 - 83s - 107ms/step - accuracy: 0.7916 - loss: 0.4574 - val_accuracy: 0.7408 - val_loss: 0.5293
SimpleRNN Test Accuracy: 0.7408

Training GRU...
Epoch 1/3
782/782 - 208s - 266ms/step - accuracy: 0.7718 - loss: 0.4725 - val_accuracy: 0.8332 - val_loss: 0.3798
Epoch 2/3
782/782 - 186s - 237ms/step - accuracy: 0.8903 - loss: 0.2696 - val_accuracy: 0.8486 - val_loss: 0.3479
Epoch 3/3
782/782 - 201s - 257ms/step - accuracy: 0.9417 - loss: 0.1572 - val_accuracy: 0.8352 - val_loss: 0.4159
GRU Test Accuracy: 0.8352

Training LSTM...
Epoch 1/3
782/782 - 222s - 284ms/step - accuracy: 0.7784 - loss: 0.4616 - val_accuracy: 0.8354 - val_loss: 0.3786
Epoch 2/3
782/782 - 248s - 317ms/step - accuracy: 0.8823 - loss: 0.2912 - val_accuracy: 0.

<keras.src.callbacks.history.History at 0x7fed32135eb0>

In [6]:
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical

max_words = 10000  # top 10,000 words
maxlen = 100       # max sequence length
batch_size = 32

# Load Reuters dataset
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words)

# Pad sequences
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Convert labels to one-hot (multi-class classification)
num_classes = max(y_train) + 1
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

print(f'Training samples: {x_train.shape}, Test samples: {x_test.shape}, Classes: {num_classes}')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: (8982, 100), Test samples: (2246, 100), Classes: 46


In [7]:
model = Sequential()
model.add(Embedding(max_words, 128, input_length=maxlen))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(num_classes, activation='softmax'))  # multi-class output

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

epochs = 5
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test),
                    verbose=2)
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print(f"Reuters LSTM Test Accuracy: {acc:.4f}")

Epoch 1/5




281/281 - 87s - 309ms/step - accuracy: 0.4466 - loss: 2.1559 - val_accuracy: 0.5227 - val_loss: 1.8018
Epoch 2/5
281/281 - 80s - 283ms/step - accuracy: 0.5577 - loss: 1.6907 - val_accuracy: 0.5984 - val_loss: 1.6378
Epoch 3/5
281/281 - 82s - 292ms/step - accuracy: 0.6199 - loss: 1.4649 - val_accuracy: 0.6273 - val_loss: 1.5086
Epoch 4/5
281/281 - 82s - 293ms/step - accuracy: 0.6691 - loss: 1.2752 - val_accuracy: 0.6318 - val_loss: 1.4342
Epoch 5/5
281/281 - 79s - 280ms/step - accuracy: 0.7185 - loss: 1.0799 - val_accuracy: 0.6518 - val_loss: 1.4105
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - accuracy: 0.6565 - loss: 1.3814
Reuters LSTM Test Accuracy: 0.6518


##### **RNN**
The RNN class is the base class for all recurrent layers in Keras. It defines how sequences are processed over time. Most of the time, we do not use it directly because it is low-level. Instead, we use higher-level layers like SimpleRNN, GRU, or LSTM, which are easier to work with. You would only use RNN directly if you want to create a custom RNN layer.

##### **SimpleRNNCell**
The SimpleRNNCell class represents a single time step of a SimpleRNN. It calculates the next hidden state for one step in the sequence. In practice, we rarely use it directly because the SimpleRNN layer already handles the full sequence automatically. It is mainly for custom RNN designs where you need fine control.

##### **GRUCell**
The GRUCell class represents a single time step of a GRU (Gated Recurrent Unit). Like SimpleRNNCell, it computes the hidden state for one step only. Most of the time, we just use the GRU layer, which handles the whole sequence. GRUCell is used when you want custom sequence processing.

##### **LSTMCell**
The LSTMCell class represents a single time step of an LSTM (Long Short-Term Memory). It calculates the hidden and cell states for one step. Usually, we use the LSTM layer instead of LSTMCell because it automatically processes the full sequence. This cell is useful if you want custom LSTM behavior.

##### **StackedRNNCells**
The StackedRNNCells class allows you to combine multiple RNN cells into one layer. It can be used to create a multi-layer RNN manually. In practice, most people just stack RNN layers sequentially using SimpleRNN, GRU, or LSTM, so this class is rarely needed.

##### **CuDNNGRU**
The CuDNNGRU is a GPU-optimized GRU layer. It is much faster than a regular GRU when training on a GPU. However, it cannot run on a CPU and has fewer options for dropout and activation functions. It is useful when training large models on big datasets.

##### **CuDNNLSTM**
The CuDNNLSTM is a GPU-optimized LSTM layer. It trains much faster on a GPU than a normal LSTM. Like CuDNNGRU, it cannot run on a CPU and has limited configuration options. It is very useful for large LSTM models where speed is important.