# week 7
## Recurrent neural network (RNNs)
### part 3: Analyze the vanishing gradient problem in RNNs.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, BatchNormalization, Dropout

In [2]:
# Generate synthetic sine wave data
def generate_sine_wave(seq_length, num_samples):
    X, y = [], []
    for _ in range(num_samples):
        start = np.random.rand() * 2 * np.pi # Random start point
    seq = np.array([np.sin(start + i) for i in range(seq_length + 1)])
    X.append(seq[:-1])
    y.append(seq[-1])
    return np.array(X), np.array(y)

In [3]:
# Experimenting with different sequence lengths and hidden units
seq_lengths = [30, 50, 70]
hidden_units = [20, 50, 100]
num_samples = 1000

def build_rnn(units, seq_length, use_improvements=False):
    model = Sequential()
    model.add(SimpleRNN(units, activation='tanh', return_sequences=True, input_shape=(seq_length, 1)))
    if use_improvements:
        model.add(BatchNormalization()) # Helps stabilize training
        model.add(Dropout(0.2)) # Prevents overfitting
    model.add(SimpleRNN(units, activation='tanh'))# other conditions
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

In [4]:
for seq_length in seq_lengths:
      for units in hidden_units:
        print(f"Training SimpleRNN with sequence length {seq_length} and {units} hidden units")
   
        X, y = generate_sine_wave(seq_length, num_samples)
        X = X.reshape((X.shape[0], X.shape[1], 1))

        test_size = int(0.2 * num_samples)
        X_train, X_test = X[:-test_size], X[-test_size:]
        y_train, y_test = y[:-test_size], y[-test_size:]

        # Build standard SimpleRNN model (prone to vanishing gradients)
        model = build_rnn(units, seq_length, use_improvements=False)
        history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test), verbose=0)

        # Plot training history
        plt.figure(figsize=(10, 4))
        plt.plot(history.history['loss'], label='Train Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.legend()
        plt.title(f"Loss Curve (SimpleRNN, Seq Length: {seq_length}, Units: {units})")
        plt.show()

        # Evaluate model
        predictions = model.predict(X_test)
        plt.figure(figsize=(10, 5))
        plt.plot(y_test, label='True Values', color='blue')
        plt.plot(predictions, label='Predictions', color='orange')
        plt.legend()
        plt.xlabel("Sample Index")
        plt.ylabel("Value")
        plt.title(f"True vs Predicted Values (SimpleRNN, Seq Length: {seq_length}, Units: {units})")
        plt.show()

Training SimpleRNN with sequence length 30 and 20 hidden units


ValueError: Unexpected result of `train_function` (Empty logs). This could be due to issues in input pipeline that resulted in an empty dataset. Otherwise, please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [None]:
# Using batch normalization and dropout to mitigate vanishing gradient
for seq_length in seq_lengths:
      for units in hidden_units:
        print(f"Training Improved SimpleRNN to overcome vanishing gradient problem (Seq Length: {seq_length}, Units: {units})")
   
        X, y = generate_sine_wave(seq_length, num_samples)
        X = X.reshape((X.shape[0], X.shape[1], 1))

        test_size = int(0.2 * num_samples)
        X_train, X_test = X[:-test_size], X[-test_size:]
        y_train, y_test = y[:-test_size], y[-test_size:]

        # Build improved RNN model
        model = build_rnn(units, seq_length, use_improvements=True)
        history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test), verbose=0)

        # Plot training history
        plt.figure(figsize=(10, 4))
        plt.plot(history.history['loss'], label='Train Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.legend()
        plt.title(f"Loss Curve (Improved SimpleRNN, Seq Length: {seq_length}, Units: {units})")
        plt.show()

        # Evaluate model
        predictions = model.predict(X_test)
        plt.figure(figsize=(10, 5))
        plt.plot(y_test, label='True Values', color='blue')
        plt.plot(predictions, label='Predictions', color='orange')
        plt.legend()
        plt.xlabel("Sample Index")
        plt.ylabel("Value")
        plt.title(f"True vs Predicted Values (Improved SimpleRNN, Seq Length: {seq_length}, Units: {units})")
        plt.show()
