<a href="https://colab.research.google.com/github/Odima-dev/Data-Science-and-Machine-Learning/blob/main/RecurrentNeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Problem 1: Simple Forward propagation implementation of RNN
class SimpleRNN:
    """
    Simple RNN layer implementation from scratch
    """
    def __init__(self, n_nodes, initializer='he', optimizer=None):
        """
        Initialize SimpleRNN layer

        Parameters:
        n_nodes: int, number of RNN nodes
        initializer: str, weight initialization method
        optimizer: optimizer object (not used in this basic implementation)
        """
        self.n_nodes = n_nodes
        self.initializer = initializer
        self.optimizer = optimizer

        # Weights and biases will be initialized in _initialize_weights
        self.W_x = None  # Input weights
        self.W_h = None  # Hidden weights
        self.B = None    # Bias

        # For storing values during forward propagation (needed for backprop)
        self.h_states = []  # This will store all hidden states
        self.a_states = []  # Stores all pre-activation states
        self.x_inputs = []  # Will store all inputs

    def _initialize_weights(self, n_features):
        """Initialize weights and biases"""
        if self.initializer == 'he':
            # He initialization
            self.W_x = np.random.randn(n_features, self.n_nodes) * np.sqrt(2.0 / n_features)
            self.W_h = np.random.randn(self.n_nodes, self.n_nodes) * np.sqrt(2.0 / self.n_nodes)
        else:
            # Simple random initialization
            self.W_x = np.random.randn(n_features, self.n_nodes) * 0.01
            self.W_h = np.random.randn(self.n_nodes, self.n_nodes) * 0.01

        self.B = np.zeros(self.n_nodes)

    def forward(self, X, h_prev=None):
        """
        Forward propagation

        Parameters:
        X: ndarray, shape (batch_size, n_sequences, n_features)
        h_prev: ndarray, shape (batch_size, n_nodes), initial hidden state

        Returns:
        h_final: ndarray, shape (batch_size, n_nodes), final hidden state
        """
        batch_size, n_sequences, n_features = X.shape

        # Initializing weights if not already done
        if self.W_x is None:
            self._initialize_weights(n_features)

        # Initializing hidden state if not provided
        if h_prev is None:
            h_prev = np.zeros((batch_size, self.n_nodes))

        # Clearing previous stored states
        self.h_states = []
        self.a_states = []
        self.x_inputs = []

        # Storing initial hidden state
        self.h_states.append(h_prev)

        h_t = h_prev

        # Processing each time step
        for t in range(n_sequences):
            x_t = X[:, t, :]  # Input at time t: (batch_size, n_features)

            # Storing input for backpropagation
            self.x_inputs.append(x_t)

            # Computing pre-activation: a_t = x_t @ W_x + h_{t-1} @ W_h + B
            a_t = np.dot(x_t, self.W_x) + np.dot(h_t, self.W_h) + self.B

            # Storing pre-activation for backpropagation
            self.a_states.append(a_t)

            # Applying activation function: h_t = tanh(a_t)
            h_t = np.tanh(a_t)

            # Storing hidden state for backpropagation
            self.h_states.append(h_t)

        return h_t

    def backward(self, dh_next):
        """
        Backward propagation

        Parameters:
        dh_next: ndarray, shape (batch_size, n_nodes), gradient from next layer

        Returns:
        dX: ndarray, shape (batch_size, n_sequences, n_features), gradient w.r.t. input
        """
        batch_size = dh_next.shape[0]
        n_sequences = len(self.x_inputs)
        n_features = self.x_inputs[0].shape[1]

        # Initializing gradients
        dW_x = np.zeros_like(self.W_x)
        dW_h = np.zeros_like(self.W_h)
        dB = np.zeros_like(self.B)
        dX = np.zeros((batch_size, n_sequences, n_features))

        # Initializing gradient flowing back through time
        dh_t = dh_next

        # Backpropagating through time (from last to first time step)
        for t in reversed(range(n_sequences)):
            # Getting stored values
            x_t = self.x_inputs[t]
            a_t = self.a_states[t]
            h_prev = self.h_states[t]  # h_{t-1}

            # Gradient of tanh activation: d_tanh/da = 1 - tanh^2(a)
            dtanh_da = 1 - np.tanh(a_t) ** 2

            # Gradient w.r.t. pre-activation: da_t = dh_t * dtanh_da
            da_t = dh_t * dtanh_da

            # Accumulating gradients for weights and bias
            dW_x += np.dot(x_t.T, da_t)
            dW_h += np.dot(h_prev.T, da_t)
            dB += np.sum(da_t, axis=0)

            # Gradient w.r.t. input at time t
            dX[:, t, :] = np.dot(da_t, self.W_x.T)

            # Gradient w.r.t. previous hidden state (flows back through time)
            dh_t = np.dot(da_t, self.W_h.T)

        # Storing gradients for weight updates
        self.dW_x = dW_x
        self.dW_h = dW_h
        self.dB = dB

        return dX

    def update_weights(self, learning_rate):
        """Update weights using gradients"""
        self.W_x -= learning_rate * self.dW_x
        self.W_h -= learning_rate * self.dW_h
        self.B -= learning_rate * self.dB

In [24]:
# Problem 2: Experiment of forward propagation with small sequence
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]) / 100  # (n_nodes, n_nodes)

batch_size = x.shape[0]  # 1
n_sequences = x.shape[1]  # 3
n_features = x.shape[2]  # 2
n_nodes = w_x.shape[1]  # 4
h = np.zeros((batch_size, n_nodes))  # (batch_size, n_nodes)
b = np.array([1, 1, 1, 1])  # (n_nodes,)

# Creating RNN layer and set the specified weights
rnn = SimpleRNN(n_nodes)
rnn.W_x = w_x
rnn.W_h = w_h
rnn.B = b

# Testing forward propagation

print("Input shape:", x.shape)
print("Input values:")
print(x)

# Checking if result matches expected output
result = rnn.forward(x, h)
print("\nFinal hidden state:", result)
expected = np.array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])
print("\nExpected output:", expected)

print("\nCode output of forward propagation matches the expected code")


Input shape: (1, 3, 2)
Input values:
[[[0.01 0.02]
  [0.02 0.03]
  [0.03 0.04]]]

Final hidden state: [[0.79494228 0.81839002 0.83939649 0.85584174]]

Expected output: [[0.79494228 0.81839002 0.83939649 0.85584174]]

Code output of forward propagation matches the expected code


In [25]:
# Problem 3: (Advance assignment) Implementation of backpropagation
class ScratchSimpleRNNClassifier:
    """
    Simple RNN Classifier implementation from scratch
    """
    def __init__(self, n_nodes=10, learning_rate=0.01, n_epochs=100):
        """
        Initialize RNN classifier

        Parameters:
        n_nodes: int, number of RNN nodes
        learning_rate: float, learning rate for optimization
        n_epochs: int, number of training epochs
        """
        self.n_nodes = n_nodes
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs

        # Layers will be initialized during fit
        self.rnn_layer = None
        self.output_layer = None

    def _softmax(self, x):
        """Softmax activation function"""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def _cross_entropy_loss(self, y_true, y_pred):
        """Cross entropy loss function"""
        n_samples = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred + 1e-8)) / n_samples

    def fit(self, X, y):
        """
        Train the RNN classifier

        Parameters:
        X: ndarray, shape (n_samples, n_sequences, n_features)
        y: ndarray, shape (n_samples, n_classes) - one-hot encoded
        """
        n_samples, n_sequences, n_features = X.shape
        n_classes = y.shape[1]

        # Initializing layers
        self.rnn_layer = SimpleRNN(self.n_nodes)

        # Initializing output layer weights
        self.W_out = np.random.randn(self.n_nodes, n_classes) * 0.01
        self.b_out = np.zeros(n_classes)

        # Training loop
        for epoch in range(self.n_epochs):
            # Forward propagation
            h_final = self.rnn_layer.forward(X)

            # Output layer forward propagation
            z_out = np.dot(h_final, self.W_out) + self.b_out
            y_pred = self._softmax(z_out)

            # Computing loss
            loss = self._cross_entropy_loss(y, y_pred)

            # Backward propagation
            # Gradient w.r.t. output layer
            dz_out = y_pred - y
            dW_out = np.dot(h_final.T, dz_out) / n_samples
            db_out = np.sum(dz_out, axis=0) / n_samples

            # Gradient w.r.t. final hidden state
            dh_final = np.dot(dz_out, self.W_out.T)

            # RNN backward propagation
            self.rnn_layer.backward(dh_final)

            # Updating weights
            self.rnn_layer.update_weights(self.learning_rate)
            self.W_out -= self.learning_rate * dW_out
            self.b_out -= self.learning_rate * db_out

            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """
        Make predictions

        Parameters:
        X: ndarray, shape (n_samples, n_sequences, n_features)

        Returns:
        predictions: ndarray, shape (n_samples,)
        """
        h_final = self.rnn_layer.forward(X)
        z_out = np.dot(h_final, self.W_out) + self.b_out
        y_pred = self._softmax(z_out)
        return np.argmax(y_pred, axis=1)

# Demonstrating the implementation for Problem 3 Above
# Creating synthetic data for classification
np.random.seed(42)
n_samples = 100
n_sequences = 5
n_features = 3
n_classes = 2

# Generating synthetic sequence data
X_demo = np.random.randn(n_samples, n_sequences, n_features)
# Creating labels based on sum of sequence
y_demo_labels = (np.sum(X_demo, axis=(1, 2)) > 0).astype(int)
# Converting to one-hot encoding
y_demo = np.eye(n_classes)[y_demo_labels]

print(f"Demo data shape: {X_demo.shape}")
print(f"Demo labels shape: {y_demo.shape}")

# Creating and training classifier
classifier = ScratchSimpleRNNClassifier(n_nodes=8, learning_rate=0.1, n_epochs=50)
print("\nTraining classifier...")
classifier.fit(X_demo, y_demo)

# Making predictions
predictions = classifier.predict(X_demo)
accuracy = np.mean(predictions == y_demo_labels)
print(f"\nTraining accuracy: {accuracy:.4f}")

Demo data shape: (100, 5, 3)
Demo labels shape: (100, 2)

Training classifier...
Epoch 0, Loss: 0.6943
Epoch 10, Loss: 0.3533
Epoch 20, Loss: 0.3998
Epoch 30, Loss: 0.2501
Epoch 40, Loss: 0.1191

Training accuracy: 0.9400
