<a href="https://colab.research.google.com/github/CSSamarasinghe/SE4050_Assignment/blob/IT21222740/GRU_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing import sequence

In [4]:

# Activation Functions and Derivatives
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise.

    The exponential is only ever evaluated at ``-|x|``, so it cannot
    overflow. The naive form evaluates ``exp(-x)``, which overflows (and
    emits a RuntimeWarning) for large negative inputs.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``x``; every value lies in [0, 1].
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always within [0, 1]
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the algebraically equal
    # e^x / (1 + e^x). Both are written in terms of ``decay``.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwrap a 0-d array back into a scalar

def sigmoid_derivative(x):
    """Return the derivative of the logistic sigmoid evaluated at ``x``.

    ``x`` is the pre-activation input (not an already-activated value); the
    result follows the identity ``s'(x) = s(x) * (1 - s(x))`` with ``s``
    being ``sigmoid``.
    """
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x`` via ``np.tanh``."""
    squashed = np.tanh(x)
    return squashed

def tanh_derivative(x):
    """Return the derivative of tanh evaluated at ``x``.

    ``x`` is the pre-activation input; the result is ``1 - tanh(x) ** 2``.
    """
    squashed = np.tanh(x)
    return 1 - squashed ** 2

In [7]:

# Load and Preprocess Data
# Vocabulary cap passed to imdb.load_data as num_words; it is also reused
# later in the notebook as the one-hot width (vocab_size).
top_words = 5000
# Despite the name, this is the per-review sequence length: it is passed as
# maxlen to pad_sequences and used as the time dimension in reshape_input.
max_words = 500

# Load the Keras IMDB dataset as (sequences, labels) pairs for the train and
# test splits.
# NOTE(review): presumably each sequence is a list of integer word indices
# and each label is 0/1 — confirm against the Keras documentation.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

In [8]:
# Pad sequences (data preprocessing)
# Bring every review to a common length of max_words so each split becomes
# a rectangular 2-D array that can be indexed as X[i] / X.shape[0].
# NOTE(review): per the Keras docs pad_sequences pads with 0 and, by
# default, pads/truncates at the start of the sequence — confirm for the
# installed Keras version.
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

In [9]:
# GRU Cell
class GRUCell:
    """One gated-recurrent-unit step implemented with NumPy.

    Parameters live on the instance: ``W*`` multiply the input, ``U*``
    multiply the previous hidden state and ``b*`` are biases, for the update
    gate (``z``), the reset gate (``r``) and the candidate state (``h``).
    """

    def __init__(self, input_size, hidden_size):
        """Create small random weights and zero biases.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Number of units in the hidden state.
        """
        self.hidden_size = hidden_size

        # The draw order (Wz, Wr, Wh, then Uz, Ur, Uh) is fixed so that a
        # seeded np.random state always produces the same parameters.
        for input_weight in ("Wz", "Wr", "Wh"):
            setattr(self, input_weight,
                    np.random.randn(input_size, hidden_size) * 0.01)

        for recurrent_weight in ("Uz", "Ur", "Uh"):
            setattr(self, recurrent_weight,
                    np.random.randn(hidden_size, hidden_size) * 0.01)

        for bias in ("bz", "br", "bh"):
            setattr(self, bias, np.zeros((1, hidden_size)))

    def forward(self, x, h_prev):
        """Advance the hidden state by one time step.

        Args:
            x: Input for this step; multiplied on the right by the
                ``(input_size, hidden_size)`` input weights.
            h_prev: Hidden state from the previous step; multiplied on the
                right by the ``(hidden_size, hidden_size)`` recurrent weights.

        Returns:
            The new hidden state,
            ``(1 - z) * h_prev + z * h_candidate``.
        """
        # Update gate: how much of the candidate replaces the old state.
        update_pre = np.dot(x, self.Wz) + np.dot(h_prev, self.Uz) + self.bz
        update_gate = sigmoid(update_pre)

        # Reset gate: how much of the old state feeds the candidate.
        reset_pre = np.dot(x, self.Wr) + np.dot(h_prev, self.Ur) + self.br
        reset_gate = sigmoid(reset_pre)

        # Candidate hidden state built from the input and the gated history.
        gated_history = reset_gate * h_prev
        candidate_pre = np.dot(x, self.Wh) + np.dot(gated_history, self.Uh) + self.bh
        candidate = tanh(candidate_pre)

        # Blend the old state and the candidate using the update gate.
        return (1 - update_gate) * h_prev + update_gate * candidate

In [7]:
# Fully Connected Layer (Dense Layer)
class DenseLayer:
    """Affine layer computing ``x . weights + bias`` with no activation."""

    def __init__(self, input_size, output_size):
        """Create zero biases and small Gaussian weights.

        Args:
            input_size: Number of features in each input row.
            output_size: Number of output units.
        """
        self.bias = np.zeros((1, output_size))
        self.weights = np.random.randn(input_size, output_size) * 0.01

    def forward(self, x):
        """Return the affine projection of ``x``; the bias row is broadcast."""
        projected = np.dot(x, self.weights)
        return projected + self.bias

In [10]:

# Binary Crossentropy Loss
def binary_crossentropy_loss(y_true, y_pred):
    """Return the mean binary cross-entropy between labels and probabilities.

    Labels and predictions are compared element-wise. When they hold the
    same number of elements but differ in shape (typically labels of shape
    ``(N,)`` against predictions of shape ``(N, 1)``), the labels are
    reshaped to the prediction shape first. Without that step NumPy would
    broadcast the pair to an ``(N, N)`` matrix and average every label
    against every prediction.

    Args:
        y_true: Array-like of 0/1 labels.
        y_pred: Array-like of predicted probabilities.

    Returns:
        The scalar mean loss.
    """
    epsilon = 1e-9  # keeps log() finite when a probability is exactly 0 or 1
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true = y_true.reshape(y_pred.shape)

    positive_term = y_true * np.log(y_pred + epsilon)
    negative_term = (1 - y_true) * np.log(1 - y_pred + epsilon)
    return -np.mean(positive_term + negative_term)

def binary_crossentropy_loss_derivative(y_true, y_pred):
    """Return the element-wise derivative of binary cross-entropy w.r.t. ``y_pred``.

    This is the derivative of each per-element loss term; it is not divided
    by the number of elements. When labels and predictions hold the same
    number of elements but differ in shape (e.g. ``(N,)`` against
    ``(N, 1)``), the labels are reshaped to the prediction shape so the
    result has one gradient entry per prediction instead of an ``(N, N)``
    broadcast.

    Args:
        y_true: Array-like of 0/1 labels.
        y_pred: Array-like of predicted probabilities.

    Returns:
        An array of gradients shaped like ``y_pred``.
    """
    epsilon = 1e-9  # avoids division by zero at probabilities of 0 or 1
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true = y_true.reshape(y_pred.shape)

    return -(y_true / (y_pred + epsilon)) + ((1 - y_true) / (1 - y_pred + epsilon))

In [11]:
# Model
class GRUModel:
    """Sequence classifier: a GRU cell unrolled over time feeding a dense layer.

    Only the final hidden state is passed to the dense layer, whose output is
    squashed with ``sigmoid``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the recurrent cell first, then the dense output layer."""
        self.gru = GRUCell(input_size, hidden_size)
        self.dense = DenseLayer(hidden_size, output_size)

    def forward(self, X):
        """Run the whole sequence through the model.

        Args:
            X: 3-D array indexed as ``(sample, time step, feature)``.

        Returns:
            The sigmoid of the dense layer applied to the last hidden state.
        """
        n_samples, n_steps, _ = X.shape
        # The recurrence starts from an all-zero hidden state.
        state = np.zeros((n_samples, self.gru.hidden_size))
        for step in range(n_steps):
            state = self.gru.forward(X[:, step], state)

        logits = self.dense.forward(state)
        return sigmoid(logits)

    def compute_loss(self, y_true, y_pred):
        """Return the binary cross-entropy of ``y_pred`` against ``y_true``."""
        loss = binary_crossentropy_loss(y_true, y_pred)
        return loss

    def backprop(self, X, y_true, y_pred):
        """Placeholder: computes no gradients and changes no weights.

        A real implementation would need backpropagation through time
        (BPTT); until then calling this method has no effect.
        """
        return None


In [12]:

# Accuracy calculation
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions whose thresholded class matches the label.

    Probabilities of at least 0.5 count as class 1. When labels and
    predictions hold the same number of elements but differ in shape
    (typically ``(N,)`` against ``(N, 1)``), the labels are reshaped to the
    prediction shape. Otherwise the ``==`` comparison would broadcast to an
    ``(N, N)`` matrix and the mean would no longer be the accuracy.

    Args:
        y_true: Array-like of 0/1 labels.
        y_pred: Array-like of predicted probabilities.

    Returns:
        The accuracy as a float in [0, 1].
    """
    y_true = np.asarray(y_true)
    y_pred_class = (np.asarray(y_pred) >= 0.5).astype(int)
    if y_true.shape != y_pred_class.shape and y_true.size == y_pred_class.size:
        y_true = y_true.reshape(y_pred_class.shape)
    return np.mean(y_true == y_pred_class)

# Convert the input data to the right shape for the model
def reshape_input(X, max_words, vocab_size, dtype=np.float64):
    """One-hot encode integer sequences into a dense 3-D tensor.

    Each row is encoded with a single vectorised assignment rather than one
    Python-level assignment per token.

    Args:
        X: Array whose first axis indexes samples; each ``X[i]`` is a
            sequence of integer word indices.
        max_words: Size of the time axis of the output.
        vocab_size: Size of the one-hot axis. Indices outside
            ``[0, vocab_size)`` leave their time step all-zero.
        dtype: Element type of the output. The default (``np.float64``)
            matches ``np.zeros``; a narrower type such as ``np.float32`` or
            ``np.uint8`` reduces memory use proportionally.

    Returns:
        An array of shape ``(X.shape[0], max_words, vocab_size)`` holding 1
        at ``[i, t, X[i][t]]`` for every in-range index and 0 elsewhere.
    """
    one_hot = np.zeros((X.shape[0], max_words, vocab_size), dtype=dtype)
    for i, sequence_row in enumerate(X):
        row = np.asarray(sequence_row)
        if row.size == 0:
            continue  # nothing to encode for an empty sequence
        # Negative indices are rejected as well: they would otherwise wrap
        # around and mark a slot at the end of the vocabulary axis.
        steps = np.flatnonzero((row >= 0) & (row < vocab_size))
        one_hot[i, steps, row[steps]] = 1
    return one_hot


In [None]:


# Prepare the input data
# NOTE(review): reshape_input allocates a dense tensor of shape
# (num_reviews, max_words, vocab_size) with NumPy's default float64 dtype.
# With max_words = 500 and vocab_size = 5000 that is 500 * 5000 * 8 bytes
# = 20 MB per review, so encoding a full split at once is unlikely to fit
# in memory — consider encoding a subset or working in batches.
vocab_size = top_words
X_train_reshaped = reshape_input(X_train, max_words, vocab_size)
X_test_reshaped = reshape_input(X_test, max_words, vocab_size)

# Hyperparameters
input_size = vocab_size  # Vocabulary size
hidden_size = 100
output_size = 1  # one sigmoid output per review, so predictions have one column
learning_rate = 0.01  # not referenced by any code visible in this notebook
epochs = 3

# Initialize model
model = GRUModel(input_size, hidden_size, output_size)


In [None]:


# Train the model
# The model emits one column per sample (output_size = 1). Reshape the
# labels to a column as well so loss and accuracy compare label i with
# prediction i; a 1-D label vector against an (N, 1) prediction would
# otherwise broadcast to an (N, N) matrix.
y_train_col = np.reshape(y_train, (-1, 1))

# NOTE: model.backprop is a placeholder that updates nothing, so the loss
# and accuracy printed below stay the same from epoch to epoch.
for epoch in range(epochs):
    y_pred = model.forward(X_train_reshaped)  # Forward pass
    loss = model.compute_loss(y_train_col, y_pred)  # Compute loss
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss}")
    model.backprop(X_train_reshaped, y_train_col, y_pred)  # Placeholder for backpropagation

    accuracy = calculate_accuracy(y_train_col, y_pred)  # Training accuracy
    print(f"Training Accuracy: {accuracy * 100:.2f}%")

In [1]:



# Evaluate the model on test data
y_pred_test = model.forward(X_test_reshaped)
# Compare labels as a column so label i is matched with prediction i; a 1-D
# label vector against the (N, 1) predictions would broadcast to (N, N).
test_accuracy = calculate_accuracy(np.reshape(y_test, (-1, 1)), y_pred_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

NameError: name 'model' is not defined