# RNN

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Check if GPU is available
if tf.test.is_gpu_available():
    print("GPU available: True")
else:
    print("GPU available: False")

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU available: False


In [3]:
class SimpleRNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01  # Input to hidden
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # Hidden to hidden
        self.Why = np.random.randn(output_size, hidden_size) * 0.01  # Hidden to output
        self.bh = np.zeros((hidden_size, 1))  # Hidden bias
        self.by = np.zeros((output_size, 1))  # Output bias

    def forward(self, inputs, h_prev):
        xs, hs, ys, ps = {}, {}, {}, {}
        hs[-1] = np.copy(h_prev)
        for t in range(len(inputs)):
            xs[t] = np.zeros((self.input_size, 1))
            xs[t][inputs[t]] = 1  # One-hot encoding of input
            hs[t] = np.tanh(np.dot(self.Wxh, xs[t]) + np.dot(self.Whh, hs[t - 1]) + self.bh)  # Hidden state
            ys[t] = np.dot(self.Why, hs[t]) + self.by  # Unnormalized log probabilities for output
            ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))  # Softmax to get probabilities
        return xs, hs, ps

    def backward(self, inputs, targets, xs, hs, ps):
        dWxh, dWhh, dWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why)
        dbh, dby = np.zeros_like(self.bh), np.zeros_like(self.by)
        dhnext = np.zeros_like(hs[0])
        for t in reversed(range(len(inputs))):
            dy = np.copy(ps[t])
            dy[targets] -= 1  # Backprop into y, targets is now a single value
            dWhy += np.dot(dy, hs[t].T)
            dby += dy
            dh = np.dot(self.Why.T, dy) + dhnext  # Backprop into h
            dhraw = (1 - hs[t] * hs[t]) * dh  # Backprop through tanh nonlinearity
            dbh += dhraw
            dWxh += np.dot(dhraw, xs[t].T)
            dWhh += np.dot(dhraw, hs[t - 1].T)
            dhnext = np.dot(self.Whh.T, dhraw)
        for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(dparam, -5, 5, out=dparam)  # Clip gradients to prevent exploding gradients
        return dWxh, dWhh, dWhy, dbh, dby

    def train(self, inputs, targets, learning_rate=0.1, num_epochs=1000):
        for epoch in range(num_epochs):
            h_prev = np.zeros((self.hidden_size, 1))  # Initialize hidden state at the beginning of each epoch
            loss = 0
            xs, hs, ps = self.forward(inputs, h_prev)
            dWxh, dWhh, dWhy, dbh, dby = self.backward(inputs, targets, xs, hs, ps)
            self.Wxh -= learning_rate * dWxh
            self.Whh -= learning_rate * dWhh
            self.Why -= learning_rate * dWhy
            self.bh -= learning_rate * dbh
            self.by -= learning_rate * dby
            loss += -np.sum([np.log(ps[t][targets[t], 0]) for t in range(len(inputs))])
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

    def sample(self, seed_index, n):
        x = np.zeros((self.input_size, 1))
        x[seed_index] = 1
        h = np.zeros((self.hidden_size, 1))
        indices = []
        for _ in range(n):
            h = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h) + self.bh)
            y = np.dot(self.Why, h) + self.by
            p = np.exp(y) / np.sum(np.exp(y))
            idx = np.random.choice(range(self.output_size), p=p.ravel())
            x = np.zeros((self.input_size, 1))
            x[idx] = 1
            indices.append(idx)
        return indices

    def predict(self, inputs):
        h_prev = np.zeros((self.hidden_size, 1))
        xs, hs, ps = self.forward(inputs, h_prev)
        predictions = [np.argmax(ps[t]) for t in range(len(inputs))]
        return predictions

### Class initialization Parameters
- **input_size:** The dimensionality of the input data. For example, if the input data is one-hot encoded vectors of size 100, then input_size would be 100.
- **hidden_size:** The number of units in the hidden layer. This determines the capacity of the network to capture information from the input sequence.
- **output_size:** The dimensionality of the output data. For example, if we are predicting a probability distribution over 10 classes, then output_size would be 10.

### Weights and Biases
- **Wxh:** The weight matrix for the connections between the input layer and the hidden layer. It has dimensions (hidden_size, input_size).
- **Whh:** The weight matrix for the connections within the hidden layer (i.e., from the previous hidden state to the current hidden state). It has dimensions (hidden_size, hidden_size).
- **Why:** The weight matrix for the connections between the hidden layer and the output layer. It has dimensions (output_size, hidden_size).
- **bh:** The bias vector for the hidden layer. It has dimensions (hidden_size, 1).
- **by:** The bias vector for the output layer. It has dimensions (output_size, 1).

### Forward Pass Variables
- **xs:** A dictionary storing the input vectors at each time step.
- **hs:** A dictionary storing the hidden state vectors at each time step.
- **ys:** A dictionary storing the unnormalized log probabilities (before applying softmax) for the outputs at each time step.
- **ps:** A dictionary storing the probabilities (after applying softmax) for the outputs at each time step.

### Backward Pass Variables
- **dWxh:** The gradient of the loss with respect to the weight matrix Wxh.
- **dWhh:** The gradient of the loss with respect to the weight matrix Whh.
- **dWhy:** The gradient of the loss with respect to the weight matrix Why.
- **dbh:** The gradient of the loss with respect to the bias vector bh.
- **dby:** The gradient of the loss with respect to the bias vector by.
- **dhnext:** The gradient of the loss with respect to the hidden state from the next time step, used in the backward pass to accumulate gradients through time.

### Training Parameters
- **inputs:** A list of integers representing the input sequence (e.g., one-hot encoded indices).
- **targets:** A list of integers representing the target sequence for training.
- **learning_rate:** The step size for updating the weights during training.
- **num_epochs:** The number of training iterations over the entire dataset.

### Sample Method Variables
- **seed_index:** The starting index for generating a sequence.
- **n:** The length of the sequence to generate.

In [4]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing import sequence

# Set random seed for reproducibility
np.random.seed(7)

# Load the IMDB dataset, keeping only the top 5000 words and using sequences of up to 500 words
top_words = 5000
max_review_length = 500
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)

# Pad sequences to ensure they are all the same length
x_train = sequence.pad_sequences(x_train, maxlen=max_review_length)
x_test = sequence.pad_sequences(x_test, maxlen=max_review_length)

In [None]:
def train_rnn_on_imdb(rnn, x_train, y_train, num_epochs=10, learning_rate=0.1):
    for epoch in range(num_epochs):
        loss = 0
        for i in range(len(x_train)):
            inputs = x_train[i]
            target = y_train[i]

            # Forward pass
            xs, hs, ps = rnn.forward(inputs, np.zeros((rnn.hidden_size, 1)))

            # Backward pass
            targets = target  # Single target value
            dWxh, dWhh, dWhy, dbh, dby = rnn.backward(inputs, targets, xs, hs, ps)

            # Update weights and biases
            rnn.Wxh -= learning_rate * dWxh
            rnn.Whh -= learning_rate * dWhh
            rnn.Why -= learning_rate * dWhy
            rnn.bh -= learning_rate * dbh
            rnn.by -= learning_rate * dby

            # Calculate loss
            loss += -np.sum(np.log(ps[len(inputs) - 1][targets, 0]))

        if epoch % 1 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")

# Initialize the RNN
input_size = top_words
hidden_size = 100
output_size = 2  # Binary classification (2 classes)
rnn = SimpleRNN(input_size, hidden_size, output_size)

# Train the RNN on IMDB dataset
train_rnn_on_imdb(rnn, x_train, y_train, num_epochs=10, learning_rate=0.1)

## Evaluation

In [None]:
def evaluate_model(rnn, x_test, y_test):
    correct_predictions = 0
    total_predictions = len(x_test)

    for i in range(total_predictions):
        inputs = x_test[i]
        target = y_test[i]
        predictions = rnn.predict(inputs)
        if predictions[-1] == target:
            correct_predictions += 1

    accuracy = correct_predictions / total_predictions
    return accuracy

# Assuming x_test and y_test are your test datasets
accuracy = evaluate_model(rnn, x_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

## Saving the Model

In [None]:
import pickle

def save_model(rnn, filename):
    with open(filename, 'wb') as file:
        pickle.dump(rnn, file)

def load_model(filename):
    with open(filename, 'rb') as file:
        return pickle.load(file)

save_model(rnn, 'rnn_model.pkl')

# BiRNN

In [None]:
class BiRNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.forward_rnn = SimpleRNN(input_size, hidden_size, output_size)
        self.backward_rnn = SimpleRNN(input_size, hidden_size, output_size)

    def forward(self, inputs):
        forward_outputs, _ = self.forward_rnn.forward(inputs)
        backward_outputs, _ = self.backward_rnn.forward(inputs[::-1])  # Reverse the input sequence

        # Concatenate the forward and backward outputs
        outputs = np.concatenate((forward_outputs, backward_outputs[::-1]), axis=1)

        return outputs

    def train(self, inputs, targets, learning_rate=0.1, num_epochs=1000):
        for epoch in range(num_epochs):
            forward_loss = self.forward_rnn.train(inputs, targets, learning_rate)
            backward_loss = self.backward_rnn.train(inputs[::-1], targets[::-1], learning_rate)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Forward Loss: {forward_loss}, Backward Loss: {backward_loss}")

    def predict(self, inputs):
        forward_predictions = self.forward_rnn.predict(inputs)
        backward_predictions = self.backward_rnn.predict(inputs[::-1])  # Reverse the input sequence

        # Concatenate the forward and backward predictions
        predictions = np.concatenate((forward_predictions, backward_predictions[::-1]), axis=1)

        return predictions

In [None]:
max_features = 10000 
maxlen = 500
batch_size = 32

input_size = max_features
hidden_size = 128
output_size = 1
learning_rate = 0.1
num_epochs = 10

bi_rnn = BiRNN(input_size, hidden_size, output_size)
bi_rnn.train(x_train, y_train, learning_rate, num_epochs)

## Evaluation

In [None]:
accuracy = evaluate_model(bi_rnn, x_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Saving the Model

In [None]:
save_model(bo_rnn, 'lstm_model.pkl')

# LSTM

In [None]:
class LSTM:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.Wf = np.random.randn(hidden_size, input_size + hidden_size) * 0.01  # Forget gate
        self.Wi = np.random.randn(hidden_size, input_size + hidden_size) * 0.01  # Input gate
        self.Wc = np.random.randn(hidden_size, input_size + hidden_size) * 0.01  # Candidate value
        self.Wo = np.random.randn(hidden_size, input_size + hidden_size) * 0.01  # Output gate
        self.Wy = np.random.randn(output_size, hidden_size) * 0.01  # Hidden to output

        self.bf = np.zeros((hidden_size, 1))  # Forget gate bias
        self.bi = np.zeros((hidden_size, 1))  # Input gate bias
        self.bc = np.zeros((hidden_size, 1))  # Candidate value bias
        self.bo = np.zeros((hidden_size, 1))  # Output gate bias
        self.by = np.zeros((output_size, 1))  # Output bias

    def forward(self, inputs, h_prev, c_prev):
        xs, hs, cs, ys, ps = {}, {}, {}, {}, {}
        hs[-1] = np.copy(h_prev)
        cs[-1] = np.copy(c_prev)

        for t in range(len(inputs)):
            xs[t] = np.zeros((self.input_size, 1))
            xs[t][inputs[t]] = 1  # One-hot encoding of input

            concat = np.vstack((hs[t-1], xs[t]))

            ft = self.sigmoid(np.dot(self.Wf, concat) + self.bf)  # Forget gate
            it = self.sigmoid(np.dot(self.Wi, concat) + self.bi)  # Input gate
            cct = np.tanh(np.dot(self.Wc, concat) + self.bc)  # Candidate value
            ct = ft * cs[t-1] + it * cct  # New cell state
            ot = self.sigmoid(np.dot(self.Wo, concat) + self.bo)  # Output gate
            ht = ot * np.tanh(ct)  # New hidden state

            ys[t] = np.dot(self.Wy, ht) + self.by  # Unnormalized log probabilities for output
            ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))  # Softmax to get probabilities

            hs[t] = ht
            cs[t] = ct

        return xs, hs, cs, ps

    def backward(self, inputs, targets, xs, hs, cs, ps):
        dWf, dWi, dWc, dWo, dWy = np.zeros_like(self.Wf), np.zeros_like(self.Wi), np.zeros_like(self.Wc), np.zeros_like(self.Wo), np.zeros_like(self.Wy)
        dbf, dbi, dbc, dbo, dby = np.zeros_like(self.bf), np.zeros_like(self.bi), np.zeros_like(self.bc), np.zeros_like(self.bo), np.zeros_like(self.by)
        dhnext = np.zeros_like(hs[0])
        dcnext = np.zeros_like(cs[0])

        for t in reversed(range(len(inputs))):
            dy = np.copy(ps[t])
            dy[targets[t]] -= 1  # Backprop into y

            dWy += np.dot(dy, hs[t].T)
            dby += dy
            dh = np.dot(self.Wy.T, dy) + dhnext  # Backprop into h

            do = dh * np.tanh(cs[t])
            do = self.sigmoid_derivative(do, hs[t])

            dc = dh * cs[t] * (1 - np.tanh(cs[t])**2) + dcnext
            dcct = dc * cs[t-1] * (1 - cs[t-1]**2)
            dcct = self.tanh_derivative(dcct, cs[t-1])

            di = dc * cs[t-1]
            di = self.sigmoid_derivative(di, cs[t-1])

            df = dc * cs[t-1]
            df = self.sigmoid_derivative(df, cs[t-1])

            dconcat = np.dot(self.Wf.T, df) + np.dot(self.Wi.T, di) + np.dot(self.Wc.T, dcct) + np.dot(self.Wo.T, do)
            dconcat = dconcat[:self.hidden_size, :] + dconcat[self.hidden_size:, :]

            dWf += np.dot(df, dconcat.T)
            dbf += df
            dWi += np.dot(di, dconcat.T)
            dbi += di
            dWc += np.dot(dcct, dconcat.T)
            dbc += dcct
            dWo += np.dot(do, dconcat.T)
            dbo += do

            dhnext = dconcat[:self.hidden_size, :]
            dcnext = dc * cs[t-1]

        for dparam in [dWf, dWi, dWc, dWo, dWy, dbf, dbi, dbc, dbo, dby]:
            np.clip(dparam, -5, 5, out=dparam)  # Clip gradients to prevent exploding gradients

        return dWf, dWi, dWc, dWo, dWy, dbf, dbi, dbc, dbo, dby

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, sigmoid, x):
        return sigmoid * (1 - sigmoid)

    def tanh_derivative(self, tanh, x):
        return 1 - tanh**2

    def train(self, inputs, targets, learning_rate=0.1, num_epochs=1000):
        for epoch in range(num_epochs):
            h_prev = np.zeros((self.hidden_size, 1))
            c_prev = np.zeros((self.hidden_size, 1))
            loss = 0

            xs, hs, cs, ps = self.forward(inputs, h_prev, c_prev)
            dWf, dWi, dWc, dWo, dWy, dbf, dbi, dbc, dbo, dby = self.backward(inputs, targets, xs, hs, cs, ps)

            self.Wf -= learning_rate * dWf
            self.Wi -= learning_rate * dWi
            self.Wc -= learning_rate * dWc
            self.Wo -= learning_rate * dWo
            self.Wy -= learning_rate * dWy
            self.bf -= learning_rate * dbf
            self.bi -= learning_rate * dbi
            self.bc -= learning_rate * dbc
            self.bo -= learning_rate * dbo
            self.by -= learning_rate * dby

            loss += -np.sum([np.log(ps[t][targets[t], 0]) for t in range(len(inputs))])

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, inputs):
        h_prev = np.zeros((self.hidden_size, 1))
        c_prev = np.zeros((self.hidden_size, 1))
        xs, hs, cs, ps = self.forward(inputs, h_prev, c_prev)
        predictions = [np.argmax(ps[t]) for t in range(len(inputs))]
        return predictions

- **input_size:** The size of the input vocabulary or the number of unique words in the input sequences. This determines the size of the input layer of the LSTM.
- **hidden_size:** The number of hidden units in the LSTM cell. This parameter determines the complexity and capacity of the LSTM to learn patterns in the input sequences.
- **output_size:** The size of the output layer, which is typically the number of classes in a classification task. For binary classification tasks like sentiment analysis, output_size is set to 2 (positive or negative).
- **Wf, Wi, Wc, Wo, Wy:** Weight matrices for the forget gate, input gate, candidate value, output gate, and output layer, respectively. These matrices are learned during training to capture patterns in the input sequences.
- **bf, bi, bc, bo, by:** Bias vectors for the forget gate, input gate, candidate value, output gate, and output layer, respectively. These biases help the LSTM model learn the appropriate transformations for the input data.
- **h_prev:** The previous hidden state of the LSTM cell, initialized as zeros. This state is used to initialize the hidden state at the beginning of each input sequence.
- **c_prev:** The previous cell state of the LSTM cell, initialized as zeros. This state is used to initialize the cell state at the beginning of each input sequence.
- **xs, hs, cs, ys, ps:** Dictionaries to store the input vectors, hidden states, cell states, unnormalized log probabilities, and softmax probabilities for each time step in the input sequence.

In [None]:
# Train the LSTM model
input_size = max_features
hidden_size = 128
output_size = 2  # Binary classification (positive or negative review)

lstm = LSTM(input_size, hidden_size, output_size)

# Training on a single sample for simplicity
inputs = x_train[0]
targets = [y_train[0]] * len(inputs)  # Repeat target for each time step

lstm.train(inputs, targets, learning_rate=0.1, num_epochs=10)

## Evaluation

In [None]:
accuracy = evaluate_model(lstm, x_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
save_model(lstm, 'lstm_model.pkl')

# GRU

In [None]:
class GRU:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weight matrices
        self.Wz = np.random.randn(hidden_size, input_size + hidden_size) * 0.01  # Update gate weights
        self.Wr = np.random.randn(hidden_size, input_size + hidden_size) * 0.01  # Reset gate weights
        self.Wh = np.random.randn(hidden_size, input_size + hidden_size) * 0.01  # Output weights
        self.Wy = np.random.randn(output_size, hidden_size) * 0.01  # Hidden to output weights

        # Biases
        self.bz = np.zeros((hidden_size, 1))  # Update gate bias
        self.br = np.zeros((hidden_size, 1))  # Reset gate bias
        self.bh = np.zeros((hidden_size, 1))  # Output bias
        self.by = np.zeros((output_size, 1))  # Output bias

    def forward(self, inputs, h_prev):
        xs, zs, rs, hs, ys, ps = {}, {}, {}, {}, {}, {}
        hs[-1] = np.copy(h_prev)

        for t in range(len(inputs)):
            xs[t] = np.zeros((self.input_size, 1))
            xs[t][inputs[t]] = 1  # One-hot encoding of input

            concat = np.vstack((hs[t-1], xs[t]))

            # Update gate
            zs[t] = self.sigmoid(np.dot(self.Wz, concat) + self.bz)
            # Reset gate
            rs[t] = self.sigmoid(np.dot(self.Wr, concat) + self.br)
            # Hidden state
            hs[t] = zs[t] * hs[t-1] + (1 - zs[t]) * np.tanh(np.dot(self.Wh, np.vstack((rs[t] * hs[t-1], xs[t]))) + self.bh)
            # Output
            ys[t] = np.dot(self.Wy, hs[t]) + self.by
            # Softmax to get probabilities
            ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))

        return xs, zs, rs, hs, ps

    def backward(self, inputs, targets, xs, zs, rs, hs, ps):
        dWz, dWr, dWh, dWy = np.zeros_like(self.Wz), np.zeros_like(self.Wr), np.zeros_like(self.Wh), np.zeros_like(self.Wy)
        dbz, dbr, dbh, dby = np.zeros_like(self.bz), np.zeros_like(self.br), np.zeros_like(self.bh), np.zeros_like(self.by)
        dhnext = np.zeros_like(hs[0])

        for t in reversed(range(len(inputs))):
            dy = np.copy(ps[t])
            dy[targets[t]] -= 1  # Backprop into y

            dWy += np.dot(dy, hs[t].T)
            dby += dy
            dh = np.dot(self.Wy.T, dy) + dhnext  # Backprop into h

            dhraw = (1 - hs[t] ** 2) * dh  # Backprop through tanh

            dh_prev_reset = np.dot(self.Wh.T, dhraw)

            # Backprop through update gate
            dz = hs[t-1] - hs[t]
            dz = self.sigmoid_derivative(zs[t], dz)
            dWz += np.dot(dz, np.vstack((hs[t-1], xs[t])).T)
            dbz += dz

            # Backprop through reset gate
            dr = np.dot(self.Wh[:, :self.hidden_size].T, dhraw)
            dr = self.sigmoid_derivative(rs[t], dr)
            dWr += np.dot(dr, np.vstack((hs[t-1], xs[t])).T)
            dbr += dr

            # Backprop through hidden state
            dWh += np.dot(dhraw, np.vstack((rs[t] * hs[t-1], xs[t])).T)
            dbh += dhraw

            dhnext = np.dot(self.Wh[:, :self.hidden_size].T, dhraw)

        for dparam in [dWz, dWr, dWh, dWy, dbz, dbr, dbh, dby]:
            np.clip(dparam, -5, 5, out=dparam)  # Clip gradients to prevent exploding gradients

        return dWz, dWr, dWh, dWy, dbz, dbr, dbh, dby

    def train(self, inputs, targets, learning_rate=0.1, num_epochs=1000):
        for epoch in range(num_epochs):
            h_prev = np.zeros((self.hidden_size, 1))
            loss = 0

            xs, zs, rs, hs, ps = self.forward(inputs, h_prev)
            dWz, dWr, dWh, dWy, dbz, dbr, dbh, dby = self.backward(inputs, targets, xs, zs, rs, hs, ps)

            self.Wz -= learning_rate * dWz
            self.Wr -= learning_rate * dWr
            self.Wh -= learning_rate * dWh
            self.Wy -= learning_rate * dWy
            self.bz -= learning_rate * dbz
            self.br -= learning_rate * dbr
            self.bh -= learning_rate * dbh
            self.by -= learning_rate * dby

            loss += -np.sum([np.log(ps[t][targets[t], 0]) for t in range(len(inputs))])

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, inputs):
        h_prev = np.zeros((self.hidden_size, 1))
        xs, zs, rs, hs, ps = self.forward(inputs, h_prev)
        predictions = [np.argmax(ps[t]) for t in range(len(inputs))]
        return predictions

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, sigmoid, x):
        return sigmoid * (1 - sigmoid)

    def tanh_derivative(self, tanh, x):
        return 1 - tanh ** 2

In [None]:
hidden_size = 128
output_size = 1
num_epochs = 5

gru = GRU(input_size=max_features, hidden_size=hidden_size, output_size=output_size)
gru.train(input_train, y_train, num_epochs=num_epochs)

In [None]:
accuracy = evaluate_model(gru, x_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
save_model(gru, 'gru_model.pkl')