## Simple RNN

In [4]:
import torch
import torch.nn as nn

class SimpleRNN(nn.Module):
    """Vanilla RNN followed by a linear read-out applied at every time step.

    Args:
        input_size: Number of features in each input time step.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of values produced per time step by the linear layer.
        num_layers: Number of stacked recurrent layers (default: 1).
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs and outputs are (batch_size, seq_length, features)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

        # Linear read-out mapping each hidden state to output_size values
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the sequence through the RNN and project every time step.

        Args:
            x: Tensor of shape (batch_size, seq_length, input_size).

        Returns:
            Tensor of shape (batch_size, seq_length, output_size). Callers that
            feed this to CrossEntropyLoss must flatten the first two dimensions
            themselves; no reshaping is done here.
        """
        # Zero initial hidden state, created directly with x's device AND dtype
        # so the module keeps working after .double() / .half() / .to(device).
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)

        # rnn_out: (batch_size, seq_length, hidden_size); the final hidden
        # state is not needed for per-step prediction and is discarded.
        rnn_out, _ = self.rnn(x, h0)

        # nn.Linear acts on the last dimension, so it is applied independently
        # to every (batch, time) position without any reshape.
        return self.fc(rnn_out)

In [2]:
# Example: next-character prediction on a tiny corpus
text = "hello world"

# Vocabulary: every distinct character of the text, in sorted order,
# with lookup tables in both directions.
chars = sorted(set(text))
char_to_int = dict(zip(chars, range(len(chars))))
int_to_char = dict(enumerate(chars))

# Encode the text once; the input drops the last character and the
# target drops the first, so target[t] is the character after input[t].
encoded = [char_to_int[ch] for ch in text]
input_seq = encoded[:-1]
target_seq = encoded[1:]

# One-hot encode the input: row t of the identity matrix selected by
# input_seq[t] is exactly the indicator vector for that character.
input_one_hot = torch.eye(len(chars))[input_seq]

# Add a leading batch axis of size 1 for batch_first=True:
# input is (batch_size, seq_length, input_size), target is (batch_size, seq_length)
input_tensor = input_one_hot[None]
target_tensor = torch.tensor(target_seq)[None]

In [5]:
# Model parameters
input_size = len(chars)   # one input feature per vocabulary character (one-hot width)
hidden_size = 128
output_size = len(chars)  # one logit per vocabulary character
num_layers = 1

# Seed the RNG before the model is built so weight initialisation is
# repeatable from one "Restart & Run All" to the next.
torch.manual_seed(0)

# Instantiate the model
model = SimpleRNN(input_size, hidden_size, output_size, num_layers)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# CrossEntropyLoss expects logits of shape (N, C) and class indices of shape
# (N,), where N = batch_size * seq_length. reshape(-1) flattens the targets
# for any batch size, whereas squeeze(0) only works when batch_size == 1.
flat_targets = target_tensor.reshape(-1)

# Training loop (simplified): full-batch gradient descent on the single sequence
num_epochs = 100
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(input_tensor)  # (batch_size, seq_length, output_size)

    loss = criterion(output.reshape(-1, output_size), flat_targets)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Prediction (example)
model.eval()  # inference mode; a no-op for this architecture but the safe default
with torch.no_grad():
    predicted_output = model(input_tensor)  # (batch_size, seq_length, output_size)

    # Most likely character at each time step; [0] selects the first (here: only)
    # sequence of the batch, giving a tensor of shape (seq_length,).
    predicted_char_indices = torch.argmax(predicted_output, dim=2)[0]
    predicted_chars = [int_to_char[idx.item()] for idx in predicted_char_indices]
    print("Predicted sequence:", "".join(predicted_chars))

Epoch [10/100], Loss: 0.1196
Epoch [20/100], Loss: 0.0041
Epoch [30/100], Loss: 0.0007
Epoch [40/100], Loss: 0.0003
Epoch [50/100], Loss: 0.0002
Epoch [60/100], Loss: 0.0001
Epoch [70/100], Loss: 0.0001
Epoch [80/100], Loss: 0.0001
Epoch [90/100], Loss: 0.0001
Epoch [100/100], Loss: 0.0001
Predicted sequence: ello world


## Bidirectional RNN

In [6]:
import warnings
warnings.filterwarnings('ignore')
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences

features = 2000  # vocabulary size handed to imdb.load_data(num_words=...)
max_len = 50     # sequence length every review is brought to by pad_sequences

# load_data returns two (sequences, labels) pairs: train first, then test
train_split, test_split = imdb.load_data(num_words=features)
X_train, y_train = train_split
X_test, y_test = test_split

# Bring both splits to the same fixed length with identical settings
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step


In [7]:
from IPython.terminal.embed import EmbeddedMagics
from keras.models import Sequential
from keras.layers import Embedding,Bidirectional,SimpleRNN,Dense

# Imported here because the import line above does not bring in `Input`.
from keras.layers import Input

embedding_dim = 128  # size of each learned word vector
hidden_units = 64    # units in each direction of the recurrent layer

# NOTE: `SimpleRNN` here is the Keras layer imported above, which shadows the
# PyTorch `SimpleRNN` class defined earlier in this notebook; likewise `model`
# is rebound from the PyTorch model to this Keras model. Re-running the
# PyTorch cells after this point requires re-running the class definition first.
model = Sequential([
    # Declare the input shape explicitly. Embedding's `input_length` argument
    # is deprecated in Keras 3, and an explicit Input also builds the model
    # immediately instead of on the first call to fit().
    Input(shape=(max_len,)),
    # Token ids in [0, features) -> dense vectors of size embedding_dim
    Embedding(features, embedding_dim),
    # Run the recurrent layer over the sequence in both directions
    Bidirectional(SimpleRNN(hidden_units)),
    # Single sigmoid unit for binary classification
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [8]:
batch_size = 32
epochs = 5

# Keep the History object returned by fit(): it holds the per-epoch metrics
# for later inspection, and assigning it stops its bare repr from being
# echoed as the cell's output.
# NOTE(review): the test split doubles as validation data here, so the val_*
# numbers are computed on the same data that is evaluated afterwards.
history = model.fit(
    X_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
)

Epoch 1/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 31s 35ms/step - accuracy: 0.6213 - loss: 0.6246 - val_accuracy: 0.7777 - val_loss: 0.4821
Epoch 2/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 41s 35ms/step - accuracy: 0.8000 - loss: 0.4380 - val_accuracy: 0.7842 - val_loss: 0.4560
Epoch 3/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 40s 35ms/step - accuracy: 0.8547 - loss: 0.3429 - val_accuracy: 0.7665 - val_loss: 0.5014
Epoch 4/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 26s 34ms/step - accuracy: 0.9037 - loss: 0.2458 - val_accuracy: 0.7718 - val_loss: 0.5800
Epoch 5/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 41s 34ms/step - accuracy: 0.9449 - loss: 0.1536 - val_accuracy: 0.7442 - val_loss: 0.7218


<keras.src.callbacks.history.History at 0x7e4bf3232180>

In [9]:
# Score the trained model on the test split; the result is unpacked
# into the loss value and the accuracy metric.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics

print('Test accuracy:', accuracy)

782/782 ━━━━━━━━━━━━━━━━━━━━ 5s 7ms/step - accuracy: 0.7395 - loss: 0.7316
Test accuracy: 0.744159996509552


In [10]:
from sklearn.metrics import classification_report

# Raw model outputs for every test review
y_prob = model.predict(X_test)

# Threshold at 0.5 to turn the outputs into boolean class predictions
y_pred = y_prob > 0.5

# Per-class precision / recall / F1 against the true labels
report = classification_report(y_test, y_pred, target_names=['Negative', 'Positive'])
print(report)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 21ms/step
              precision    recall  f1-score   support

    Negative       0.79      0.67      0.72     12500
    Positive       0.71      0.82      0.76     12500

    accuracy                           0.74     25000
   macro avg       0.75      0.74      0.74     25000
weighted avg       0.75      0.74      0.74     25000

