<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_PyTorch/blob/main/BidirectionalLSTMRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
class BiRNN(nn.Module):
    """Bidirectional LSTM followed by a linear layer and a sigmoid.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units per direction.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of sigmoid outputs produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        # The LSTM concatenates both directions, hence 2 * hidden_size features.
        self.fc = nn.Linear(hidden_size * 2, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of sequences to sigmoid outputs.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size) holding sigmoid activations.
        """
        # Build the zero initial states from ``x`` itself so they always share
        # its device and dtype, instead of relying on a module-level ``device``.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        out, _ = self.lstm(x, (h0, c0))
        # NOTE(review): at the last time step the reverse direction has only
        # processed the final element of the sequence; its full-sequence
        # summary lives in h_n. Kept as-is to preserve the model's behavior.
        out = self.fc(out[:, -1, :])
        return self.sigmoid(out)
# Example usage: place the model on the GPU when one is available, else the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = BiRNN(
    input_size=32,
    hidden_size=64,
    num_layers=1,
    output_size=1,
)
model = model.to(device)
# Binary cross-entropy loss and an Adam optimizer with its default settings.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters())

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    """Synthetic dataset of random sequences paired with random binary labels.

    ``X`` is drawn from a standard normal with shape
    (num_samples, seq_len, input_size). ``y`` has shape (num_samples, 1) and
    holds 0.0 or 1.0, sampled independently of ``X``.
    """

    def __init__(self, num_samples=2000, seq_len=25, input_size=32):
        feature_shape = (num_samples, seq_len, input_size)
        label_shape = (num_samples, 1)
        self.X = torch.randn(feature_shape)
        # Integer draws from {0, 1}, stored as float32 labels.
        self.y = torch.randint(low=0, high=2, size=label_shape).to(torch.float32)

    def __len__(self):
        # Number of samples is the size of the leading dimension of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        # Return the (features, label) pair at position ``idx``.
        features = self.X[idx]
        label = self.y[idx]
        return features, label

In [None]:
# Build the synthetic dataset and wrap it in a shuffling mini-batch loader.
dataset = SequenceDataset(
    num_samples=2000,
    seq_len=25,
    input_size=32,
)
loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
)

In [None]:
# Train for a fixed number of epochs and report each epoch's summed batch loss.
model.train()
num_epochs = 10

for epoch in range(num_epochs):
    # Running sum of the per-batch loss values for this epoch.
    total_loss = 0

    for X_batch, y_batch in loader:
        # Move the mini-batch onto the same device as the model.
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # Clear gradients from the previous step, then forward / backward / update.
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

Epoch 1, Loss: 43.8042
Epoch 2, Loss: 43.4687
Epoch 3, Loss: 43.1943
Epoch 4, Loss: 42.6748
Epoch 5, Loss: 41.8468
Epoch 6, Loss: 40.2690
Epoch 7, Loss: 37.6566
Epoch 8, Loss: 33.4677
Epoch 9, Loss: 27.9783
Epoch 10, Loss: 21.8307


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LSTMCell(nn.Module):
    """Single LSTM step implemented with explicit weight matrices.

    The four gates (input, forget, candidate, output) are stacked along the
    first dimension of ``W``, ``U`` and ``b``, in that order.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Input weights
        self.W = nn.Parameter(torch.randn(4 * hidden_size, input_size) * 0.1)
        # Hidden weights
        self.U = nn.Parameter(torch.randn(4 * hidden_size, hidden_size) * 0.1)
        # Bias
        self.b = nn.Parameter(torch.zeros(4 * hidden_size))

    def extra_repr(self):
        # Show the layer sizes in print(model) instead of a bare "LSTMCell()".
        return f"input_size={self.input_size}, hidden_size={self.hidden_size}"

    def forward(self, x, h_prev, c_prev):
        """Advance the cell by one time step.

        Args:
            x: Input at this step, shape (batch, input_size).
            h_prev: Previous hidden state, shape (batch, hidden_size).
            c_prev: Previous cell state, shape (batch, hidden_size).

        Returns:
            Tuple ``(h, c)`` of the new hidden and cell states, each of shape
            (batch, hidden_size).
        """
        # One affine map yields all four gate pre-activations: (batch, 4 * hidden_size).
        gates = x @ self.W.T + h_prev @ self.U.T + self.b
        i, f, g, o = gates.chunk(4, dim=1)

        i = torch.sigmoid(i)  # input gate
        f = torch.sigmoid(f)  # forget gate
        g = torch.tanh(g)     # candidate cell values
        o = torch.sigmoid(o)  # output gate

        # Keep part of the old cell state and add the gated candidate.
        c = f * c_prev + i * g
        h = o * torch.tanh(c)

        return h, c


class BiLSTM(nn.Module):
    """Single-layer bidirectional LSTM built from two independent cells.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units per direction.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.fwd_cell = LSTMCell(input_size, hidden_size)
        self.bwd_cell = LSTMCell(input_size, hidden_size)

    def _run_direction(self, cell, x, time_steps):
        """Run ``cell`` over ``x`` visiting ``time_steps`` in order.

        Returns the list of hidden states in the order they were produced.
        """
        batch = x.size(0)
        # Start from zero states that share the input's device AND dtype, so
        # the module also works after .double() / .half().
        h = torch.zeros(batch, self.hidden_size, device=x.device, dtype=x.dtype)
        c = torch.zeros(batch, self.hidden_size, device=x.device, dtype=x.dtype)

        states = []
        for t in time_steps:
            h, c = cell(x[:, t], h, c)
            states.append(h)
        return states

    def forward(self, x):
        """
        x: (batch, seq_len, input_size)
        returns: (batch, seq_len, 2*hidden_size)
        """
        _, seq_len, _ = x.size()

        # Forward direction: t = 0 .. seq_len - 1
        h_f = self._run_direction(self.fwd_cell, x, range(seq_len))

        # Backward direction: t = seq_len - 1 .. 0, then put the states back
        # into chronological order so index t lines up with the forward pass.
        h_b = self._run_direction(self.bwd_cell, x, reversed(range(seq_len)))
        h_b.reverse()

        # Concatenate forward + backward along the feature dimension.
        return torch.cat(
            [torch.stack(h_f, dim=1), torch.stack(h_b, dim=1)],
            dim=2,
        )

In [None]:
import torch
import torch.nn as nn

class BiLSTMClassifier(nn.Module):
    """Sequence classifier: a BiLSTM encoder followed by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per direction of the BiLSTM.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.bilstm = BiLSTM(input_size, hidden_size)
        # The encoder emits forward and backward states side by side: 2H features.
        self.fc = nn.Linear(in_features=2 * hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return unnormalized class scores for ``x`` of shape (batch, seq_len, input_size)."""
        per_step = self.bilstm(x)            # (batch, seq_len, 2H)
        # Classify from the final time step only. NOTE(review): at this index
        # the backward half has processed just the last element of the sequence.
        final_step = per_step.select(1, -1)  # (batch, 2H)
        return self.fc(final_step)

In [None]:
class ToyDataset(torch.utils.data.Dataset):
    """Random sequences labelled by the sign of the sum of all their entries.

    ``x`` has shape (n_samples, seq_len, input_size). ``y`` is 1 where the sum
    over the sequence and feature dimensions is positive, else 0 (int64).
    """

    def __init__(self, n_samples=1000, seq_len=10, input_size=8):
        self.x = torch.randn((n_samples, seq_len, input_size))
        # Reduce over the time and feature axes, leaving one scalar per sample.
        totals = self.x.sum(dim=(1, 2))
        self.y = torch.gt(totals, 0).to(torch.int64)

    def __len__(self):
        # One sample per entry along the leading dimension.
        return self.x.shape[0]

    def __getitem__(self, idx):
        # Return the (sequence, label) pair at position ``idx``.
        sample = self.x[idx]
        label = self.y[idx]
        return sample, label

In [None]:
def train_model(
    input_size=8,
    hidden_size=32,
    num_classes=2,
    seq_len=10,
    batch_size=32,
    epochs=10,
    lr=1e-3,
    n_samples=2000,
):
    """Train a BiLSTMClassifier on a freshly generated ToyDataset.

    Calling ``train_model()`` with no arguments uses the original settings.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per direction of the BiLSTM.
        num_classes: Number of output logits. ToyDataset labels are 0 or 1,
            so this must be at least 2.
        seq_len: Length of each generated sequence.
        batch_size: Mini-batch size.
        epochs: Number of passes over the dataset.
        lr: Adam learning rate.
        n_samples: Number of generated samples; must be positive.

    Returns:
        The trained model.

    Raises:
        ValueError: If ``n_samples`` is not positive.
    """
    if n_samples <= 0:
        # An empty dataset would otherwise end in a division by zero below.
        raise ValueError("n_samples must be positive")

    # Dataset + loader
    dataset = ToyDataset(n_samples=n_samples, seq_len=seq_len, input_size=input_size)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Model, loss, optimizer
    model = BiLSTMClassifier(input_size, hidden_size, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training loop
    for epoch in range(epochs):
        total_loss = 0
        correct = 0

        for x, y in loader:
            logits = model(x)
            loss = criterion(logits, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight each batch's loss by its size so a smaller final batch
            # does not skew the per-sample average.
            total_loss += loss.item() * x.size(0)
            # Accuracy uses the logits computed before this step's update.
            correct += (logits.argmax(dim=1) == y).sum().item()

        avg_loss = total_loss / len(dataset)
        acc = correct / len(dataset)

        print(f"Epoch {epoch+1}: loss={avg_loss:.4f}, acc={acc:.4f}")

    return model

# Run training; as the last expression in the cell, the returned model's repr is displayed.
train_model()

Epoch 1: loss=0.6477, acc=0.6890
Epoch 2: loss=0.3200, acc=0.9090
Epoch 3: loss=0.1723, acc=0.9555
Epoch 4: loss=0.1276, acc=0.9640
Epoch 5: loss=0.1063, acc=0.9670
Epoch 6: loss=0.0921, acc=0.9715
Epoch 7: loss=0.0824, acc=0.9760
Epoch 8: loss=0.0772, acc=0.9765
Epoch 9: loss=0.0672, acc=0.9815
Epoch 10: loss=0.0662, acc=0.9810


BiLSTMClassifier(
  (bilstm): BiLSTM(
    (fwd_cell): LSTMCell()
    (bwd_cell): LSTMCell()
  )
  (fc): Linear(in_features=64, out_features=2, bias=True)
)