### Try COLAB

In [162]:
try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False
if IN_COLAB:
    !pip3 install torch matplotlib torchmetrics scikit-image segmentation-models-pytorch

# Import

In [174]:
import torch
from torch import nn as nn
from torch.nn import functional as F

from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

import torchmetrics

import wandb

import seaborn as sns

import random

### CUDA

In [175]:
# GPU Support?
if torch.cuda.is_available():
    print("Using the GPU")
    device = torch.device('cuda')
else:
    print("Using the CPU")
    device = torch.device('cpu')

Using the GPU


# Generate Dataset

In [176]:
class FormalLanguageDataset(Dataset):
    def __init__(self, N=100):
        self.N = N
        self.dataset = self._generate_dataset()
        self.char_to_idx = {char: idx for idx, char in enumerate('abc')}
        self.input_size = len(self.char_to_idx)
        self.x_tensors, self.y_tensors = self._convert_to_tensors()
        
    def _generate_dataset(self):
        dataset = []
        for n in range(self.N):
            if n % 4 == 0:  # 25% of sequences are guaranteed to be actual members of the language.
                randint = random.randint(1, 6)
                sequence = 'a' * randint + 'b' * randint + 'c' * randint
                label = 1
            else:
                length = random.randint(3, 20)  # Random sequence length between 3 and 20
                counts = [0, 0, 0]
                # Distribute the length among a, b, and c
                for i in range(length):
                    counts[random.randint(0, 2)] += 1
                # Ensure alphabetical order and create the sequence (unlikely to become a member of the language)
                sequence = 'a' * counts[0] + 'b' * counts[1] + 'c' * counts[2]
                label = 0
            dataset.append((sequence, label))
        return dataset
    
    def _convert_to_tensors(self):
        x_tensors = [torch.tensor([self.char_to_idx[char] for char in seq], dtype=torch.long) for seq, _ in self.dataset]
        y_tensors = torch.tensor([label for _, label in self.dataset], dtype=torch.float)
        return x_tensors, y_tensors
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        return self.x_tensors[idx], self.y_tensors[idx]


In [185]:

def collate_fn(batch):
    sequences, labels = zip(*batch)
    sequences_padded = pad_sequence(sequences, batch_first=True, padding_value=0)
    lengths = torch.tensor([len(seq) for seq in sequences])
    labels = torch.tensor(labels, dtype=torch.float)
    return sequences_padded, lengths, labels

# Create the dataset and dataloader
N = 1000  # Number of sequences

dataset = FormalLanguageDataset(N)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=train_size, shuffle=True, collate_fn=collate_fn) # Load full batch
test_loader = DataLoader(test_dataset, batch_size=test_size, shuffle=False, collate_fn=collate_fn) # Load full batch

# RNN

In [186]:

# RNN Model
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, input_size)
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, output_size)
        # No activation function for the output layer, as we will use nn.BCEWithLogitsLoss which combines a sigmoid layer and the BCELoss.

    def forward(self, x, lengths):
        x = self.embedding(x)
        packed_input = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_output, hidden = self.rnn(packed_input)
        output, _ = pad_packed_sequence(packed_output, batch_first=True)
        output = hidden[-1]  # Use the last hidden state
        output = self.fc(output)

        return output


## Training Function RNN

In [187]:

# Training function
def train(model, train_loader, optimizer, criterion, device=device, num_epochs=20):
    with wandb.init(project="A2_RNN"):
        model.to(device)
        wandb.watch(model, log="all")
        for epoch in range(num_epochs):
            total_loss = 0
            for sequences, lengths, labels in train_loader:
                sequences, lengths, labels = sequences.to(device), lengths.to(device), labels.to(device)
                optimizer.zero_grad()

                # Forward pass
                output = model(sequences, lengths)
                loss = criterion(output.squeeze(), labels)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            average_loss = total_loss / len(train_loader)
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")
            wandb.log({"average_loss": average_loss, "epoch": epoch, "total_loss": total_loss})
    wandb.finish()

In [188]:
# Hyperparameters
input_size = 3  # Size of vocabulary ( 'a', 'b', 'c' )
hidden_size = 10
output_size = 1
learning_rate = 0.01

model = RNN(input_size, hidden_size, output_size, num_layers=1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.BCEWithLogitsLoss()

train(model, train_loader, optimizer, criterion, device=device, num_epochs=10)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Epoch [1/10], Loss: 0.7105
Epoch [2/10], Loss: 0.6662
Epoch [3/10], Loss: 0.6328
Epoch [4/10], Loss: 0.6078
Epoch [5/10], Loss: 0.5896
Epoch [6/10], Loss: 0.5772
Epoch [7/10], Loss: 0.5697
Epoch [8/10], Loss: 0.5662
Epoch [9/10], Loss: 0.5654
Epoch [10/10], Loss: 0.5663


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
average_loss,█▆▄▃▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
total_loss,█▆▄▃▂▂▁▁▁▁

0,1
average_loss,0.56628
epoch,9.0
total_loss,0.56628


# Evaluate Models

### Evaluate RNN

In [189]:
# Evaluate model
def evaluate_model(model, test_loader, device=device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, lengths, labels in test_loader:
            sequences, lengths, labels = sequences.to(device), lengths.to(device), labels.to(device)
            output = model(sequences, lengths)
            predictions = (output.squeeze() > 0).float()
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
    accuracy = correct / total
    print(f"Accuracy: {accuracy:.4f}")

In [190]:
evaluate_model(model, test_loader, device=device)

Accuracy: 0.7600
