In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import wandb


from datasets import load_dataset
from transformers import AutoTokenizer

# A minimal feed-forward network: Linear -> ReLU -> Linear
class SimpleNet(nn.Module):
    """Two fully connected layers with a ReLU activation in between.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width of the hidden layer.
        output_size: Number of values produced for each input vector.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # The attribute names determine the state_dict keys, so they are kept.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply fc1, then ReLU, then fc2 to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
    

In [3]:
# Identifiers of the dataset and of the pretrained tokenizer checkpoint.
IMDB_DATASET_ID = "imdb"
BERT_CHECKPOINT = "bert-base-uncased"

# Both calls may download and cache files on first use.
dataset = load_dataset(IMDB_DATASET_ID)
tokenizer = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)

In [4]:
# Display the object returned by load_dataset to inspect its splits and columns.
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})

In [5]:

def tokenization(example, max_length=None):
    """Tokenize the ``"text"`` field to one fixed sequence length.

    Meant to be used with ``Dataset.map(..., batched=True)``, where ``example``
    is a batch whose ``"text"`` entry is a list of strings.

    ``padding=True`` would pad only to the longest sequence of the batch that
    ``map`` happens to pass in, so rows tokenized in different ``map`` batches
    could end up with different lengths and fail to stack when a DataLoader
    mixes them. Padding to ``max_length`` gives every row the same length.

    Args:
        example: Mapping with a ``"text"`` entry (a string or list of strings).
        max_length: Target sequence length. ``None`` (default) lets the
            tokenizer fall back to its model's maximum input length.

    Returns:
        The tokenizer output for the given text (padded and truncated).
    """
    return tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

def _tokenize_split(split_name):
    """Run ``tokenization`` over one split of ``dataset`` in batched mode."""
    return dataset[split_name].map(tokenization, batched=True)


# Train is processed first, then test.
train_data = _tokenize_split('train')
test_data = _tokenize_split('test')

Map: 100%|██████████| 25000/25000 [00:17<00:00, 1454.06 examples/s]
Map: 100%|██████████| 25000/25000 [00:19<00:00, 1288.44 examples/s]


In [11]:
# Display the mapped training split to check which columns it now has.
train_data

Dataset({
    features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 25000
})

In [18]:
# Hyperparameters
# NOTE(review): the three sizes mirror SimpleNet's constructor arguments; confirm
# that input_size equals the length of the feature vector actually fed to the
# model (for example the tokenized sequence length).
input_size = 128
hidden_size = 128
output_size = 2
learning_rate = 0.001
batch_size = 3  # very small; presumably chosen for quick inspection -- TODO confirm
num_epochs = 10

# Load and preprocess dataset
# A Hugging Face Dataset returns plain Python lists by default. torch's default
# collate function then turns a list-valued column such as 'input_ids' into a
# list of per-position tensors instead of one (batch, seq_len) tensor.
# with_format("torch") returns a view that yields tensors, so each numeric
# column is stacked into a single tensor per batch. Stacking requires every row
# of a column to have the same length. The source datasets are not modified.
train_loader = DataLoader(dataset=train_data.with_format("torch"), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data.with_format("torch"), batch_size=batch_size, shuffle=False)

In [43]:
# Pull exactly one batch from the loader and report the shape of its labels.
batch_iter = iter(train_loader)
try:
    batch = next(batch_iter)
except StopIteration:
    pass  # empty loader: nothing to print
else:
    print(batch['label'].shape)
del batch_iter  # release the iterator, as leaving a for-loop would

torch.Size([3])


In [38]:
# Example of target with class indices
# NOTE(review): `input` shadows the Python builtin of the same name for the rest
# of the session; the name is kept because a later cell displays it.
loss = nn.CrossEntropyLoss()  # criterion called below with float scores and integer class indices
input = torch.randn(3, 5, requires_grad=True)  # 3 rows x 5 scores, tracked by autograd
target = torch.empty(3, dtype=torch.long).random_(5)  # 3 random class indices in the range 0..4
output = loss(input, target)  # loss value for these scores and targets
output.backward()  # backpropagate; gradients accumulate in input.grad

In [39]:
# Display the random score tensor used in the loss example.
input

tensor([[-1.4256,  1.1648, -0.7968, -0.5580,  1.0976],
        [-0.5932,  0.8108,  0.7337, -1.0441,  1.1736],
        [-1.0222, -0.1037,  0.7953, -1.4138,  0.5824]], requires_grad=True)

In [40]:
# Display the random class indices used as targets in the loss example.
target

tensor([2, 0, 0])