In [104]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

from datasets import load_dataset

In [105]:
dataset = load_dataset("ylecun/mnist")

preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,))  
])

def preprocess_ops(examples):
    examples['image'] = [preprocess(image) for image in examples['image']]
    return examples

dataset.set_transform(preprocess_ops)
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})


In [106]:
train_dataset, test_dataset = dataset['train'], dataset['test']

In [107]:
batch_size = 128
trainLoader = DataLoader(train_dataset, batch_size=batch_size)
testLoader = DataLoader(test_dataset, batch_size)

In [108]:
class CNN(nn.Module):
    def __init__(self, n_filters, hidden_dim, n_layers):
        super().__init__()
        self.conv1 = nn.Conv2d(1, n_filters, 5)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(n_filters, 2*n_filters, 5)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(2)
        self.input_dim = 960
        self.flatten = nn.Flatten()
        self.inp_layer = nn.Linear(self.input_dim, hidden_dim)
        self.classifier = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(p=0.3)
            ) for i in range(n_layers)
        ])
        self.out_layer = nn.Linear(hidden_dim, 10)

    def forward(self, x):
        x = self.maxpool1(self.relu1(self.conv1(x)))
        x = self.maxpool2(self.relu2(self.conv2(x)))
        x = self.inp_layer(torch.flatten(x, start_dim=1))
        for layer in self.classifier:
            x = layer(x)
        x = self.out_layer(x)
        return x


    
params = {
    'n_filters': 30,
    'hidden_dim': 100,
    'n_layers': 2
}
model = CNN(**params)
model

CNN(
  (conv1): Conv2d(1, 30, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(30, 60, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (inp_layer): Linear(in_features=960, out_features=100, bias=True)
  (classifier): ModuleList(
    (0-1): 2 x Sequential(
      (0): Linear(in_features=100, out_features=100, bias=True)
      (1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Dropout(p=0.3, inplace=False)
    )
  )
  (out_layer): Linear(in_features=100, out_features=10, bias=True)
)

In [109]:
batch_size = 128
lr = 0.001
n_epochs = 10

optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCELoss()

In [112]:
def train(model, trainLoader, testLoader, criterion, optimizer, n_epochs):
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []
    for epoch in range(1, n_epochs+1):
        model.train()
        train_loss = 0
        train_acc = 0
        for batch in trainLoader:
            data, labels = batch['image'], batch['label']
            labels = labels.unsqueeze(1)
            out = model(data)
            preds = out.argmax(dim=1)
            loss = criterion(out, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_acc += (preds == labels).sum()

        train_loss /= batch_size
        train_acc /= len(trainLoader.dataset)
        train_accs.append(train_acc)
        train_losses.append(train_loss)

        model.eval()
        test_loss = 0
        test_acc = 0
        for batch in testLoader:
            data, labels = batch['image'], batch['label']
            out = model(data)
            loss = criterion(out, labels)
            preds = out.argmax(dim=1)
            test_loss += loss.item()
            test_acc += (preds == labels).sum()

        test_loss /= len(testLoader.dataset)
        test_acc /= len(testLoader.dataset)
        test_accs.append(test_acc)
        test_losses.append(test_loss)

        print(f'epoch {epoch} | train loss {train_loss} train acc {train_acc} | test loss {test_loss} test acc {test_acc}')
    return train_losses, train_accs, test_losses, test_accs


In [113]:
train_losses, train_accs, test_losses, test_accs = train(model, trainLoader, testLoader, criterion, optimizer, n_epochs)

ValueError: Using a target size (torch.Size([128, 1])) that is different to the input size (torch.Size([128, 10])) is deprecated. Please ensure they have the same size.