In [19]:
import torchvision
import torch
import torch.nn as nn

from torchvision import transforms

from tqdm import tqdm

from mlp_mixer.model.mlp_mixer import MLPMixer

In [20]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
dataset1 = torchvision.datasets.CIFAR10('./data', train=True, download=True,
                                        transform=transform)
dataset2 = torchvision.datasets.CIFAR10('./data', train=False, download=True,
                                        transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [21]:
model = MLPMixer(image_size=224, in_channels=3, patch_size=16)

In [22]:
train_params = {
    'epochs': 16,
    'lr': 0.1,
    'eval_portion': 0.2,
    'batch_size': 16
}

device = torch.device('cuda')

In [23]:
from torch.utils.data import DataLoader, random_split

EVAL_LENGTH = int(len(dataset1) * train_params['eval_portion'])

train_set, eval_set = random_split(dataset1, [len(dataset1) - EVAL_LENGTH, EVAL_LENGTH])
train_loader = DataLoader(train_set, batch_size=train_params['batch_size'],
                          shuffle=True)

eval_loader = DataLoader(eval_set, batch_size=train_params['batch_size'],
                         shuffle=True)

test_loader = DataLoader(dataset2, batch_size=train_params['batch_size'],
                         shuffle=True)


In [24]:
model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=train_params['lr'], momentum=0.9, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9)

cross_entropy = nn.CrossEntropyLoss()

best_accuracy = 0.0

for e in range(train_params['epochs']):
    train_loss = 0.0
    model.train()
    for images, labels in tqdm(iter(train_loader), desc='Training...'):
        optimizer.zero_grad()

        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = cross_entropy(outputs, labels)
        train_loss += loss.cpu().detach().numpy()

        loss.backward()
        optimizer.step()

    print(f"Training average loss: {train_loss / len(train_loader)}")

    test_acc_count = 0.0
    eval_loss = 0.0

    model.eval()
    with torch.no_grad():
        for images, labels in tqdm(iter(eval_loader), desc='Eval...'):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            loss = cross_entropy(outputs, labels)
            eval_loss += loss.cpu().detach().numpy()

            pred = torch.argmax(outputs, 1)
            test_acc_count += float(torch.sum(pred == labels))

    test_accuracy = float(test_acc_count) / float(len(eval_set))
    eval_loss /= len(eval_loader)

    print(f'Epoch: {e + 1}, eval accuracy {test_accuracy}, eval loss {eval_loss}')
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'checkpoints/model.pth')
        best_accuracy = test_accuracy

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.