In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
!pip install --upgrade torch torchvision

In [None]:
!cp -r /content/drive/MyDrive/sample_code sample_code/

In [None]:
from zipfile import ZipFile
from dataset import load_train_data, load_val_data
import os
os.chdir('/content/sample_code/vit')
with ZipFile('tiny-imagenet-200.zip', 'r') as zipObj:
   # Extract all the contents of zip file in current directory
   zipObj.extractall()

os.chdir('/content/sample_code/vit/tiny-imagenet-200')
!mv * ../

In [None]:
import os
# Takes approximately 8 minutes. From images, creates pickle dataset for fast reading
os.chdir('/content/sample_code/vit')
!python /content/sample_code/vit/fileio.py

torch.Size([100000, 3, 64, 64]) torch.Size([100000])
torch.Size([10000, 3, 64, 64]) torch.Size([10000])


In [None]:
from math import e
import numpy as np
import sys
import torch
import torch.nn as nn
from tqdm import tqdm_notebook, tqdm, trange
from importlib import reload
from torch.optim import Adam
from torchvision.datasets.mnist import MNIST
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets.mnist import MNIST
sys.path.insert(0, '/content/sample_code/vit/models/')
from vit_model import ViT

np.random.seed(0)
torch.manual_seed(0)

def main(config):

    # Loading data
    transform = ToTensor()

    img_size = config['img_size']
    randaug_magnitude = 0
    batch_size = config['batch_size']
    input_image_channels = config['input_image_channels']
    if config['dataset'] == 'MNIST':
      train_set = MNIST(root='./../datasets', train=True, download=True, transform=transform)
      test_set = MNIST(root='./../datasets', train=False, download=True, transform=transform)

      train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size)
      val_loader = DataLoader(test_set, shuffle=False, batch_size=batch_size)
    elif config['dataset'] == 'TINY_IMAGENET':
      train_loader = load_train_data(img_size, randaug_magnitude, batch_size)
      val_loader = load_val_data(img_size, batch_size)
    else:
      raise Exception('Wrong Dataset name!')


    # Defining model and training options
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device: ", device, f"({torch.cuda.get_device_name(device)})" if torch.cuda.is_available() else "")
    model = ViT((input_image_channels, img_size, img_size),
                                                          n_patches=config['n_patches'],
                                                          n_blocks=config['n_blocks'],
                                                          hidden_d=config['hidden_d'],
                                                          n_heads=config['n_heads'],
                                                          out_d=config['out_d'],
                                                          mlp_ratio=config['mlp_ratio']).to(device)
    print(f'# of Parameters in Model: {sum(p.numel() for p in model.parameters())}')
    N_EPOCHS = config['N_EPOCHS']
    LR = config['LR']
    weight_decay = config['weight_decay']

    # Training loop
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=weight_decay)
    criterion = torch.nn.CrossEntropyLoss()
    for epoch in trange(N_EPOCHS, desc="Training"):
        train_loss = 0.0
        for batch_idx, batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch + 1} in training", position=0, leave=True)):
            x, y = batch
            x, y = x.to(device), y.to(device)
            y_hat = model(x)
            loss = criterion(y_hat, y)

            batch_loss = loss.detach().cpu().item() / len(train_loader)
            train_loss += batch_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (batch_idx + 1) % config['print_every_nth_batch'] == 0:  # print every 50th batch
                print(f"{batch_idx + 1}th Batch loss: {batch_loss:.4f}")

        print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {train_loss:.4f}")

    # Test loop
    with torch.no_grad():
        correct, total = 0, 0
        test_loss = 0.0
        for batch in tqdm(val_loader, desc="Testing"):
            x, y = batch
            x, y = x.to(device), y.to(device)
            y_hat = model(x)
            loss = criterion(y_hat, y)
            test_loss += loss.detach().cpu().item() / len(val_loader)

            correct += torch.sum(torch.argmax(y_hat, dim=1) == y).detach().cpu().item()
            total += len(x)
        print(f"Test loss: {test_loss:.2f}")
        print(f"Test accuracy: {correct / total * 100:.2f}%")

In [None]:
"""
config = {
    'img_size' : 224,
    'batch_size': 16,
    'input_image_channels': 3,
    'n_patches': 7,
    'n_blocks': 12,
    'hidden_d': 768,
    'n_heads': 12,
    'out_d': 200,
    'N_EPOCHS' : 5,
    'LR' : 0.001,
    'weight_decay': 0.05,
    'dataset': 'TINY_IMAGENET',
    'print_every_nth_batch': 50,
    'mlp_ratio': 4
}
"""
config = {
    'img_size' : 28,
    'batch_size': 128,
    'input_image_channels': 1,
    'n_patches': 7,
    'n_blocks': 2,
    'hidden_d': 8,
    'n_heads': 2,
    'out_d': 10,
    'N_EPOCHS' : 3,
    'LR' : 0.005,
    'weight_decay': 0.05,
    'dataset': 'MNIST',
    'print_every_nth_batch': 50,
    'mlp_ratio': 4
}
main(config)

Using device:  cuda (Tesla T4)


Epoch 1 in training:  11%|█         | 50/469 [00:36<05:35,  1.25it/s]

Batch 50 loss: 0.0049


Epoch 1 in training:  21%|██▏       | 100/469 [01:11<04:06,  1.50it/s]

Batch 100 loss: 0.0047


Epoch 1 in training:  32%|███▏      | 150/469 [01:47<03:40,  1.45it/s]

Batch 150 loss: 0.0047


Epoch 1 in training:  43%|████▎     | 200/469 [02:23<03:46,  1.19it/s]

Batch 200 loss: 0.0046


Epoch 1 in training:  53%|█████▎    | 250/469 [02:59<02:24,  1.52it/s]

Batch 250 loss: 0.0044


Epoch 1 in training:  64%|██████▍   | 300/469 [03:35<01:56,  1.46it/s]

Batch 300 loss: 0.0045


Epoch 1 in training:  75%|███████▍  | 350/469 [04:11<01:38,  1.20it/s]

Batch 350 loss: 0.0044


Epoch 1 in training:  85%|████████▌ | 400/469 [04:48<00:50,  1.38it/s]

Batch 400 loss: 0.0044


Epoch 1 in training:  96%|█████████▌| 450/469 [05:24<00:13,  1.43it/s]

Batch 450 loss: 0.0045


Epoch 1 in training: 100%|██████████| 469/469 [05:39<00:00,  1.38it/s]
Training:  33%|███▎      | 1/3 [05:39<11:18, 339.07s/it]

Epoch 1/3 loss: 2.1389


Epoch 2 in training:  11%|█         | 50/469 [00:35<05:56,  1.18it/s]

Batch 50 loss: 0.0043


Epoch 2 in training:  21%|██▏       | 100/469 [01:11<04:09,  1.48it/s]

Batch 100 loss: 0.0042


Epoch 2 in training:  32%|███▏      | 150/469 [01:47<03:39,  1.45it/s]

Batch 150 loss: 0.0042


Epoch 2 in training:  43%|████▎     | 200/469 [02:22<03:27,  1.29it/s]

Batch 200 loss: 0.0042


Epoch 2 in training:  53%|█████▎    | 250/469 [02:57<02:25,  1.51it/s]

Batch 250 loss: 0.0039


Epoch 2 in training:  64%|██████▍   | 300/469 [03:34<01:59,  1.41it/s]

Batch 300 loss: 0.0039


Epoch 2 in training:  75%|███████▍  | 350/469 [04:12<01:38,  1.21it/s]

Batch 350 loss: 0.0037


Epoch 2 in training:  85%|████████▌ | 400/469 [04:47<00:45,  1.51it/s]

Batch 400 loss: 0.0040


Epoch 2 in training:  96%|█████████▌| 450/469 [05:23<00:13,  1.40it/s]

Batch 450 loss: 0.0038


Epoch 2 in training: 100%|██████████| 469/469 [05:37<00:00,  1.39it/s]
Training:  67%|██████▋   | 2/3 [11:16<05:37, 337.96s/it]

Epoch 2/3 loss: 1.8964


Epoch 3 in training:  11%|█         | 50/469 [00:34<04:36,  1.52it/s]

Batch 50 loss: 0.0038


Epoch 3 in training:  21%|██▏       | 100/469 [01:11<04:02,  1.52it/s]

Batch 100 loss: 0.0040


Epoch 3 in training:  32%|███▏      | 150/469 [01:47<04:00,  1.33it/s]

Batch 150 loss: 0.0038


Epoch 3 in training:  43%|████▎     | 200/469 [02:22<02:53,  1.55it/s]

Batch 200 loss: 0.0038


Epoch 3 in training:  53%|█████▎    | 250/469 [02:58<02:26,  1.50it/s]

Batch 250 loss: 0.0038


Epoch 3 in training:  64%|██████▍   | 300/469 [03:34<02:22,  1.19it/s]

Batch 300 loss: 0.0037


Epoch 3 in training:  75%|███████▍  | 350/469 [04:10<01:20,  1.48it/s]

Batch 350 loss: 0.0037


Epoch 3 in training:  85%|████████▌ | 400/469 [04:46<00:53,  1.30it/s]

Batch 400 loss: 0.0037


Epoch 3 in training:  96%|█████████▌| 450/469 [05:22<00:12,  1.53it/s]

Batch 450 loss: 0.0037


Epoch 3 in training: 100%|██████████| 469/469 [05:35<00:00,  1.40it/s]
Training: 100%|██████████| 3/3 [16:51<00:00, 337.23s/it]


Epoch 3/3 loss: 1.7809


Testing: 100%|██████████| 79/79 [00:34<00:00,  2.31it/s]

Test loss: 1.76
Test accuracy: 70.10%



