In [1]:
import PIL
import time
import torch
import torchvision
import torch.nn.functional as F
from einops import rearrange
from torch import nn
import torch.nn.init as init
from ViTResNet18 import *
from common import *
from TinyImageNet import TinyImageNet

In [2]:
# Dataset location, batch sizes, and the device the models are moved to.
PATH_TO_IMAGE_NET = "./data/tiny-imagenet-200"
BATCH_SIZE_TRAIN = 100
BATCH_SIZE_VAL = 100
device = torch.device("cuda")

# Per-channel mean/std handed to Normalize in both pipelines
# (these are the usual ImageNet statistics quoted in the torchvision docs).
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)

# Training pipeline: random flip, small random rotation and affine jitter,
# then tensor conversion and normalization.
# `interpolation=` replaces the `resample=` keyword, which torchvision
# deprecated and subsequently removed from RandomRotation.
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomRotation(
        10, interpolation=torchvision.transforms.InterpolationMode.BILINEAR),
    torchvision.transforms.RandomAffine(8, translate=(.15, .15)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

# Validation pipeline: no augmentation, only tensor conversion and normalization.
transform_val = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

# NOTE(review): in_memory=False presumably makes TinyImageNet read images from
# disk on demand rather than caching them up front - confirm in TinyImageNet.py.
train_dataset = TinyImageNet(PATH_TO_IMAGE_NET, split='train', transform=transform_train, in_memory=False)
val_dataset = TinyImageNet(PATH_TO_IMAGE_NET, split='val', transform=transform_val, in_memory=False)

# Only the training loader is shuffled; validation keeps the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE_VAL, shuffle=False)



In [3]:
# Sanity check: fetch a single training batch and show the image tensor shape.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    data, target = first_batch
    print(data.shape)

torch.Size([100, 3, 224, 224])


In [4]:
#model = ViTResNet(BasicBlock, [3, 3, 3], BATCH_SIZE_TRAIN, num_classes=200, num_tokens=16).to(device)



In [5]:
# Build ViTResNet18 with its default tokenizer settings and move it to `device`.
# (The original line read `model = model = ...`; the duplicated target was redundant.)
model = ViTResNet18(BasicBlock, [2, 2, 2], BATCH_SIZE_TRAIN, num_classes=200, num_tokens=16).to(device)
EPOCHS = 90
# NOTE(review): judging by the recorded output, check_on_dataset runs the
# train/evaluate loop and saves checkpoints; the two trailing strings are
# presumably dataset/model labels - confirm against the `common` module.
check_on_dataset(model, train_loader, val_loader, EPOCHS, "TinyImageNet", "ViTResNet18")

Execution time: 245.46 seconds
Epoch: 43
Execution time: 243.19 seconds
Epoch: 44
Execution time: 242.06 seconds
Epoch: 45
Execution time: 245.90 seconds
Epoch: 46
Execution time: 249.03 seconds
Epoch: 47
Execution time: 252.87 seconds
Epoch: 48
Execution time: 248.55 seconds
Epoch: 49
Execution time: 247.73 seconds
Epoch: 50
Execution time: 248.20 seconds

Average train loss: 0.3632

Train accuracy: 89.6770

Average test loss: 2.2119

Test accuracy: 56.9400
Saved model's checkpoint
Epoch: 51
Execution time: 249.95 seconds
Epoch: 52
Execution time: 251.40 seconds
Epoch: 53
Execution time: 251.52 seconds
Epoch: 54
Execution time: 251.38 seconds
Epoch: 55
Execution time: 248.39 seconds
Epoch: 56
Execution time: 251.54 seconds
Epoch: 57
Execution time: 252.13 seconds
Epoch: 58
Execution time: 250.89 seconds
Epoch: 59
Execution time: 250.21 seconds
Epoch: 60
Execution time: 252.74 seconds

Average train loss: 0.2722

Train accuracy: 92.1760

Average test loss: 2.4049

Test accuracy: 55.620

In [12]:
from thop import profile

# Profile the current `model` on one validation batch moved to `device`.
# NOTE(review): thop's documentation names the first return value `macs`;
# confirm whether the number printed as "flops" should be labelled/converted.
# NOTE(review): the count is for the whole batch passed in, not one image - verify.
data, target = next(iter(val_loader))
batch_on_device = data.to(device)
flops, params = profile(model, inputs=(batch_on_device,))
for line in (data.shape, f'flops = {flops}', f'params = {params}'):
    print(line)




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[WARN] Cannot find rule for <class 'ViTResNet18.BasicBlock'>. Treat it as zero Macs and zero Params.
[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.
[WARN] Cannot find rule for <class 'Tokenizers.FilterBasedTokenizer'>. Treat it as zero Macs and zero Params.
[WARN] Cannot find rule for <class 'Projector.Projector'>. Treat it as zero Macs and zero Params.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[WARN] Cannot find rule for <class 'torch.nn.modules.normalization.LayerNorm'>. Treat it as zero Macs and zero Params.
[INFO] Register count_l

In [3]:
# Variant run: same ViTResNet18 arguments, with tokenizers_type set to
# ['pooling', 'pooling'].
EPOCHS = 90
model = ViTResNet18(
    BasicBlock,
    [2, 2, 2],
    BATCH_SIZE_TRAIN,
    num_classes=200,
    num_tokens=16,
    tokenizers_type=['pooling', 'pooling'],
)
model = model.to(device)
# NOTE(review): the trailing strings are presumably dataset/model labels used
# by check_on_dataset for logging or checkpoint naming - confirm in `common`.
check_on_dataset(model, train_loader, val_loader, EPOCHS, "TinyImageNet", "ViTResNet18(pooling)")

Execution time: 248.05 seconds
Epoch: 43
Execution time: 249.42 seconds
Epoch: 44
Execution time: 249.34 seconds
Epoch: 45
Execution time: 248.25 seconds
Epoch: 46
Execution time: 248.06 seconds
Epoch: 47
Execution time: 249.33 seconds
Epoch: 48
Execution time: 248.41 seconds
Epoch: 49
Execution time: 248.71 seconds
Epoch: 50
Execution time: 246.55 seconds

Average train loss: 0.4695

Train accuracy: 87.0480

Average test loss: 2.3547

Test accuracy: 56.1900
Saved model's checkpoint
Epoch: 51
Execution time: 250.52 seconds
Epoch: 52
Execution time: 249.70 seconds
Epoch: 53
Execution time: 250.68 seconds
Epoch: 54
Execution time: 250.90 seconds
Epoch: 55
Execution time: 251.01 seconds
Epoch: 56
Execution time: 250.78 seconds
Epoch: 57
Execution time: 251.63 seconds
Epoch: 58
Execution time: 252.52 seconds
Epoch: 59
Execution time: 250.60 seconds
Epoch: 60
Execution time: 250.06 seconds

Average train loss: 0.3844

Train accuracy: 89.2330

Average test loss: 2.5215

Test accuracy: 55.380

In [2]:
# Dataset location, batch sizes, and the device the models are moved to.
# NOTE(review): the other data cells in this notebook point at
# "./data/tiny-imagenet-200"; confirm that "tiny-imagenet-201" is the intended
# directory for this run (its recorded batch shape is 64x64) and not a typo.
PATH_TO_IMAGE_NET = "./data/tiny-imagenet-201"
BATCH_SIZE_TRAIN = 100
BATCH_SIZE_VAL = 100
device = torch.device("cuda")

# Per-channel mean/std handed to Normalize in both pipelines
# (these are the usual ImageNet statistics quoted in the torchvision docs).
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)

# Training pipeline: random flip, small random rotation and affine jitter,
# then tensor conversion and normalization.
# `interpolation=` replaces the `resample=` keyword, which torchvision
# deprecated and subsequently removed from RandomRotation.
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomRotation(
        10, interpolation=torchvision.transforms.InterpolationMode.BILINEAR),
    torchvision.transforms.RandomAffine(8, translate=(.15, .15)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

# Validation pipeline: no augmentation, only tensor conversion and normalization.
transform_val = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

# NOTE(review): in_memory=False presumably makes TinyImageNet read images from
# disk on demand rather than caching them up front - confirm in TinyImageNet.py.
train_dataset = TinyImageNet(PATH_TO_IMAGE_NET, split='train', transform=transform_train, in_memory=False)
val_dataset = TinyImageNet(PATH_TO_IMAGE_NET, split='val', transform=transform_val, in_memory=False)

# Only the training loader is shuffled; validation keeps the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE_VAL, shuffle=False)



In [3]:
# Sanity check: fetch a single training batch and show the image tensor shape.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    data, target = first_batch
    print(data.shape)


torch.Size([100, 3, 64, 64])


In [4]:
# Build ViTResNet18 with its default tokenizer settings and move it to `device`.
# (The original line read `model = model = ...`; the duplicated target was redundant.)
model = ViTResNet18(BasicBlock, [2, 2, 2], BATCH_SIZE_TRAIN, num_classes=200, num_tokens=16).to(device)
EPOCHS = 90
# NOTE(review): judging by the recorded output, check_on_dataset runs the
# train/evaluate loop and saves checkpoints; the two trailing strings are
# presumably dataset/model labels - confirm against the `common` module.
check_on_dataset(model, train_loader, val_loader, EPOCHS, "TinyImageNet(64x64)", "ViTResNet18")

0
Execution time: 63.81 seconds
Epoch: 43
Execution time: 63.72 seconds
Epoch: 44
Execution time: 63.88 seconds
Epoch: 45
Execution time: 63.98 seconds
Epoch: 46
Execution time: 63.98 seconds
Epoch: 47
Execution time: 64.00 seconds
Epoch: 48
Execution time: 63.97 seconds
Epoch: 49
Execution time: 63.96 seconds
Epoch: 50
Execution time: 63.73 seconds

Average train loss: 0.9978

Train accuracy: 72.5910

Average test loss: 2.7750

Test accuracy: 44.2000
Saved model's checkpoint
Epoch: 51
Execution time: 63.68 seconds
Epoch: 52
Execution time: 63.63 seconds
Epoch: 53
Execution time: 63.64 seconds
Epoch: 54
Execution time: 63.64 seconds
Epoch: 55
Execution time: 63.60 seconds
Epoch: 56
Execution time: 63.61 seconds
Epoch: 57
Execution time: 63.59 seconds
Epoch: 58
Execution time: 63.61 seconds
Epoch: 59
Execution time: 63.63 seconds
Epoch: 60
Execution time: 63.71 seconds

Average train loss: 0.8699

Train accuracy: 75.5570

Average test loss: 2.9771

Test accuracy: 43.4700
Saved model's c

In [3]:
# Dataset location, batch sizes, and the device the models are moved to.
PATH_TO_IMAGE_NET = "./data/tiny-imagenet-200"
BATCH_SIZE_TRAIN = 100
BATCH_SIZE_VAL = 100
device = torch.device("cuda")

# Per-channel mean/std handed to Normalize in both pipelines
# (these are the usual ImageNet statistics quoted in the torchvision docs).
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)

# Training pipeline: random flip, small random rotation and affine jitter,
# then tensor conversion and normalization.
# `interpolation=` replaces the `resample=` keyword, which torchvision
# deprecated and subsequently removed from RandomRotation.
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomRotation(
        10, interpolation=torchvision.transforms.InterpolationMode.BILINEAR),
    torchvision.transforms.RandomAffine(8, translate=(.15, .15)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

# Validation pipeline: no augmentation, only tensor conversion and normalization.
transform_val = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

# NOTE(review): in_memory=False presumably makes TinyImageNet read images from
# disk on demand rather than caching them up front - confirm in TinyImageNet.py.
train_dataset = TinyImageNet(PATH_TO_IMAGE_NET, split='train', transform=transform_train, in_memory=False)
val_dataset = TinyImageNet(PATH_TO_IMAGE_NET, split='val', transform=transform_val, in_memory=False)

# Only the training loader is shuffled; validation keeps the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE_VAL, shuffle=False)

In [4]:
# Variant run: same ViTResNet18 arguments, with tokenizers_type set to
# ['filter', 'recurrent'].
EPOCHS = 90
model = ViTResNet18(
    BasicBlock,
    [2, 2, 2],
    BATCH_SIZE_TRAIN,
    num_classes=200,
    num_tokens=16,
    tokenizers_type=['filter', 'recurrent'],
)
model = model.to(device)
# NOTE(review): the recorded output starts with "Loaded model's checkpoint",
# so check_on_dataset presumably resumes from a saved checkpoint keyed by the
# trailing label strings - confirm in `common`.
check_on_dataset(model, train_loader, val_loader, EPOCHS, "TinyImageNet", "ViTResNet18(recurrent)")

Loaded model's checkpoint
Epoch: 31
Execution time: 249.51 seconds
Epoch: 32
Execution time: 248.97 seconds
Epoch: 33


KeyboardInterrupt: 