In [None]:
from google.colab import drive
import os
import sys

drive.mount('/content/drive', force_remount=True)

# Enter the folder name in your Drive where the project lives,
# relative to "MyDrive", e.g. 'cs231n/assignments/assignment3/'.
FOLDERNAME = 'comp451/Project/visual-transformer'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Single source of truth for the project location: both the import path and
# the working directory are derived from FOLDERNAME, so changing it above is
# enough to point the whole notebook at another folder.
PROJECT_DIR = os.path.join('/content/drive/MyDrive', FOLDERNAME)

# Now that the Drive is mounted, let the Python interpreter of the Colab VM
# load python files from within the project folder. The membership check keeps
# sys.path from growing every time this cell is re-run.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# Relative paths used later in the notebook (e.g. "logs/...") resolve against
# the project folder.
os.chdir(PROJECT_DIR)


Mounted at /content/drive


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import torch
from models.vt_resnet import VTResNet
from models.resnet import BasicBlock

In [None]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
import torchvision
from torch.utils.data import DataLoader, Subset

In [None]:
# Preprocessing pipeline handed to the datasets: convert each image to a
# tensor, then normalize the three channels with the fixed mean/std below.
# NOTE(review): these constants look like the widely used ImageNet statistics
# rather than values computed on CIFAR-10 — confirm that this is intended.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

In [None]:
# Both CIFAR-10 splits share the same root directory, download flag and
# preprocessing; only the `train` flag differs. The training split is built
# first, then the held-out split, exactly as two separate calls would.
data_train, data_valid = (
    torchvision.datasets.CIFAR10('/data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch.nn.functional as F

In [None]:
import pickle

def store_params(content, name):
    """Pickle ``content`` to ``logs/<name>.pkl`` (relative to the working directory).

    Args:
        content: Any picklable object.
        name: File stem; ``".pkl"`` is appended and the file is placed under
            ``logs/``.

    The target directory is created when missing, and the file is written
    inside a ``with`` block so the handle is closed even if pickling fails.
    """
    import os  # local import: keeps this cell independent of other cells' imports

    path = "logs/" + name + ".pkl"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(content, f)

def load_params(name):
    """Load and return the object pickled at ``logs/<name>.pkl``.

    Args:
        name: File stem, as previously passed to ``store_params``.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If ``logs/<name>.pkl`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that this
    # notebook (or another trusted source) wrote.
    # The ``with`` block closes the handle, which the previous version never did.
    with open("logs/" + name + ".pkl", "rb") as fl:
        return pickle.load(fl)


In [None]:
def train(model, optimizer, data_loader):
    """Run one pass over ``data_loader``, updating ``model`` with ``optimizer``.

    The model is switched to training mode and moved to the notebook-level
    ``device``. Each batch is scored with ``log_softmax`` over dim 1 followed
    by the negative log-likelihood loss, and a progress line is printed every
    100 batches.

    Args:
        model: Network to optimize.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        data_loader: Iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length is used in the progress line).

    Returns:
        list: The loss value of every batch, in iteration order.
    """
    dataset_size = len(data_loader.dataset)
    model.train()
    model.to(device)

    batch_losses = []
    progress_line = '[{:5}/{:5} ({:3.0f}%)]  Loss: {:6.4f}'

    for step, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        log_probs = F.log_softmax(model(data), dim=1)
        loss = F.nll_loss(log_probs, target)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        batch_losses.append(loss_value)

        if step % 100 != 0:
            continue
        # Samples seen so far are approximated as step * current batch size.
        print(progress_line.format(step * len(data), dataset_size,
                                   100 * step / len(data_loader), loss_value))

    return batch_losses

In [None]:
def evaluate(model, data_loader, mode):
    """Score ``model`` on every batch of ``data_loader`` without tracking gradients.

    The model is put in eval mode and moved to the notebook-level ``device``
    (a no-op when it is already there), so this function does not depend on
    ``train`` having been called first.

    Args:
        model: Network whose raw outputs are passed through ``log_softmax``
            over dim 1.
        data_loader: Iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length is the sample count).
        mode: Label inserted into the printed summary line.

    Returns:
        tuple: ``(accuracy, loss_history)`` where ``accuracy`` is the
        percentage of correct predictions as a plain Python ``float`` and
        ``loss_history`` is a one-element list holding the summed NLL loss
        divided by the sample count.
    """
    model.eval()
    model.to(device)

    total_samples = len(data_loader.dataset)
    correct_samples = 0
    total_loss = 0.0

    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            target = target.to(device)
            output = F.log_softmax(model(data), dim=1)
            loss = F.nll_loss(output, target, reduction='sum')
            _, pred = torch.max(output, dim=1)

            total_loss += loss.item()
            # .item() keeps the counter a Python int. Accumulating the tensor
            # itself made the returned accuracy a tensor living on `device`,
            # which then leaked into pickled results.
            correct_samples += pred.eq(target).sum().item()

    avg_loss = total_loss / total_samples
    accuracy = 100.0 * correct_samples / total_samples
    print('\nAverage ' + mode + ' loss: ' + '{:.4f}'.format(avg_loss) +
          '  Accuracy:' + '{:5}'.format(correct_samples) + '/' +
          '{:5}'.format(total_samples) + ' (' +
          '{:4.2f}'.format(accuracy) + '%)\n')
    return accuracy, [avg_loss]

In [None]:
# Hyperparameters for the single-model run.
# L and D are passed to VTResNet as `tokens` and `token_channels`, and H as
# `input_dim`; C and W are defined here but not passed to the model below.
H = W = 32
C = 1
L = 8
D = 128

# Mini-batch size shared by both loaders, and a learning rate for later cells.
N = 256
lr = 0.01

# Only the training loader reshuffles its samples on every pass.
train_loader = DataLoader(data_train, shuffle=True, batch_size=N)
valid_loader = DataLoader(data_valid, shuffle=False, batch_size=N)

model = VTResNet(
    resnet_block=BasicBlock,
    layers=[2, 2, 2, 2],
    layer_planes=[16, 32, 64, 128],
    input_dim=H,
    tokens=L,
    token_channels=D,
    num_classes=10,
)

In [None]:
import matplotlib.pyplot as plt

In [None]:
def train_attempt(model, optimizer, train_data, valid_data, epochs):
    """Train ``model`` for ``epochs`` epochs, evaluating after each one.

    After every epoch the per-batch losses of that epoch are plotted and shown,
    and the model is evaluated on ``valid_data`` (reported as ``'test'``) and
    on ``train_data`` (reported as ``'train'``). When the loop ends, the losses
    of all epochs are plotted on one figure (not shown explicitly here).

    Args:
        model: Network to train.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_data: Loader passed to ``train`` and to the training-set evaluation.
        valid_data: Loader passed to the validation evaluation.
        epochs: Number of epochs to run; with ``0`` nothing is trained.

    Returns:
        tuple: ``(final_valid_acc, final_train_acc, all_history)`` — the
        accuracies measured after the last epoch (both ``0`` when no epoch ran)
        and the concatenated per-batch training losses.
    """
    all_history = []
    final_train_acc = 0
    final_valid_acc = 0

    for epoch in range(epochs):
        history = train(model, optimizer, train_data)
        # extend() appends in place instead of copying the whole list each epoch.
        all_history.extend(history)
        plt.plot(history)
        plt.show()
        print("Epoch " + str(epoch) + " done.")
        # Only the accuracies are used; the per-call loss lists are discarded.
        final_valid_acc, _ = evaluate(model, valid_data, 'test')
        final_train_acc, _ = evaluate(model, train_data, 'train')

    plt.plot(all_history)

    return final_valid_acc, final_train_acc, all_history

In [None]:
import random
def param_search():
    """Search batch size, token width, token count, optimizer and learning rate.

    For every combination of batch size (``Ns``), token channels (``Ds``),
    token count (``Ls``) and optimizer class, ``lr_trials`` learning rates are
    drawn uniformly from ``[lr_min, lr_max]``. A fresh ``VTResNet`` is trained
    for 10 epochs per trial via ``train_attempt`` and scored on the first 5000
    samples of ``data_valid``.

    Side effects:
        Calls ``store_params`` after every trial (``"params<i>"``) and each
        time a new best is found (``"best_params<k>"``), and prints the best
        configuration so far.

    Returns:
        dict: The best configuration found, with keys ``valid_acc``,
        ``train_acc``, ``lr``, ``L``, ``N``, ``D`` and ``optim``. Only
        ``valid_acc`` (``0``) is present if no trial beat the initial value.
    """
    valid_loader = DataLoader(Subset(data_valid, list(range(5000))), batch_size=128, shuffle=False)

    lr_min = 0.001
    lr_max = 0.02
    lr_trials = 13
    Ls = [4, 8, 16]
    Ns = [128, 256]
    Ds = [256, 512, 1024]
    optims = [torch.optim.Adam, torch.optim.SGD]

    best = {'valid_acc': 0}

    all_count = 0
    best_count = 0

    for N in Ns:
        # The loader depends only on the batch size, so build it once per N.
        # shuffle=True matches the notebook's main training loader; training
        # on a fixed sample order was an inconsistency with it.
        train_loader = DataLoader(data_train, batch_size=N, shuffle=True)

        for D in Ds:
            for L in Ls:
                for optim in optims:
                    for _ in range(lr_trials):

                        lr = random.uniform(lr_min, lr_max)

                        model = VTResNet(
                            resnet_block=BasicBlock,
                            layers=[2, 2, 2, 2],
                            tokens=L,
                            token_channels=D,
                            input_dim=32,
                            layer_planes=[16, 32, 64, 128],
                            num_classes=10
                        )

                        optimizer = optim(model.parameters(), lr=lr)

                        valid_acc, train_acc, _ = train_attempt(model, optimizer, train_loader, valid_loader, 10)

                        # Store plain floats: accuracies may arrive as tensors
                        # bound to a device, which would make the pickled
                        # results depend on that device being available.
                        valid_acc = float(valid_acc)
                        train_acc = float(train_acc)

                        if valid_acc > best['valid_acc']:
                            best['valid_acc'] = valid_acc
                            best['train_acc'] = train_acc
                            best['lr'] = lr
                            best['L'] = L
                            best['N'] = N
                            best['D'] = D
                            best['optim'] = optim

                            store_params(best, "best_params" + str(best_count))
                            best_count += 1

                        # NOTE(review): this writes the best-so-far dict, not
                        # the current trial's settings, under "params<i>" —
                        # confirm whether per-trial results were intended.
                        store_params(best, "params" + str(all_count))
                        all_count += 1

                        print('Best So Far:')
                        print(best)

    return best

In [None]:
# Run the full hyperparameter search (this trains a fresh model for every
# trial), print the winning configuration, and persist it through
# store_params under the name "best_of_all_test".
best = param_search()
print(best)
store_params(best, "best_of_all_test")

Output hidden; open in https://colab.research.google.com to view.

In [None]:
"""
{
    'valid_acc': tensor(65.9000, device='cuda:0'), 
    'train_acc': tensor(68.5020, device='cuda:0'), 
    'lr': 0.004095672975699754, 
    'L': 8, 
    'N': 128, 
    'D': 512, 
    'optim': <class 'torch.optim.adam.Adam'>
}
"""