In [1]:
import torch
import torch.nn as nn  
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import classification_report
import ResNet as resnet
import DenseNet as densenet
import EfficientNet as efficientnet
import MobileNetV2 as mobilenet
import ViT as vit
import solver.solver as solver
import solver.solver_v2 as solver_v2
import solver.solver_v3 as solver_v3

In [2]:
# GPU selection: enumerate devices in PCI bus order and restrict the visible set.
# These variables are set before the first CUDA query below.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2, 3",
})

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Training hyperparameters.
n_epochs, batch_size = 100, 64
learning_rate = 1e-4
# Variant tag handed to the ViT constructor; train_loop also branches on it
# to pick the solver module ("v1" / "v2" / "v3").
diff_drop = "v3"

# Model, loss and optimizer used by every later cell.
model = vit.ViT(num_classes=10, diff_drop=diff_drop)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [3]:
# Root directory holding the CIFAR-10 image folders ("train/" and "test/").
# Set the CIFAR10_ROOT environment variable to run on another machine; the
# default keeps the original location.
data_root = os.environ.get("CIFAR10_ROOT", "/media/data1/hyunjun/cifar-10")

# The trailing "" component keeps the trailing separator of the original paths.
train_path = os.path.join(data_root, "train", "")
test_path = os.path.join(data_root, "test", "")

In [4]:
# Preprocessing shared by the train and test sets: resize every image to
# 224x224, convert it to a tensor, then normalise each channel with the given
# mean / std triples (presumably CIFAR-10 statistics - confirm).
trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262)),
])

# One sub-directory per class under each root.
trainset = torchvision.datasets.ImageFolder(root=train_path, transform=trans)
testset = torchvision.datasets.ImageFolder(root=test_path, transform=trans)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=False)
# The evaluation metrics do not depend on sample order, so the test loader is
# left unshuffled to keep evaluation deterministic.
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, drop_last=False)

# Class names in label-index order; test_loop uses them for its report.
classes = trainset.classes
print(classes)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [5]:
def train_loop(dataloader, model, criterion, optimizer, epoch):
    """Run one training pass over `dataloader` and print progress statistics.

    Reads the notebook-level globals `device` and `diff_drop`.

    Args:
        dataloader: Yields `(inputs, labels)` batches; must support `len()` and
            expose `.dataset`.
        model: Network being trained. It is called as `model(inputs, epoch)` when
            `diff_drop == "v3"` and as `model(inputs)` otherwise, and must return
            a `(logits, p)` pair. `model.classification_head.fc` is handed to the
            solver's `PseudoPruning`.
        criterion: Loss callable applied to `(logits, labels)`.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number forwarded to the model in "v3" mode.

    Returns:
        None. A line is printed every 50 batches and once at the end of the pass.
    """
    size = len(dataloader.dataset)
    # Number of batches actually produced (includes a trailing partial batch),
    # which is the correct divisor for the mean per-batch loss.
    num_batches = len(dataloader)
    total_correct = 0
    total_loss = 0.0

    # Solver module matching the configured variant; None means no pruning step.
    pruner = {"v1": solver, "v2": solver_v2, "v3": solver_v3}.get(diff_drop)

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Re-assert training mode before every forward pass, as the original
        # did, in case the forward or pruning step toggles it.
        model.train()
        X, y = X.to(device), y.to(device)

        if diff_drop == "v3":
            pred, p = model(X, epoch)
        else:
            pred, p = model(X)

        loss = criterion(pred, y)
        correct = (torch.argmax(pred, dim=1) == y).sum().item()
        total_correct += correct
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        # The pruning hook runs between backward() and step(), as in the original.
        if pruner is not None:
            pruner.PseudoPruning(model.classification_head.fc, p)
        optimizer.step()

        if batch % 50 == 0:
            # Divide by the real batch length: the last batch may be smaller
            # than the configured batch size.
            print(f"Batch {batch:>5d}: Loss per batch is {loss.item():>7.6f} and Accuracy is {correct / y.size(0):>7.6f}")

    # Guard the divisions so an empty loader reports zeros instead of raising.
    mean_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = total_correct / size if size else 0.0
    print(f"For Iteration: Loss is {mean_loss:>7.6f} and Accuracy is {accuracy:>7.6f}")

In [6]:
def test_loop(dataloader, model, criterion):
    """Evaluate `model` on `dataloader`, print a report, and return the accuracy.

    Reads the notebook-level globals `device` and `classes`.

    Args:
        dataloader: Yields `(inputs, labels)` batches; must support `len()` and
            expose `.dataset`.
        model: Network to evaluate; `model(inputs)` must return a pair whose
            first element is the logits.
        criterion: Loss callable applied to `(logits, labels)`.

    Returns:
        Fraction of correctly classified samples over `len(dataloader.dataset)`.
    """
    total_loss = 0.0
    correct = 0
    y_true, y_pred = [], []

    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            # NOTE(review): train_loop passes `epoch` to the model in "v3" mode,
            # but evaluation calls it without one - presumably the model has a
            # default for that argument; confirm.
            logits, _ = model(X)
            total_loss += criterion(logits, y).item()

            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == y).sum().item()

            y_true.extend(y.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Report the mean per-batch loss, which was previously accumulated but
    # never shown.
    num_batches = len(dataloader)
    if num_batches:
        print(f"Test loss is {total_loss / num_batches:>7.6f}")
    print(classification_report(y_true=y_true, y_pred=y_pred, target_names=classes))

    return correct / len(dataloader.dataset)

In [7]:
# Best test accuracy seen so far. Named `best_acc` rather than `max` so the
# builtin max() stays usable in the rest of the notebook.
best_acc = 0.0
for epoch in range(n_epochs):
    print(f"Training: Epoch {epoch + 1} ------------------")
    # Epochs are passed 1-based to train_loop.
    train_loop(train_loader, model, criterion, optimizer, epoch + 1)
    print(f"----Test: Epoch {epoch + 1} ------------------")
    test_acc = test_loop(test_loader, model, criterion)

    # Checkpoint whenever the test accuracy improves. The filename follows the
    # configured variant (it is "./cifar10_vit_v3_best.pt" for diff_drop == "v3").
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model, f'./cifar10_vit_{diff_drop}_best.pt')

Training: Epoch 1 ------------------
Batch     0: Loss per batch is 2.716955 and Accuracy is 0.125000
Batch    50: Loss per batch is 2.006515 and Accuracy is 0.187500
Batch   100: Loss per batch is 1.975181 and Accuracy is 0.218750
Batch   150: Loss per batch is 2.158013 and Accuracy is 0.187500
Batch   200: Loss per batch is 2.061702 and Accuracy is 0.203125
Batch   250: Loss per batch is 2.002179 and Accuracy is 0.234375
Batch   300: Loss per batch is 1.986631 and Accuracy is 0.203125
Batch   350: Loss per batch is 1.815287 and Accuracy is 0.265625
Batch   400: Loss per batch is 1.635621 and Accuracy is 0.375000
Batch   450: Loss per batch is 2.048755 and Accuracy is 0.312500
Batch   500: Loss per batch is 1.937621 and Accuracy is 0.218750
Batch   550: Loss per batch is 1.513128 and Accuracy is 0.390625
Batch   600: Loss per batch is 1.707201 and Accuracy is 0.453125
Batch   650: Loss per batch is 1.522680 and Accuracy is 0.453125
Batch   700: Loss per batch is 1.618624 and Accuracy 

In [None]:
# Save the model object as it stands when this cell runs. The filename is built
# from the configured variant and epoch count, so it cannot drift from the
# settings (it is "./cifar10_vit_v3_epoch100.pt" for diff_drop == "v3" and
# n_epochs == 100).
torch.save(model, f'./cifar10_vit_{diff_drop}_epoch{n_epochs}.pt')