In [1]:
import os
import shutil
import utils
import numpy as np
import random

from tqdm import tqdm
import torch
import torch.nn as nn
import torchvision.transforms as T
from torchvision import datasets, models
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [2]:
utils.set_random_seed()

In [3]:
### Setting up "constants"
NUM_LABELS = 196
BATCH_SIZE = 64
LR = 2e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_EPOCHS = 25

In [4]:
simple_transforms = T.Compose([
    T.ToTensor(),
    T.Resize((224,244)),
    T.RandomResizedCrop(224),
    T.RandomHorizontalFlip(p=0.25),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    # T.GaussianBlur(kernel_size=(5,5)),
    # T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)
])

resize_test = T.Compose([
    T.ToTensor(),
    T.Resize((224,224)),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [5]:
# set number of labels manually
NUM_LABELS = 196
BATCH_SIZE = 64
LR = 2e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_EPOCHS = 15
print(DEVICE)

cuda


In [6]:
%%capture
train_set = datasets.StanfordCars(
    "./stanfordcars/", 
    split="train", 
    transform=simple_transforms, 
    download=True
)
test_set = datasets.StanfordCars(
    "./stanfordcars/", 
    split="test", 
    transform=resize_test,
    download=True
)


train_loader = torch.utils.data.DataLoader(
    train_set, 
    batch_size=BATCH_SIZE, 
    shuffle=True, 
    pin_memory=True
)

test_loader = torch.utils.data.DataLoader(
    test_set, 
    batch_size=BATCH_SIZE, 
    shuffle=False, 
    pin_memory=True
)

In [7]:
# Tensorboard config and writter init
PROJECT = "Assignment3"
TBOARD_LOGS = os.path.join(os.getcwd(), "tboard_logs", PROJECT)
if not os.path.exists(TBOARD_LOGS):
    os.makedirs(TBOARD_LOGS)

shutil.rmtree(TBOARD_LOGS)
writer = SummaryWriter(TBOARD_LOGS)

In [8]:
# cutmix implementation, implemented from pseudo code given by the paper

def CutMix(input, target):
    # get batch size to shuffle index of minibatch, and set a shuffled list
    B = list(range(input.shape[0]))
    shuffled_batch_idx = torch.tensor(random.sample(B, len(B)))

    # alg according to paper
    W = input[0].shape[1]
    H = input[0].shape[2]
    Lambda = torch.rand(1)
    r_x = torch.rand(1) * H
    r_y = torch.rand(1) * W
    r_w = torch.sqrt(1 - Lambda) * H
    r_h = torch.sqrt(1 - Lambda) * W

    x1 = int(torch.clamp((r_x - r_w / 2), min=0, max=W))
    x2 = int(torch.clamp((r_x + r_w / 2), min=0, max=W))
    y1 = int(torch.clamp((r_y - r_h / 2), min=0, max=H))
    y2 = int(torch.clamp((r_y + r_h / 2), min=0, max=H))
    
    # target = Lambda * target + (1 - Lambda) * target[shuffled_batch_idx]
    Lambda = 1 - ((x2-x1) * (y2 - y1) / (W*H))
    input[:, :, y1:y2, x1:x2] = input[shuffled_batch_idx, :, y1:y2, x1:x2]

    return input, target, shuffled_batch_idx, Lambda

In [20]:
def train_epoch(model, train_loader, optimizer, criterion, epoch, device=DEVICE):
    """ Training a model for one epoch """
    
    loss_list = []
    progress_bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, (images, labels) in progress_bar:
        images = images.to(device)
        labels = labels.to(device)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
         
        prob_cutmix = torch.rand(1)
        if prob_cutmix > 0.75:
            # used for cutmix agumentation
            images, labels, shuffled_idx, Lambda = CutMix(images, labels)
            
            # Forward pass to get output/logits
            outputs = model(images)
            
            # Calculate Loss: softmax --> cross entropy loss
            # split loss values according to cutmix paper
            loss = criterion(outputs, labels) * Lambda  + criterion(outputs, labels[shuffled_idx]) * (1 - Lambda)
        else: 

            #compute output as usual
            outputs = model(images)
            loss = criterion(outputs, labels)

        loss_list.append(loss.item())
         
        # Getting gradients w.r.t. parameters
        loss.backward()
         
        # Updating parameters
        optimizer.step()

        progress_bar.set_description(f"Epoch {epoch+1} Iter {i+1}: loss {loss.item():.5f}. ")
        
    mean_loss = np.mean(loss_list)
    return mean_loss, loss_list


@torch.no_grad()
def eval_model(model, eval_loader, criterion, device=DEVICE):
    """ Evaluating the model for either validation or test """
    correct = 0
    total = 0
    loss_list = []
    
    for images, labels in eval_loader:
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass only to get logits/output
        outputs = model(images)
                 
        loss = criterion(outputs, labels)
        loss_list.append(loss.item())
            
        # Get predictions from the maximum value
        preds = torch.argmax(outputs, dim=1)
        correct += len( torch.where(preds==labels)[0] )
        total += len(labels)
                 
    # Total correct predictions and loss
    accuracy = correct / total * 100
    loss = np.mean(loss_list)
    
    return accuracy, loss


def train_model(model, optimizer, scheduler, criterion, train_loader, valid_loader, num_epochs, device, tboard, start_epoch=0):
    """ Training a model for a given number of epochs"""
    
    train_loss = []
    val_loss =  []
    loss_iters = []
    valid_acc = []
    
    for epoch in range(num_epochs):
           
        # validation epoch
        model.eval()  # important for dropout and batch norms
        accuracy, loss = eval_model(
                    model=model, eval_loader=valid_loader,
                    criterion=criterion, device=device
            )
        valid_acc.append(accuracy)
        val_loss.append(loss)
        tboard.add_scalar(f'Accuracy/Valid', accuracy, global_step=epoch+start_epoch)
        tboard.add_scalar(f'Loss/Valid', loss, global_step=epoch+start_epoch)
        
        # training epoch
        model.train()  # important for dropout and batch norms
        mean_loss, cur_loss_iters = train_epoch(
                model=model, train_loader=train_loader, optimizer=optimizer,
                criterion=criterion, epoch=epoch, device=device
            )
        scheduler.step()
        train_loss.append(mean_loss)
        tboard.add_scalar(f'Loss/Train', mean_loss, global_step=epoch+start_epoch)

        loss_iters = loss_iters + cur_loss_iters
        
        # if(epoch % 5 == 0 or epoch==num_epochs-1):
        # print(f"Epoch {epoch+1}/{num_epochs}")
        # print(f"    Train loss: {round(mean_loss, 5)}")
        # print(f"    Valid loss: {round(loss, 5)}")
        # print(f"    Accuracy: {accuracy}%")
        # print("\n")
    
    print(f"Training completed")
    return train_loss, val_loss, loss_iters, valid_acc

In [21]:
%%capture
# Capture, cause pretrained param has a deprecated warning, but still works
# Loading models
vgg_model = models.vgg16_bn(pretrained=True)

In [22]:
vgg_model.classifier = nn.Linear(7*7*512, NUM_LABELS)

vgg_model.to(DEVICE);

In [23]:
# classification loss function
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_vgg = torch.optim.Adam(vgg_model.parameters(), lr=3e-4)

# Decay LR by a factor of 0.1 every 7 epochs
scheduler_vgg = torch.optim.lr_scheduler.StepLR(optimizer_vgg, step_size=7, gamma=0.1)

In [24]:
TBOARD_LOGS = os.path.join(os.getcwd(), "tboard_logs", "VGG16_tuned")
if not os.path.exists(TBOARD_LOGS):
    os.makedirs(TBOARD_LOGS)

shutil.rmtree(TBOARD_LOGS)
writer_vgg = SummaryWriter(TBOARD_LOGS)

In [25]:
#model, optimizer, scheduler, criterion, train_loader, 
#    valid_loader, num_epochs, device, all_labels, tboard=None, start_epoch=0
#    ):
train_loss, val_loss, loss_iters, valid_acc = train_model(
        model=vgg_model, optimizer=optimizer_vgg, scheduler=scheduler_vgg, criterion=criterion,
        train_loader=train_loader, valid_loader=test_loader, num_epochs=20,
        device=DEVICE, tboard=writer_vgg
)

Epoch 1 Iter 128: loss 5.33086. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]
Epoch 2 Iter 81: loss 4.75970. :  63%|██████▎   | 81/128 [01:03<00:42,  1.11it/s]

In [15]:
#model, optimizer, scheduler, criterion, train_loader, 
#    valid_loader, num_epochs, device, all_labels, tboard=None, start_epoch=0
#    ):
train_loss, val_loss, loss_iters, valid_acc = train_model(
        model=vgg_model, optimizer=optimizer_vgg, scheduler=scheduler_vgg, criterion=criterion,
        train_loader=train_loader, valid_loader=test_loader, num_epochs=20,
        device=DEVICE, tboard=writer_vgg
)

Epoch 1 Iter 128: loss 4.50115. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 1/20
    Train loss: 5.11658
    Valid loss: 5.29631
    Accuracy: 0.7088670563362767%




Epoch 2 Iter 128: loss 4.09720. : 100%|██████████| 128/128 [01:43<00:00,  1.23it/s]


Epoch 2/20
    Train loss: 4.25511
    Valid loss: 4.8006
    Accuracy: 4.340256187041413%




Epoch 3 Iter 128: loss 2.47387. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 3/20
    Train loss: 3.36023
    Valid loss: 3.25228
    Accuracy: 21.10434025618704%




Epoch 4 Iter 128: loss 4.91178. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 4/20
    Train loss: 2.83432
    Valid loss: 2.52851
    Accuracy: 35.70451436388509%




Epoch 5 Iter 128: loss 1.55159. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 5/20
    Train loss: 2.52717
    Valid loss: 1.93573
    Accuracy: 49.27247854744435%




Epoch 6 Iter 128: loss 3.30698. : 100%|██████████| 128/128 [01:43<00:00,  1.23it/s]


Epoch 6/20
    Train loss: 2.07982
    Valid loss: 1.98197
    Accuracy: 47.8796169630643%




Epoch 7 Iter 128: loss 3.67052. : 100%|██████████| 128/128 [01:44<00:00,  1.22it/s]


Epoch 7/20
    Train loss: 2.14418
    Valid loss: 1.42047
    Accuracy: 61.882850391742316%




Epoch 8 Iter 128: loss 3.18588. : 100%|██████████| 128/128 [01:43<00:00,  1.23it/s]


Epoch 8/20
    Train loss: 1.79835
    Valid loss: 1.3135
    Accuracy: 63.611491108071135%




Epoch 9 Iter 128: loss 4.01721. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 9/20
    Train loss: 1.59674
    Valid loss: 0.95264
    Accuracy: 74.2693694814078%




Epoch 10 Iter 128: loss 0.79617. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 10/20
    Train loss: 1.49222
    Valid loss: 0.90122
    Accuracy: 75.46325083944782%




Epoch 11 Iter 128: loss 0.68025. : 100%|██████████| 128/128 [01:43<00:00,  1.23it/s]


Epoch 11/20
    Train loss: 1.48107
    Valid loss: 0.89023
    Accuracy: 75.67466732993408%




Epoch 12 Iter 128: loss 1.65169. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 12/20
    Train loss: 1.59193
    Valid loss: 0.8637
    Accuracy: 75.93582887700535%




Epoch 13 Iter 128: loss 1.20199. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 13/20
    Train loss: 1.31979
    Valid loss: 0.89965
    Accuracy: 76.34622559383162%




Epoch 14 Iter 128: loss 2.69872. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 14/20
    Train loss: 1.31809
    Valid loss: 0.83812
    Accuracy: 76.83123989553539%




Epoch 15 Iter 128: loss 0.65469. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 15/20
    Train loss: 1.42659
    Valid loss: 0.85356
    Accuracy: 77.29138166894664%




Epoch 16 Iter 128: loss 0.35069. : 100%|██████████| 128/128 [01:44<00:00,  1.22it/s]


Epoch 16/20
    Train loss: 1.30295
    Valid loss: 0.81413
    Accuracy: 77.25407287650789%




Epoch 17 Iter 128: loss 2.38656. : 100%|██████████| 128/128 [01:44<00:00,  1.22it/s]


Epoch 17/20
    Train loss: 1.26123
    Valid loss: 0.81097
    Accuracy: 77.4530531028479%




Epoch 18 Iter 128: loss 3.32715. : 100%|██████████| 128/128 [01:44<00:00,  1.22it/s]


Epoch 18/20
    Train loss: 1.28107
    Valid loss: 0.82264
    Accuracy: 77.00534759358288%




Epoch 19 Iter 128: loss 0.38047. : 100%|██████████| 128/128 [01:44<00:00,  1.23it/s]


Epoch 19/20
    Train loss: 1.3288
    Valid loss: 0.8172
    Accuracy: 77.41574431040915%




Epoch 20 Iter 128: loss 1.95745. : 100%|██████████| 128/128 [01:43<00:00,  1.23it/s]

Epoch 20/20
    Train loss: 1.30857
    Valid loss: 0.79827
    Accuracy: 77.92563114040543%


Training completed





In [None]:
#model, optimizer, scheduler, criterion, train_loader, 
#    valid_loader, num_epochs, device, all_labels, tboard=None, start_epoch=0
#    ):
train_loss, val_loss, loss_iters, valid_acc = train_model(
        model=vgg_model, optimizer=optimizer_vgg, scheduler=scheduler_vgg, criterion=criterion,
        train_loader=train_loader, valid_loader=test_loader, num_epochs=20,
        device=DEVICE, tboard=writer_vgg
)

Epoch 1 Iter 128: loss 3.93485. : 100%|██████████| 128/128 [01:39<00:00,  1.29it/s]


Epoch 1/20
    Train loss: 4.86316
    Valid loss: 5.27226
    Accuracy: 0.9949011317000372%




Epoch 2 Iter 128: loss 3.42686. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 2/20
    Train loss: 3.43878
    Valid loss: 4.00098
    Accuracy: 11.279691580649173%




Epoch 3 Iter 128: loss 2.44985. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 3/20
    Train loss: 2.48135
    Valid loss: 2.63334
    Accuracy: 33.2421340629275%




Epoch 4 Iter 128: loss 2.35421. : 100%|██████████| 128/128 [01:39<00:00,  1.29it/s]


Epoch 4/20
    Train loss: 1.93827
    Valid loss: 2.44167
    Accuracy: 38.452928740206445%




Epoch 5 Iter 128: loss 1.06484. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 5/20
    Train loss: 1.58504
    Valid loss: 1.95093
    Accuracy: 50.00621813207312%




Epoch 6 Iter 128: loss 1.15789. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 6/20
    Train loss: 1.40001
    Valid loss: 1.49327
    Accuracy: 59.22148986444472%




Epoch 7 Iter 128: loss 1.52628. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 7/20
    Train loss: 1.28941
    Valid loss: 1.3579
    Accuracy: 63.710981221241134%




Epoch 8 Iter 128: loss 0.93966. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 8/20
    Train loss: 0.98675
    Valid loss: 1.22664
    Accuracy: 67.5910956348713%




Epoch 9 Iter 128: loss 1.34231. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 9/20
    Train loss: 0.89926
    Valid loss: 0.8717
    Accuracy: 75.51299589603283%




Epoch 10 Iter 128: loss 0.47695. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 10/20
    Train loss: 0.84425
    Valid loss: 0.8477
    Accuracy: 76.43327944285537%




Epoch 11 Iter 128: loss 0.62704. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 11/20
    Train loss: 0.78819
    Valid loss: 0.84406
    Accuracy: 76.17211789578411%




Epoch 12 Iter 128: loss 0.97939. : 100%|██████████| 128/128 [01:39<00:00,  1.28it/s]


Epoch 12/20
    Train loss: 0.73609
    Valid loss: 0.83082
    Accuracy: 76.58251461261038%




Epoch 13 Iter 128: loss 0.99115. : 100%|██████████| 128/128 [01:39<00:00,  1.28it/s]


Epoch 13/20
    Train loss: 0.72114
    Valid loss: 0.81413
    Accuracy: 77.56497948016415%




Epoch 14 Iter 128: loss 0.96711. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 14/20
    Train loss: 0.70715
    Valid loss: 0.82879
    Accuracy: 77.17945529163039%




Epoch 15 Iter 128: loss 0.68214. : 100%|██████████| 128/128 [01:40<00:00,  1.27it/s]


Epoch 15/20
    Train loss: 0.69567
    Valid loss: 0.81102
    Accuracy: 78.07486631016043%




Epoch 16 Iter 128: loss 1.16340. : 100%|██████████| 128/128 [01:40<00:00,  1.27it/s]


Epoch 16/20
    Train loss: 0.68414
    Valid loss: 0.8087
    Accuracy: 78.26141027235418%




Epoch 17 Iter 128: loss 1.74474. : 100%|██████████| 128/128 [01:39<00:00,  1.28it/s]


Epoch 17/20
    Train loss: 0.66767
    Valid loss: 0.80396
    Accuracy: 77.98781246113667%




Epoch 18 Iter 128: loss 0.62671. : 100%|██████████| 128/128 [01:39<00:00,  1.28it/s]


Epoch 18/20
    Train loss: 0.62912
    Valid loss: 0.79925
    Accuracy: 78.09973883845292%




Epoch 19 Iter 128: loss 0.41274. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]


Epoch 19/20
    Train loss: 0.64678
    Valid loss: 0.7993
    Accuracy: 77.97537619699042%




Epoch 20 Iter 128: loss 0.33265. : 100%|██████████| 128/128 [01:40<00:00,  1.28it/s]

Epoch 20/20
    Train loss: 0.64386
    Valid loss: 0.79677
    Accuracy: 78.07486631016043%


Training completed



