In [None]:
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

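Next we define a small helper dataset that keeps only a chosen subset of CIFAR-10 (selected by index). We use it below to split the official training data into a training part and a validation part.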

In [None]:
class CIFAR10Subset(torchvision.datasets.CIFAR10):
    """
    CIFAR10 restricted to the samples selected by ``idx``.

    All positional and keyword arguments are forwarded unchanged to
    ``torchvision.datasets.CIFAR10``. When ``idx`` is ``None`` the full
    dataset is kept.
    """
    def __init__(self, *args, idx=None, **kwargs):
        super().__init__(*args, **kwargs)

        if idx is not None:
            # Apply the same selection to images and labels so they stay aligned.
            self.data = self.data[idx]
            selected_targets = np.array(self.targets)[idx]
            self.targets = selected_targets.tolist()

We next define transformations that change the images into PyTorch tensors, standardize the values with a precomputed per-channel mean and standard deviation (the ImageNet statistics, matching the pretrained ResNet used below), and provide data augmentation for the training set.

In [None]:
# Per-channel statistics used to standardize the image tensors.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random flip and padded random crop as augmentation,
# followed by tensor conversion and standardization.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: tensor conversion and standardization only.
transform_eval = transforms.Compose([transforms.ToTensor(), normalize])

In [None]:
# The first `ntrain` images of the official training split are used for
# training; the remaining ones (up to index 50_000) form the validation set.
ntrain = 45_000

train_set = CIFAR10Subset(
    root='./data', train=True, download=True,
    transform=transform_train, idx=range(ntrain),
)
val_set = CIFAR10Subset(
    root='./data', train=True, download=True,
    transform=transform_eval, idx=range(ntrain, 50_000),
)
test_set = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_eval,
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# One DataLoader per split, keyed by split name. All loaders share the same
# settings except that only the training data is shuffled.
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=128,
        shuffle=(split == 'train'),
        num_workers=2,
        pin_memory=True,
    )
    for split, dataset in (('train', train_set), ('val', val_set), ('test', test_set))
}

Next we select the device to run on: our GPU if there is one, otherwise the CPU. The model is pushed to this device.

In [None]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
# NOTE: `resnet` is only created in a later cell, so it cannot be moved here on
# a fresh kernel (that raised a NameError). The cell that builds the model
# moves it onto `device` itself.

cuda


In [None]:
from torchvision import datasets, models, transforms

# Seed the default (CPU) generator as well as every CUDA device: the new `fc`
# layer below is created on the CPU, which `torch.cuda.manual_seed_all` alone
# does not cover.
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
# Ask cuDNN for deterministic algorithms and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Pretrained ResNet-50 whose classification head is replaced below.
resnet = models.resnet50(pretrained=True)
num_ftrs = resnet.fc.in_features
# Replace the final fully connected layer so that it outputs 10 scores,
# one per CIFAR-10 class.
resnet.fc = nn.Linear(num_ftrs, 10)

resnet = resnet.to(device)

Next we define a helper method that does one epoch of training or evaluation. We have only defined training here, so you need to implement the necessary changes for evaluation!

In [None]:
def run_epoch(model, optimizer, dataloader, train):
    """
    Run one epoch of training or evaluation.

    Args:
        model: The model used for prediction.
        optimizer: Optimization algorithm for the model. Only used when
            ``train`` is True; may be ``None`` for evaluation.
        dataloader: Iterable yielding ``(inputs, targets)`` batches to run
            the model on.
        train: Whether this epoch is used for training (True) or
            evaluation (False).

    Returns:
        Tuple ``(loss, accuracy)``: the mean per-sample cross-entropy loss
        and the fraction of correctly classified samples in this epoch.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    # The device does not depend on train/eval mode.
    device = next(model.parameters()).device

    # Switch e.g. batch normalization and dropout to the matching mode.
    model.train(train)

    loss_sum = 0.0
    n_correct = 0
    n_samples = 0

    for xb, yb in dataloader:
        xb, yb = xb.to(device), yb.to(device)
        batch_size = yb.size(0)

        if train:
            # Zero the parameter gradients left over from the previous step.
            optimizer.zero_grad()

        # Only track the computation graph when we are going to backpropagate.
        with torch.set_grad_enabled(train):
            pred = model(xb)
            loss = F.cross_entropy(pred, yb)

        if train:
            loss.backward()
            optimizer.step()

        # Class with the highest score; softmax is monotonic, so it is not needed.
        top1 = torch.argmax(pred, dim=1)

        # `loss` is the mean over the batch, so weight it by the batch size
        # to obtain a correct per-sample average at the end of the epoch.
        loss_sum += loss.item() * batch_size
        # During training this accuracy is measured in train mode (batch norm
        # and dropout active), so it is only a rough indicator.
        n_correct += torch.sum(top1 == yb).item()
        n_samples += batch_size

    if n_samples == 0:
        raise ValueError("dataloader yielded no samples")

    return loss_sum / n_samples, n_correct / n_samples

Next we implement a method for fitting (training) our model. For many models early stopping can save a lot of training time. Your task is to add early stopping to the loop (based on validation accuracy). Early stopping usually means exiting the training loop if the validation accuracy hasn't improved for `patience` number of steps. Don't forget to save the best model parameters according to validation accuracy. You will need `copy.deepcopy` and the `state_dict` for this.

In [None]:
# Per-epoch records appended by `fit`; each entry is a dict with the keys
# 'epoch', 'loss' and 'acc'. They are module-level so they survive across
# several calls to `fit`.
info_train = []
info_val = []
info_test = []
def fit(model, optimizer, lr_scheduler, dataloaders, start_epochs, max_epochs, patience):
    """
    Fit the given model on the dataset with early stopping.

    Args:
        model: The model used for prediction
        optimizer: Optimization algorithm for the model
        lr_scheduler: Learning rate scheduler that improves training
                      in late epochs with learning rate decay; stepped
                      once per epoch
        dataloaders: Dict of dataloaders with the keys 'train' and 'val'.
                     If a 'test' entry is present it is evaluated and
                     logged every epoch as well, but never used for
                     model selection.
        start_epochs: Number of epochs already completed; the first epoch
                      run here is reported as ``start_epochs + 1``
        max_epochs: Maximum number of epochs for training (epoch number
                    at which training stops at the latest)
        patience: Training stops early once the validation accuracy has
                  not improved for this many consecutive epochs

    Returns:
        None. On return ``model`` holds the weights that achieved the best
        validation accuracy during this call (or its initial weights if no
        epoch improved on an accuracy of 0), and one record per epoch has
        been appended to ``info_train``, ``info_val`` and, when a test
        loader is given, ``info_test``.
    """
    # Start from the current weights so there is always something to restore,
    # even if the loop body never runs or validation accuracy never exceeds 0.
    best_model = copy.deepcopy(model.state_dict())
    best_acc = 0
    curr_patience = 0

    for epoch in range(start_epochs + 1, max_epochs + 1):
        train_loss, train_acc = run_epoch(model, optimizer, dataloaders['train'], train=True)
        lr_scheduler.step()

        print(f"Epoch {epoch : >3}/{max_epochs}")
        print(f"train loss: {train_loss}, accuracy: {train_acc * 100:.2f}%")
        val_loss, val_acc = run_epoch(model, None, dataloaders['val'], train=False)
        print(f"val loss: {val_loss}, accuracy: {val_acc * 100:.2f}%")

        info_train.append({'epoch':epoch,'loss':train_loss,'acc':train_acc})
        info_val.append({'epoch':epoch,'loss':val_loss,'acc':val_acc})

        if 'test' in dataloaders:
            # Logged for later inspection only. Evaluate the model being
            # fitted (not a global), and never base decisions on this value.
            test_loss, test_acc = run_epoch(model, None, dataloaders['test'], train=False)
            print(f"Test loss: {test_loss}, accuracy: {test_acc * 100:.2f}%")
            info_test.append({'epoch':epoch,'loss':test_loss,'acc':test_acc})

        if val_acc > best_acc:
            # deepcopy is required: state_dict() returns references to the
            # live parameter tensors, which keep changing during training.
            best_model = copy.deepcopy(model.state_dict())
            best_acc = val_acc
            curr_patience = 0
        else:
            curr_patience += 1
            # Stop after `patience` consecutive epochs without improvement.
            if curr_patience >= patience:
                break

    # load_state_dict takes a state dict object, not a path.
    model.load_state_dict(best_model)

In most cases you should just use the Adam optimizer for training, because it works well out of the box. However, a well-tuned SGD (with momentum) will in most cases outperform Adam. And since the original paper gives us a well-tuned SGD we will just use that.

In [None]:
optimizer = torch.optim.SGD(resnet.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1)

# Fit model: first training stage, reported as epochs 1-100.
# `fit` labels epochs as start_epochs + 1 ... max_epochs, so this stage starts
# at 0. The keyword is `start_epochs`, and the arguments need a separating comma.
fit(resnet, optimizer, lr_scheduler, dataloaders, start_epochs=0, max_epochs=100, patience=50)

SyntaxError: ignored

In [None]:
# Second training stage (reported as epochs 101-200), run with a freshly
# created optimizer and learning rate scheduler.
optimizer = torch.optim.SGD(
    resnet.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
)
# NOTE(review): this scheduler is new, so its milestones presumably count
# from this call's first epoch rather than from epoch 0 of the overall run -
# confirm that this is the intended decay schedule.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[100, 150],
    gamma=0.1,
)

# Fit model
fit(
    resnet,
    optimizer,
    lr_scheduler,
    dataloaders,
    start_epochs=100,
    max_epochs=200,
    patience=50,
)

Epoch 101/200
train loss: 0.0030724759429693223, accuracy: 86.39%
val loss: 0.004363092267513275, accuracy: 82.40%
Test loss: 0.004375452882051468, accuracy: 82.39%
Epoch 102/200
train loss: 0.0030346389796998764, accuracy: 86.44%
val loss: 0.0041631247699260715, accuracy: 84.24%
Test loss: 0.004352417472004891, accuracy: 82.73%
Epoch 103/200
train loss: 0.003071366681655248, accuracy: 86.34%
val loss: 0.004502340495586395, accuracy: 82.28%
Test loss: 0.004484102170169354, accuracy: 81.83%
Epoch 104/200
train loss: 0.0030369005660216015, accuracy: 86.24%
val loss: 0.004570019352436065, accuracy: 82.56%
Test loss: 0.004546239498257637, accuracy: 80.95%
Epoch 105/200
train loss: 0.0030637056701713137, accuracy: 86.23%
val loss: 0.0043779697895050046, accuracy: 82.26%
Test loss: 0.004305039814114571, accuracy: 82.08%
Epoch 106/200
train loss: 0.002981569932235612, accuracy: 86.67%
val loss: 0.00425837140083313, accuracy: 83.02%
Test loss: 0.004500808531045914, accuracy: 81.92%
Epoch 107/2

In [None]:
# Same configuration as the previous training cell: a new optimizer and
# scheduler are created and `resnet` is trained further from its current weights.
sgd_settings = dict(lr=0.1, momentum=0.9, weight_decay=1e-4)
optimizer = torch.optim.SGD(resnet.parameters(), **sgd_settings)
del sgd_settings  # keep the notebook namespace unchanged
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[100, 150], gamma=0.1
)

# Fit model
fit(
    resnet, optimizer, lr_scheduler, dataloaders,
    start_epochs=100, max_epochs=200, patience=50,
)

Once the model is trained we run it on the test set to obtain our final accuracy.
Note that we can only look at the test set once; anything more would lead to overfitting to the test set. So you _must_ ignore the test set while developing your model!

In [None]:
# Final evaluation on the held-out test split; evaluation needs no optimizer.
test_loss, test_acc = run_epoch(
    resnet,
    None,
    dataloaders['test'],
    train=False,
)
print(f"Test loss: {test_loss:.4f}, accuracy: {test_acc}")

Test loss: 0.0041, accuracy: 0.8342


In [None]:
# Display the per-epoch training records ('epoch', 'loss', 'acc') collected by `fit`.
info_train

In [None]:
# Display the per-epoch validation records ('epoch', 'loss', 'acc') collected by `fit`.
info_val

In [None]:
# Display the per-epoch test records ('epoch', 'loss', 'acc') collected by `fit`.
info_test

In [None]:
# Colab only: mount Google Drive at /content/drive so the model can be saved there.
# force_remount=True requests a remount even if Drive is already mounted.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [None]:
# Persist the whole trained model object (not just its state_dict) to Drive.
# The path is relative to the current working directory.
torch.save(
    resnet,
    'drive/MyDrive/ResNetModel/resnetConstantDepth200-0.8342.model',
)

In [None]:
from torchsummary import summary
# summary(resnet, (3, 224, 224),print_summary=True)
# print(count(resnet))
# resnet.count_params()

from prettytable import PrettyTable

def count_parameters(model, show_table=False):
    """
    Count the trainable parameters of a model.

    Args:
        model: Module whose ``named_parameters()`` are inspected.
        show_table: If True, additionally print a per-parameter breakdown
            as a PrettyTable. Defaults to False, in which case no table is
            built at all.

    Returns:
        Total number of elements over all parameters that have
        ``requires_grad`` set. The total is also printed.
    """
    # (name, element count) for every parameter that receives gradients.
    trainable = [
        (name, parameter.numel())
        for name, parameter in model.named_parameters()
        if parameter.requires_grad
    ]
    total_params = sum(count for _, count in trainable)

    if show_table:
        table = PrettyTable(["Modules", "Parameters"])
        for name, count in trainable:
            table.add_row([name, count])
        print(table)

    print(f"Total Trainable Params: {total_params}")
    return total_params
    
# Count the parameters of the trained network. `resnet` is used because
# `model_new` is only created in a later cell and would be undefined on a
# fresh top-to-bottom run.
params = count_parameters(resnet)
print(params)

Total Trainable Params: 23528522
23528522


In [None]:
# Colab only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import copy
# Independent deep copy of the trained network; changes to it do not affect `resnet`.
model_new = copy.deepcopy(resnet)
def test_remove_layer():
    """
    Measure how test performance degrades as residual blocks are removed.

    Works on a deep copy of the global ``resnet``, so the trained model is
    left untouched. After reporting the unmodified copy, blocks are deleted
    one at a time (cumulatively) and the pruned model is re-evaluated on
    ``dataloaders['test']`` after every deletion.

    Returns:
        None. For every step the test loss, test accuracy and number of
        trainable parameters are printed.
    """
    model_new = copy.deepcopy(resnet)

    def _report(model):
        # Evaluate `model` on the test split and print loss, accuracy and size.
        test_loss, test_acc = run_epoch(model, None, dataloaders['test'], train=False)
        params = count_parameters(model)
        print(f"Test loss: {test_loss:.4f}, accuracy: {test_acc}, params: {params}")

    print("Without Removed Layer")
    # Evaluate the same copy whose parameters are counted.
    _report(model_new)

    # (label, stage attribute, block index), in removal order. Within each
    # stage the highest index is removed first, so the remaining indices
    # stay valid after every deletion.
    removals = (
        ("CONV_4_2", "layer4", 2),
        ("CONV_4_1", "layer4", 1),
        ("CONV_3_5", "layer3", 5),
        ("CONV_3_4", "layer3", 4),
        ("CONV_3_3", "layer3", 3),
        ("CONV_3_2", "layer3", 2),
        ("CONV_3_1", "layer3", 1),
        ("CONV_2_3", "layer2", 3),
        ("CONV_2_2", "layer2", 2),
        ("CONV_2_1", "layer2", 1),
    )
    for label, stage, block_idx in removals:
        print(f"{label} Removed")
        del getattr(model_new, stage)[block_idx]
        _report(model_new)

In [None]:
# Run the layer-removal experiment; results are printed, nothing is returned.
test_remove_layer()

Without Removed Layer
Total Trainable Params: 23528522
Test loss: 0.0041, accuracy: 0.8342, params: 23528522
CONV_4_2 Removed
Total Trainable Params: 19065930
Test loss: 0.0045, accuracy: 0.8347, params: 19065930
CONV_4_1 Removed
Total Trainable Params: 14603338
Test loss: 0.0050, accuracy: 0.8355, params: 14603338
CONV_3_5 Removed
Total Trainable Params: 13486154
Test loss: 0.0051, accuracy: 0.8277, params: 13486154
CONV_3_4 Removed
Total Trainable Params: 12368970
Test loss: 0.0051, accuracy: 0.825, params: 12368970
CONV_3_3 Removed
Total Trainable Params: 11251786
Test loss: 0.0052, accuracy: 0.814, params: 11251786
CONV_3_2 Removed
Total Trainable Params: 10134602
Test loss: 0.0054, accuracy: 0.8015, params: 10134602
CONV_3_1 Removed
Total Trainable Params: 9017418
Test loss: 0.0057, accuracy: 0.7895, params: 9017418
CONV_2_3 Removed
Total Trainable Params: 8737354
Test loss: 0.0068, accuracy: 0.7519, params: 8737354
CONV_2_2 Removed
Total Trainable Params: 8457290
Test loss: 0.007