In [2]:
! pip3 install torch torchvision



In [3]:
from torchvision import datasets, transforms 
from torchvision.models import resnet18, ResNet18_Weights
from torchvision.models import resnet50, ResNet50_Weights
from tempfile import TemporaryDirectory
from matplotlib import pyplot as plt
import torch.nn as nn
import torch.utils.data
import numpy as np
import os, time
from torchvision.datasets import OxfordIIITPet
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights
from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights

In [4]:
def show(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)), interpolation='nearest')
    plt.show()

In [5]:
def test_model(model, test_data, weights_path, device_type='cpu'):
    if torch.cuda.is_available():
        device_type = 'cuda:0'
    elif torch.backends.mps.is_available():
        device_type = 'mps'

    device = torch.device(device_type)
    model.load_state_dict(torch.load(weights_path))
    model = model.to(device)

    start = time.time()
    with torch.no_grad():
        model.eval()
        correct = 0
        total = 0
        for images, labels in test_data:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            predicted = torch.argmax(outputs, 1)

            total += labels.size(0)
            correct += torch.sum(predicted == labels.data)
        print('Time taken:', time.time() - start)
        print(f'Accuracy of the network on the test images: {100 * correct / total}%')

In [6]:
def train_model(model, dataloaders, dataset_sizes, suffix, parameters, scheduler=0, num_epochs=25, device_type='cpu'):
    if torch.cuda.is_available():
        device_type = 'cuda:0'
    elif torch.backends.mps.is_available():
        device_type = 'mps'

    device = torch.device(device_type)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(parameters, lr=0.001)

    since = time.time()

    losses = {'train': [], 'val': []}
    accuracies = {'train': [], 'val': []}

    # Create a directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join("./", suffix + '.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        start = time.time()

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)
            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                model.train() if phase == 'train' else model.eval()

                running_loss = 0.0
                running_corrects = 0

                print('phase:', phase)
                
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs) # forward pass

                        # outputs is probability of each class
                        # labels needs to be probability of each class

                        # we need the one-hot for the labels to get the loss
                        true_outputs = torch.nn.functional.one_hot(labels, num_classes=37).float()

                        # create preds by thresholding outputs
                        loss = criterion(outputs, true_outputs)

                        preds = torch.argmax(outputs, 1)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects / dataset_sizes[phase]

                losses[phase].append(epoch_loss)
                accuracies[phase].append(epoch_acc)
                
                outputstr = f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}'
                print(outputstr)
                # f.write(outputstr + '\n')

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)
            print('epoch took:', time.time() - start)
            start = time.time()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
    return best_model_params_path, best_acc, losses, accuracies

In [7]:
from torch.utils.data import Subset

def get_transforms(conf):
    base_transforms = [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]

    training_transforms = []

    crop = conf.get('crop', False)
    random_flip = conf.get('random_flip', False)
    rotate_angle = conf.get('rotate_angle', 0)
    random_resized_crop = conf.get('random_resized_crop', False)

    if random_resized_crop:
        training_transforms.append(transforms.RandomResizedCrop(224))
    elif crop:
        base_transforms.append(transforms.Resize(256))
        base_transforms.append(transforms.CenterCrop(224))
    else:
        base_transforms.append(transforms.Resize((224, 224)))

    # [transforms.Resize((224, 224))]
    
    if random_flip:
        training_transforms.append(transforms.RandomHorizontalFlip())
    if rotate_angle != 0:
        training_transforms.append(transforms.RandomRotation(rotate_angle))
    
    return base_transforms, training_transforms

def train_test_val_split(base_transforms, training_transforms, split_indices):
    train_dataset=OxfordIIITPet(root="./", download=True, target_types='category', transform=transforms.Compose(base_transforms + training_transforms + [transforms.Resize((224, 224))]))
    val_dataset=OxfordIIITPet(root="./", download=True, target_types='category', transform=transforms.Compose(base_transforms + [transforms.Resize((224, 224))]))
    test_dataset=OxfordIIITPet(root="./", download=True, target_types='category', transform=transforms.Compose(base_transforms + [transforms.Resize((224, 224))]))

    train_dataset = Subset(train_dataset, split_indices['train'])
    val_dataset = Subset(val_dataset, split_indices['val'])
    test_dataset = Subset(test_dataset, split_indices['test'])
    
    return train_dataset, val_dataset, test_dataset

def get_split_indices(dataset=OxfordIIITPet(root="./", download=True, target_types='category'), train_split=0.7, val_split=0.2, seed=None):
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    
    if seed is not None:
        np.random.seed(42)
    np.random.shuffle(indices)
    
    train_size = int(train_split * dataset_size)
    val_size = int(val_split * dataset_size)
    test_size = dataset_size - train_size - val_size
    
    # Split indices
    train_indices = indices[:train_size]
    val_indices = indices[train_size:train_size + val_size]
    test_indices = indices[train_size + val_size:]
    
    # Save the indices for reuse
    split_indices = {
        'train': train_indices,
        'val': val_indices,
        'test': test_indices
    }
    
    return split_indices

def generate_datasets(
        conf, split_indices
    ):

    base_transforms, training_transforms = get_transforms(conf)
    train_dataset, val_dataset, test_dataset = train_test_val_split(base_transforms, training_transforms, split_indices)

    dataloaders = {
        'train': torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True),
        'test': torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True),
        'val': torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=True),
    }

    dataset_sizes = {
        'train': len(train_dataset),
        'test': len(test_dataset),
        'val': len(val_dataset),
    }

    return dataloaders, dataset_sizes

In [8]:
def multiclass_resnet(resnet_size = "18"):
    if resnet_size == "18":
        resnet = resnet18(weights=ResNet18_Weights.DEFAULT)
    elif resnet_size == "50":
        resnet = resnet50(weights=ResNet50_Weights.DEFAULT)
    
    for param in resnet.parameters():
        param.requires_grad = False
    
    print(resnet.fc)

    #resnet fc input size
    input_size = resnet.fc.in_features

    resnet.fc = nn.Sequential(
        nn.Flatten(),
        nn.Linear(input_size, 256),
        nn.ReLU(),
        # nn.Dropout(0.2),
        nn.Linear(256, 37),
        nn.Softmax()
    )

    return resnet

def multiclass_efficientnet(name = "efficientnet_b4"):
    if name == "efficientnet_b4":
        efficientnet = efficientnet_b4(weights=EfficientNet_B4_Weights.DEFAULT)
    elif name == "efficientnet_v2_m":
        efficientnet = efficientnet_v2_m(weights=EfficientNet_V2_M_Weights.DEFAULT)
    
    for param in efficientnet.parameters():
        param.requires_grad = False
    
    print(efficientnet.classifier)

    #efficientnet fc input size
    input_size = efficientnet.classifier[1].in_features

    efficientnet.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(input_size, 256),
        nn.ReLU(),
        # nn.Dropout(0.2),
        nn.Linear(256, 37),
        nn.Softmax()
    )

    return efficientnet

In [57]:
def create_suffix(
        conf,
        unfrozen_layers=None
    ):
    device_type = 'cpu'
    if torch.cuda.is_available():
        device_type = 'cuda'
    elif torch.backends.mps.is_available():
        device_type = 'mps'
    
    suffix = ""
    for key, value in conf.items():
        suffix += key + ':' + str(value) + ';'
    suffix += device_type
    if unfrozen_layers is not None:
        suffix += 'unfrozen:' + str(unfrozen_layers)

    return suffix

In [10]:
model_name = 'efficientnet_v2_m'
random_flip = False
rotate_angle = 0
crop = False
random_resized_crop = 1

base_conf = {
    'model_name': model_name,
    'crop': crop,
    'random_flip': random_flip,
    'rotate_angle': rotate_angle,
    'scale': random_resized_crop
}


In [11]:
def get_model(model_name):
    if model_name == 'resnet18':
        return multiclass_resnet("18")
    elif model_name == 'resnet50':
        return multiclass_resnet("50")
    elif model_name == 'efficientnet_b4':
        return multiclass_efficientnet(model_name)
    elif model_name == 'efficientnet_v2_m':
        return multiclass_efficientnet(model_name)
    else:
        return None

In [12]:
base_transforms, training_transforms = get_transforms(base_conf)
print(base_transforms)
print(training_transforms)
print(base_transforms + training_transforms)

[ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)]
[]
[ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)]


## Data augmentation experiments

In [13]:


def generate_confs():
    model_name = 'efficientnet_v2_m'
    crops = ['off', 'center-crop', 'random-resized-crop']
    random_flips = [True, False]
    rotate_angles = [0, 30]

    # Generate confs 
    # confs = [
    #     {**base_conf, 'model_name': model}
    #     for model in models
    # ]
    # confs = [
    #     # crop and flip false
    #     # baseline
    #      {
    #         'model_name': model_name,
    #         'crop': False,
    #         'random_flip': False,
    #         'rotate_angle': 0,
    #         'random_resized_crop': False,
    #     },
    # val Loss: 2.9030 Acc: 0.7622
    # epoch took: 117.75068998336792
    # Training complete in 8m 35s
    # Best val Acc: 0.777174

    #     # crop on
    #     {
    #         'model_name': model_name,
    #         'crop': True,
    #         'random_flip': False,
    #         'rotate_angle': 0,
    #         'random_resized_crop': False,
    #     },
    # val Loss: 2.8738 Acc: 0.7976
    # epoch took: 129.6922209262848
    # Training complete in 10m 27s
    # Best val Acc: 0.802989

    #     # flip on
    #     {
    #         'model_name': model_name,
    #         'crop': False,
    #         'random_flip': True,
    #         'rotate_angle': 0,
    #         'random_resized_crop': False,
    #     },
    # val Loss: 2.8629 Acc: 0.8125
    # epoch took: 99.80412697792053
    # Training complete in 8m 51s
    # Best val Acc: 0.812500

    #     # rotate 45
    #     {
    #         'model_name': model_name,
    #         'crop': False,
    #         'random_flip': False,
    #         'rotate_angle': 45,
    #         'random_resized_crop': False,
    #     },
    # val Loss: 2.8905 Acc: 0.7812
    # epoch took: 145.18285083770752
    # Training complete in 12m 6s
    # Best val Acc: 0.782609

    #     # random_resized_crop on
    #     {
    #         'model_name': model_name,
    #         'crop': False,
    #         'random_flip': False,
    #         'rotate_angle': 0,
    #         'random_resized_crop': False,
    #     },
    # ]

    confs = []
    for crop in crops:
        for random_flip in random_flips:
            for rotate_angle in rotate_angles:
                conf = {
                    'model_name': model_name,
                    'crop': crop == 'center-crop',
                    'random_flip': random_flip,
                    'rotate_angle': rotate_angle,
                    'random_resized_crop': crop == 'random-resized-crop',
                }
                confs.append(conf)

    return confs

split_indices = get_split_indices(seed=42)
confs = generate_confs()

weights_and_accs = []
for conf in confs:
    dataloaders, dataset_sizes = generate_datasets(conf, split_indices)
    # print(dataloaders['train'].dataset)

    # # print(dataloaders['train'].dataset)

    model = get_model(conf['model_name'])
    suffix = create_suffix(conf)
    print(suffix)
    parameters = model.fc.parameters() if conf['model_name'].startswith('resnet') else model.classifier.parameters()
    weights_path, best_acc, losses, accuracies = train_model(model, dataloaders, dataset_sizes, suffix, parameters=parameters, num_epochs=5)
    weights_and_accs.append((weights_path, best_acc))

print(weights_and_accs)


Sequential(
  (0): Dropout(p=0.3, inplace=True)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)
model_name:efficientnet_v2_m;crop:False;random_flip:True;rotate_angle:0;random_resized_crop:False;cpu
Epoch 0/4
----------
phase: train


KeyboardInterrupt: 

## Freezing experiments

In [54]:
# unfrozen_layers are the number of layers that are unfrozen
def get_model_frozen_until(unfrozen_layers, model_name):
    model = get_model(model_name)
    children = list(model.children())
    
    for child in children[:-unfrozen_layers]:
        for param in child.parameters():
            param.requires_grad = False

    return model

In [59]:
for unfrozen_layers in range(1, 6):
    model = get_model_frozen_until(unfrozen_layers, 'efficientnet_v2_m')
    suffix = create_suffix(conf, unfrozen_layers)
    print(suffix)
    parameters = model.classifier.parameters()
    weights_path, best_acc, losses, accuracies = train_model(model, dataloaders, dataset_sizes, suffix, parameters=parameters, num_epochs=5)
    weights_and_accs.append((weights_path, best_acc))

Sequential(
  (0): Dropout(p=0.3, inplace=True)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)
model_name:efficientnet_v2_m;crop:False;random_flip:True;rotate_angle:0;random_resized_crop:False;cpuunfrozen:1
Epoch 0/4
----------
phase: train


KeyboardInterrupt: 

In [None]:
model_names = [
    "model_name:efficientnet_v2_m;crop:False;random_flip:False;rotate_angle:0;scale:1;cudaunfrozen:1.pt",
    "model_name:efficientnet_v2_m;crop:False;random_flip:False;rotate_angle:0;scale:1;cudaunfrozen:2.pt",
    "model_name:efficientnet_v2_m;crop:False;random_flip:False;rotate_angle:0;scale:1;cudaunfrozen:3.pt",
    "model_name:efficientnet_v2_m;crop:False;random_flip:False;rotate_angle:0;scale:1;cudaunfrozen:4.pt",
    "model_name:efficientnet_v2_m;crop:False;random_flip:False;rotate_angle:0;scale:1;cudaunfrozen:5.pt",
    "model_name:efficientnet_v2_m;crop:False;random_flip:False;rotate_angle:0;scale:1;cudaunfrozen:6.pt",
    ]

def evaluate_models(model_names):
    for model_name in model_names:
        model = get_model('efficientnet_v2_m')
        weights_path = model_name
        test_model(model, dataloaders['val'], weights_path)