Install requirements

In [None]:
# Check version of packages.
# Printing the module object only shows where it was imported from; the
# version string lives in `__version__`.
import torch
print(torch.__version__)

import torchvision
print(torchvision.__version__)

In [None]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.optim import lr_scheduler
# Report the library versions this kernel is running with.
for _label, _module in (("PyTorch Version: ", torch), ("Torchvision Version: ", torchvision)):
    print(_label, _module.__version__)

Check Colab GPU and CPU Setup

In [None]:
# Check GPU info
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
# Check CPU info: keep only the 'Model name' line of the lscpu report.
!lscpu |grep 'Model name'

## Setup

In [None]:
%%capture
# %%capture keeps the wget/tar output of this cell out of the notebook.
# The image archive on disk doubles as a "setup already done" marker: when it
# exists, nothing is downloaded or unpacked again.
# NOTE(review): wget saves into the current working directory while the checks
# below use absolute /content paths - assumes the kernel runs in /content.
if not os.path.exists("/content/images.tar.gz"):
  # Download dataset
  !wget https://thor.robots.ox.ac.uk/~vgg/data/pets/images.tar.gz

  # Download truth label
  !wget https://thor.robots.ox.ac.uk/~vgg/data/pets/annotations.tar.gz
  # Unpack the dataset and ground truth
  filename_images = "/content/images.tar.gz"
  !tar -zxvf {filename_images}

  filename_annotations = "/content/annotations.tar.gz"
  !tar -zxvf {filename_annotations}

## CATEGORIZE DATA BY ANIMAL BREED (37 IN TOTAL)

In [None]:
import os
import shutil
from collections import defaultdict
import random
# Configuration of the per-class train/val/test split.
SEED_FOR_DATA_SPLIT = 13948571  # fixed seed so every run produces the same split
TRAINING_SPLIT = 0.7
VAL_SPLIT = 0.2
TEST_SPLIT = 0.1  # not read below: the test split is whatever is left after train + val

try:
  # 37 classes
  annotations_file = "/content/annotations/list.txt"
  images_base_path = "images/"

  # Map every class id found in the annotation list to its image file names.
  classes = defaultdict(list)
  with open(annotations_file) as f:
      for line in f:
          if line.startswith("#"):
              continue  # header/comment line of the annotation file
          name, class_id, species, _ = line.strip().split(" ")
          classes[int(class_id)].append(name + ".jpg")

  # Move the images into <split>/<class_id>/ folders (ImageFolder layout).
  for class_id, files in classes.items():
      for split in ("train", "val", "test"):
          os.makedirs(f"{split}/{class_id}/", exist_ok=True)

      # Re-seeding before each shuffle makes the order of one class independent
      # of how many classes were shuffled before it.
      random.seed(SEED_FOR_DATA_SPLIT)
      random.shuffle(files)
      number_of_files = len(files)
      for i in range(number_of_files):
          fraction = i / number_of_files
          if fraction <= TRAINING_SPLIT:
              split = "train"
          elif fraction <= TRAINING_SPLIT + VAL_SPLIT:
              split = "val"
          else:
              split = "test"
          shutil.move(os.path.join(images_base_path, files[i]), f"{split}/{class_id}/{files[i]}")
except OSError as err:
  # Only file-system errors are treated as "already split" (e.g. the source
  # image is gone because a previous run moved it). Anything else, such as a
  # malformed annotation line or a keyboard interrupt, is a real problem and is
  # allowed to surface instead of being silently swallowed.
  print("Error in moving files occurred. They have probably already been moved")
  print(f"Details: {err!r}")

In [None]:
# Sanity check: number of images of class 11 that ended up in each split.
for _split_dir in ("/content/train/11", "/content/val/11", "/content/test/11"):
    print(len(os.listdir(_split_dir)))

Download model

### Finetune resnet-18

In [None]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import time
import os
import copy
# Report the library versions this kernel is running with.
for _label, _module in (("PyTorch Version: ", torch), ("Torchvision Version: ", torchvision)):
    print(_label, _module.__version__)

In [None]:
# Show the torchvision module object the kernel resolved.
import torchvision
print(torchvision)

In [None]:
# Report the version of the `python` executable that the shell resolves.
!python --version

In [None]:
# Root folder of the data. The train/, val/ and test/ sub-folders below it are
# expected to follow the torchvision ImageFolder layout.
data_dir = "/content/"

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

Helper functions

In [None]:
def test_acc(model, dataloader_dict):
  """Return the top-1 accuracy of `model` on the "test" split.

  Args:
    model: network to evaluate. It is switched to eval mode and left in it.
    dataloader_dict: mapping that contains a "test" DataLoader yielding
      (inputs, labels) batches.

  Returns:
    Fraction of correctly classified test samples as a Python float.
  """
  model.eval()
  running_corrects = 0
  # Pure inference: without no_grad every forward pass would build an autograd
  # graph that is never used, which only costs memory and time.
  with torch.no_grad():
    for inputs, labels in dataloader_dict["test"]:
      inputs = inputs.to(device)
      labels = labels.to(device)
      outputs = model(inputs)
      _, predictions = torch.max(outputs, 1)
      running_corrects += torch.sum(predictions == labels)
  # Named `accuracy` rather than `test_acc` so the function name is not shadowed.
  accuracy = running_corrects.double() / len(dataloader_dict["test"].dataset)

  return accuracy.item()

In [None]:
def load_resnet_with_unfrozen_layers(num_unfrozen_layers, unfreeze_bn, num_classes=37):
  """Build an ImageNet-pretrained ResNet-18 with only its last blocks trainable.

  Args:
    num_unfrozen_layers: how many residual stages stay trainable, counted
      backwards from `layer4` (1 -> layer4, 2 -> layer4 and layer3, ...).
    unfreeze_bn: when False, parameters whose name contains "bn" are frozen
      even inside the trainable stages.
    num_classes: output size of the replacement classification head.

  Returns:
    The model, moved to the first CUDA device if available, otherwise the CPU.
  """
  model_ft = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

  trainable_stages = [f"layer{4-i}" for i in range(num_unfrozen_layers)]
  for name, param in model_ft.named_parameters():
    in_trainable_stage = any(stage in name for stage in trainable_stages)
    is_frozen_bn = (not unfreeze_bn) and ("bn" in name)
    if is_frozen_bn or not in_trainable_stage:
      param.requires_grad = False

  # The head is replaced after freezing, so the new layer is always trainable.
  model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)

  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  return model_ft.to(device)

In [None]:
def train_model(model, dataloaders, criterion, optimizers, schedulers, num_epochs=25):
    """Train `model` with a train and a validation phase per epoch.

    Args:
        model: network to train; inputs are moved to the global `device`.
        dataloaders: mapping with 'train' and 'val' DataLoaders yielding
            (inputs, labels) batches.
        criterion: loss called as `criterion(outputs, labels)`.
        optimizers: iterable of optimizers; all are zeroed and stepped on every
            training batch.
        schedulers: iterable of LR schedulers (entries may be None).
            OneCycleLR is stepped once per training batch, every other
            scheduler once per epoch after the training phase.
            ReduceLROnPlateau is stepped with the *training* accuracy of the
            epoch, so it should be created with `mode='max'`.
        num_epochs: number of epochs to run.

    Returns:
        (model, val_acc_history): the model with the weights of the epoch with
        the best validation accuracy loaded, and the validation accuracy of
        every epoch.
    """
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                for optimizer in optimizers:
                  optimizer.zero_grad()

                # forward; gradients are tracked only in the training phase
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        for optimizer in optimizers:
                          optimizer.step()
                        # OneCycleLR is defined per batch, not per epoch.
                        for scheduler in schedulers:
                          if isinstance(scheduler, lr_scheduler.OneCycleLR):
                            scheduler.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # keep a copy of the best weights seen so far
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

            if is_training:
              for scheduler in schedulers:
                # OneCycleLR was already stepped per batch above. The check must
                # be isinstance(): comparing the scheduler *instance* with the
                # class is always False and would step OneCycleLR a second time
                # here, running it past its configured number of steps.
                if scheduler is None or isinstance(scheduler, lr_scheduler.OneCycleLR):
                  continue
                if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
                  scheduler.step(epoch_acc)
                else:
                  scheduler.step()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [49]:
from tqdm.notebook import tqdm
def alpha_scheduler(current_epoch_num, T1, T2, max_alpha):
  """Weight of the pseudo-label loss for a given epoch.

  The weight is 0 before epoch `T1`, grows linearly from 0 towards `max_alpha`
  for epochs in [T1, T2), and equals `max_alpha` from epoch `T2` onwards.
  """
  # Warm-up: pseudo labels are ignored.
  if current_epoch_num < T1:
    return 0

  # Linear ramp between T1 and T2.
  if current_epoch_num < T2:
    epochs_into_ramp = current_epoch_num - T1
    ramp_length = T2 - T1
    return epochs_into_ramp / ramp_length * max_alpha

  # Plateau: full weight.
  return max_alpha


def train_model_pseudo_labeling(model, dataloaders, criterion, optimizers, schedulers, percent_of_pseudolables=0.1, num_epochs=25):
    """Train `model` with pseudo-labeling on a share of every training batch.

    In the training phase each batch is split in two: the first part keeps its
    true labels, the labels of the remaining `percent_of_pseudolables` share
    are discarded and replaced by the model's own argmax predictions. The loss
    is `supervised_loss + alpha * pseudo_label_loss`, with `alpha` ramped up by
    `alpha_scheduler(epoch, T1=0, T2=3, max_alpha=1.0)`.

    Args:
        model: network to train; inputs are moved to the global `device`.
        dataloaders: mapping with 'train' and 'val' DataLoaders yielding
            (inputs, labels) batches.
        criterion: loss called as `criterion(outputs, labels)`.
        optimizers: iterable of optimizers, all stepped on every training batch.
        schedulers: iterable of LR schedulers (entries may be None).
            OneCycleLR is stepped per training batch, every other scheduler
            once per epoch. ReduceLROnPlateau is stepped with the *training*
            accuracy of the epoch, so it should be created with `mode='max'`.
        percent_of_pseudolables: fraction of each training batch treated as
            unlabeled.
        num_epochs: number of epochs to run.

    Returns:
        (model, val_acc_history): the model with the weights of the epoch with
        the best validation accuracy loaded, and the validation accuracy of
        every epoch.

    Note:
        Training accuracy is measured on the labeled part of each batch only
        and normalised by the number of labeled samples.
    """
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 20)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
                alpha = alpha_scheduler(epoch, T1=0, T2=3, max_alpha=1.0) # Update alpha
                print('ALPHA =', alpha)
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Number of samples whose prediction was compared with a true label.
            scored_samples = 0

            # Iterating the tqdm wrapper itself advances and closes the bar.
            print(f"Processing {phase} data")
            progress_bar = tqdm(dataloaders[phase])
            if is_training:
                progress_bar.set_description(f"Epoch [{epoch+1}/{num_epochs}]")

            # Iterate over data.
            for inputs, labels in progress_bar:
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_training:
                  batch_size = inputs.shape[0]
                  supervised_size = round(batch_size * (1 - percent_of_pseudolables))
                  split_sizes = [supervised_size, batch_size - supervised_size]
                  data, unlabled_data = torch.split(inputs, split_sizes, dim=0)
                  # From here on `labels` holds the labeled part only; the true
                  # labels of the "unlabeled" part are deliberately dropped.
                  labels, _ = torch.split(labels, split_sizes, dim=0)

                # zero the parameter gradients
                for optimizer in optimizers:
                  optimizer.zero_grad()

                # forward; gradients are tracked only in the training phase
                with torch.set_grad_enabled(is_training):
                    if is_training:
                      # Placeholder for a loss term that cannot be computed.
                      zero = torch.zeros((), device=inputs.device)

                      # Supervised forward pass. An empty split (possible for a
                      # small last batch) is skipped rather than pushed through
                      # the network in train mode.
                      if data.shape[0] > 0:
                        supervised_outputs = model(data)
                        _, pred_for_labled_data = torch.max(supervised_outputs, 1)
                        supervised_loss = criterion(supervised_outputs, labels)
                        if supervised_loss.isnan().any():
                          supervised_loss = zero
                      else:
                        pred_for_labled_data = labels.new_empty((0,))
                        supervised_loss = zero

                      # Pseudo-labeled forward pass: the targets are the
                      # model's own predictions for the unlabeled part.
                      if unlabled_data.shape[0] > 0:
                        pseudo_outputs = model(unlabled_data)
                        _, pseudo_labels = torch.max(pseudo_outputs, dim=1)
                        pseudo_label_loss = criterion(pseudo_outputs, pseudo_labels)
                        if pseudo_label_loss.isnan().any():
                          pseudo_label_loss = zero
                      else:
                        pseudo_label_loss = zero

                      # combined loss
                      loss = supervised_loss + alpha * pseudo_label_loss

                      # backward + optimize; skipped when both terms were
                      # replaced by the placeholder (nothing to differentiate)
                      if loss.requires_grad:
                        loss.backward()
                        for optimizer in optimizers:
                          optimizer.step()
                        # OneCycleLR is defined per batch, not per epoch.
                        for scheduler in schedulers:
                          if isinstance(scheduler, lr_scheduler.OneCycleLR):
                            scheduler.step()
                    else:
                      outputs = model(inputs)
                      _, pred_for_labled_data = torch.max(outputs, 1)
                      loss = criterion(outputs, labels)

                # statistics
                batch_loss = loss.item()
                running_loss += batch_loss * inputs.size(0)
                running_corrects += torch.sum(pred_for_labled_data == labels)
                scored_samples += labels.size(0)

                # Show the real running numbers on the progress bar.
                if is_training:
                  progress_bar.set_postfix(
                      loss=batch_loss,
                      acc=running_corrects.item() / max(scored_samples, 1),
                  )

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            # Divide by the samples that were actually scored: in the training
            # phase that is the labeled part only, in validation the whole set.
            epoch_acc = running_corrects.double() / max(scored_samples, 1)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # keep a copy of the best weights seen so far
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

            if is_training:
              for scheduler in schedulers:
                # OneCycleLR was already stepped per batch above. isinstance()
                # is required: comparing the scheduler *instance* with the
                # class is always False and would step it a second time here.
                if scheduler is None or isinstance(scheduler, lr_scheduler.OneCycleLR):
                  continue
                if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
                  scheduler.step(epoch_acc)
                else:
                  scheduler.step()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [None]:
# How many epochs the training run lasts.
num_epochs = 15

# Samples per mini-batch; tune it to the memory that is available.
batch_size = 9


Load data

In [None]:
def create_dataloader(batch_size, input_size=224, num_workers=2):
  """Create the 'train', 'val' and 'test' DataLoaders.

  The images are read with ImageFolder from `<data_dir>/<split>/`. Training
  images get light augmentation (random resized crop and horizontal flip);
  validation and test images are only resized and center-cropped. All splits
  are normalised with the same per-channel mean/std.

  Args:
    batch_size: number of samples per batch for every split.
    input_size: side length in pixels of the square crops fed to the model.
    num_workers: worker processes used by each DataLoader.

  Returns:
    dict mapping 'train' / 'val' / 'test' to the corresponding DataLoader.
    Only the training loader is shuffled; the evaluation loaders keep a fixed
    order so that repeated evaluations see identical batches.
  """
  normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

  # Validation and test use the same deterministic preprocessing.
  eval_transform = transforms.Compose([
      transforms.Resize(input_size),
      transforms.CenterCrop(input_size),
      transforms.ToTensor(),
      normalize,
  ])
  data_transforms = {
      'train': transforms.Compose([
          transforms.RandomResizedCrop(input_size,scale=(0.9,1.0)),
          transforms.RandomHorizontalFlip(),
          transforms.ToTensor(),
          normalize,
      ]),
      'val': eval_transform,
      'test': eval_transform,
  }

  print("Initializing Datasets and Dataloaders...")

  splits = ['train', 'val', 'test']

  # One ImageFolder dataset per split
  image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in splits}

  # One DataLoader per split; shuffling is only useful for training
  dataloaders_dict = {
      x: torch.utils.data.DataLoader(
          image_datasets[x],
          batch_size=batch_size,
          shuffle=(x == 'train'),
          num_workers=num_workers,
      )
      for x in splits
  }

  return dataloaders_dict

Cell for experimentation

In [None]:
#@title
#model_ft = load_resnet_with_unfrozen_layers(1, True, num_classes=37)
#for x,y in dataloaders_dict['train']:
#  x = x.to(device)
#  y = y.to(device)
#  soft_out = nn.functional.softmax(model_ft(x), dim=1)
#  print(soft_out)
#  _, max_i = torch.max(soft_out, dim=1)
#  print(max_i)
#  print(x.shape, y.shape)
#  print(y)
#  #(set1, set2) = torch.split(y, [18, 2], dim=0)
#  #print(set1.shape)
#  #print(set2.shape)
#  break

Run Training and Validation Step


In [None]:
# Build the model (only layer4 and the new head are trainable) and the loaders.
model_ft = load_resnet_with_unfrozen_layers(1, True, num_classes=37)
dataloaders_dict = create_dataloader(batch_size)

# creates a dict that groups model params into a dict with lists.
# all params for ex. layer4 can be accessed using layer_param_dict["layer4"]

layer_param_dict = defaultdict(list)
for name,param in model_ft.named_parameters():
  if param.requires_grad:
    layer_param_dict[name.split(".")[0]].append(param)

# Flat list of every trainable parameter.
all_params = []
for key, param_list in layer_param_dict.items():
  all_params += param_list

print(layer_param_dict.keys())

# One optimizer per parameter group so that each group gets its own weight
# decay and its own learning-rate schedule.
optimizers = [
    optim.Adam(layer_param_dict["fc"], lr=0.001, weight_decay=1e-3),
    optim.Adam(layer_param_dict["layer4"], lr=0.001, weight_decay=1e-5),
    #optim.Adam(layer_param_dict["layer3"], lr=0.001),
]
schedulers = [
    # The training loops step this scheduler with an accuracy, which is a
    # higher-is-better metric. With the default mode='min' the learning rate
    # would be cut while the model is still improving, hence mode='max'.
    lr_scheduler.ReduceLROnPlateau(optimizers[0], mode='max', patience=2),
    lr_scheduler.LambdaLR(optimizers[1], lr_lambda=lambda epoch: 0.95 ** epoch),
    #lr_scheduler.LambdaLR(optimizers[2], lr_lambda=lambda epoch: 0.95 ** epoch),
]

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

print(type(schedulers[0]) == lr_scheduler.ReduceLROnPlateau)


# Train and evaluate
model_ft, hist = train_model_pseudo_labeling(model_ft, dataloaders_dict, criterion, optimizers, schedulers, percent_of_pseudolables=0.1, num_epochs=num_epochs)
#model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizers, schedulers, num_epochs=num_epochs)

In [None]:
# NOTE: run this line very sparingly
#print(f"Test acc: {test_acc(model_ft, dataloaders_dict)}")