### Or Wolkimir (308402163) and Afek Adler (204249239) 


In [11]:
import numpy as np
!pip install tensorboardcolab
import torch
from torchvision import datasets, transforms
import helper
from torch import nn, optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import summary
import datetime
import pandas as pd
from collections import OrderedDict
from os import mkdir
from os.path import isdir
from glob import glob
from pathlib import Path
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
import torch.nn.init as init
from torch.autograd import Variable
from IPython.display import display



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


# 2. Model


In [0]:
class ResnetAll(nn.Module):
  def __init__(self,num_classes=10):
    super().__init__()
    self.linear = nn.Linear(128,num_classes)
    self.softmax = nn.LogSoftmax(dim=1)
    self.dropout = nn.Dropout(0.1)
    self.resnet = ResNet18()

  def forward_1(self, x):
    return self.resnet(x)

  def forward_2(self, x):
    x = self.dropout(x)
    x = self.linear(x)
    x = self.softmax(x)
    return x

  def forward(self, x):
    x1 = self.forward_1(x)
    x2 = self.forward_2(x1)
    return x2, x1

class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return self.lambd(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes=10):
    super(ResNet, self).__init__()
    self.in_planes = 64

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.linear = nn.Linear(512*block.expansion, 128)

  def _make_layer(self, block, planes, num_blocks, stride):
    strides = [stride] + [1]*(num_blocks-1)
    layers = []
    for stride in strides:
        layers.append(block(self.in_planes, planes, stride))
        self.in_planes = planes * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])

# 3. Trainer

In [0]:
class Trainer:
  def __init__(self,model, config):
    self.model = model
    self.model_name = config.model_name
    self.seed = config.seed
    self.lr = config.lr
    self.epochs = config.epochs
    self.warmup_epochs = config.warmup_epochs
    self.save_model = config.save_model
    self.upload_model = config.upload_model
    self.model_weights_path = config.model_weights_path
    self.batch_size =  config.batch_size
    self.dropout_std_n_times = config.dropout_std_n_times
    self.momentum = config.momentum
    self.milestones = config.milestones
    self.gamma = config.gamma
    self.save_points = config.save_points
    self.optimizer = optim.SGD(self.model.parameters(), lr = self.lr, weight_decay = config.weight_decay, momentum=self.momentum)
    torch.manual_seed(self.seed)
    self.criterion = nn.NLLLoss(reduction='none')
    self.results = {}
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.sgd_spv_matrix = {}
    self.eps = config.eps
    self.per_sample_prediction = None
    if torch.cuda.is_available():
      self.model.to(self.device)

  def get_results(self):
    return pd.DataFrame.from_dict(self.results)

  def get_test_per_sample_predictions(self):
    return self.per_sample_prediction

  def record(self,epoch,**kwargs):
    epoch = "{:02d}".format(epoch)
    temp = f"| epoch   # {epoch} :"
    for key, value in kwargs.items():
      key = f"{self.model_name}_{key}"
      if not self.results.get(key):
        self.results[key] =[]
      self.results[key].append(value)
      val = '{:.4f}'.format(np.round(value,4))
      temp += f"{key} : {val}      |       "
    print(temp)

  def load_last_saved_model(self):
    starting_epoch = -1
    epoch = 1
    for point in self.save_points[::-1]:
      weights_path = Path(f"{self.model_weights_path}/{self.model_name}_{point}_seed_{self.seed}.pth")
      if weights_path.exists() and self.upload_model:
        epoch_train_accuracy = self.load_checkpoint(weights_path,point)
        starting_epoch = point
        return starting_epoch,epoch+starting_epoch, epoch_train_accuracy
      else:
        weights_path = Path(f"{self.model_weights_path}/baseline_{self.warmup_epochs}_seed_{self.seed}.pth")
        if weights_path.exists() and self.upload_model:
          epoch_train_accuracy = self.load_checkpoint(weights_path,self.warmup_epochs)
          starting_epoch = self.warmup_epochs
          return starting_epoch,epoch+starting_epoch, epoch_train_accuracy
    return starting_epoch,epoch, None

  def fit(self,trainloader, testloader, exp_name): 
    train_accu, test_accu, output_plot = [],[],[]
    starting_epoch, epoch, epoch_train_accuracy = self.load_last_saved_model()
    if epoch==self.epochs+1:
      epoch_test_accuracy, test_loss = self.test(testloader,epoch-1)
      self.record(epoch-1,
                  test_loss = test_loss,
                  test_accuracy = epoch_test_accuracy)
    self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=self.milestones, gamma=self.gamma,last_epoch = starting_epoch)
    while epoch <= self.epochs:
      epoch_train_accuracy, train_loss = self.run_epoch(epoch)
      epoch_test_accuracy, test_loss = self.test(testloader,epoch)
      self.record(epoch,train_accuracy = epoch_train_accuracy,
                  train_losss = train_loss,
                  test_loss = test_loss,
                  test_accuracy = epoch_test_accuracy)
      self.save_checkpoint(epoch_train_accuracy, epoch)
      self.scheduler.step()
      epoch += 1

  def run_epoch(self, epoch):
    self.model.train()
    train_loss_unweighted , train_accuracy= 0, 0
    for i , (images, labels) in enumerate(trainloader):
      images = images.to(self.device)
      labels = labels.to(self.device)
      log_ps, hidden = self.model(images)
      accuracy, _ , __ = self.calc_accuracy(log_ps,labels)
      loss = self.get_weighted_loss(log_ps,hidden, images, labels, i, epoch) 
      self.optimizer.zero_grad()
      loss.backward()
      self.optimizer.step()
      train_loss_unweighted += self.criterion(log_ps, labels).mean().item()
      train_accuracy += accuracy
    epoch_train_accuracy = train_accuracy/len(trainloader)
    return epoch_train_accuracy,train_loss_unweighted

  def test(self, test_loader, epoch):
    self.model.eval()
    test_loss,test_accuracy = 0,0
    true_labels ,predicted_labels, softmax_p , variance = [], [],[],[]
    with torch.no_grad():
      for i , (images, labels) in enumerate(test_loader):
        images = images.to(self.device)
        labels = labels.to(self.device)
        log_ps, hidden = self.model(images)
        acc, predicted_l, ps_labels = self.calc_accuracy(log_ps,labels)
        test_accuracy+=acc
        test_loss += self.criterion(log_ps, labels).mean().item()
        if epoch==self.epochs: 
          predicted_labels += predicted_l
          true_labels += labels.cpu().tolist()
          softmax_p += ps_labels 
          variance += self.weighted_loss(log_ps, hidden, images, labels, i, epoch, True).view(-1).cpu().tolist()
    if epoch==self.epochs:
      colnames = ['true_label','p_true_label', 'variance', 'predicted_label'] 
      per_sample_prediction = pd.DataFrame(np.array([true_labels,softmax_p,variance,predicted_labels]).T, columns = colnames)
      self.per_sample_prediction = per_sample_prediction
    return test_accuracy/len(test_loader), test_loss

  def get_weighted_loss(self,log_ps,hidden, x, y, batch, epoch):
    loss = self.criterion(log_ps, y)
    if (epoch <= self.warmup_epochs) or self.model_name == 'baseline':
      return loss.mean()
    else:
      epoch_of_weighted_training = epoch - (self.warmup_epochs + 1) 
      weights = self.weighted_loss(log_ps,hidden,x, y,batch, epoch_of_weighted_training) 
      return (loss*weights).mean()
    

  def weighted_loss(self,log_p, hidden, x, labels, batch, epoch_weighted, is_test=False):
    total_weighted_epochs = self.epochs - self.warmup_epochs
    batch_size = x.size()[0] 
    #row_indexes = [i for i in range(self.batch_size)]
    with torch.no_grad():
      weights = torch.ones(batch_size, device= self.device)
      if self.model_name == 'MCdropout' or is_test:
        self.model.train()
        all_p =  torch.zeros((batch_size ,self.dropout_std_n_times), device= self.device)
        for i in range(self.dropout_std_n_times):
          log_p = self.model.forward_2(hidden)
          p = torch.exp(log_p)
          p_true_labels = torch.gather(p, 1, labels.view(-1,1))
          all_p[:,i] = p_true_labels.view(-1)
          weights = all_p.std(axis = 1)
        self.model.eval()
      elif self.model_name == 'entropy':
        weights = -torch.mul(log_p, torch.exp(log_p)).sum(axis = 1)
      elif self.model_name == 'SGD-WPV':
        self.sgd_spv_matrix.setdefault(batch, torch.zeros((batch_size ,total_weighted_epochs), device= self.device))
        # update_matrix
        p_true_labels = torch.gather(torch.exp(log_p), 1, labels.view(-1,1))
        self.sgd_spv_matrix[batch][:,epoch_weighted] = p_true_labels.view(-1)
        if epoch_weighted not in [0,1]:
          indexes = [i for i in range(epoch_weighted)]
          var = self.sgd_spv_matrix[batch][:,indexes].var(axis = 1)
          var +=  var.pow(2)/(len(indexes) -1)
          weights = var.sqrt()
      weights += self.eps
      return (weights)

  def calc_accuracy(self, log_ps, labels):
    self.model.eval()
    ps = torch.exp(log_ps)
    top_p, top_class = ps.topk(1, dim=1)
    equals = top_class == labels.view(*top_class.shape)
    acc = torch.mean(equals.type(torch.FloatTensor))
    predicted_lables = top_class.view(-1).cpu().tolist()
    ps_labels = torch.gather(ps, 1, labels.view(-1,1))
    ps_labels = ps_labels.view(-1).cpu().tolist()
    self.model.train()
    return acc, predicted_lables, ps_labels

  def save_checkpoint(self,loss, epoch):
    weights_path_baseline = Path(f"{self.model_weights_path}/baseline_{self.warmup_epochs}_seed_{self.seed}.pth")
    weights_path_model = Path(f"{self.model_weights_path}/{self.model_name}_{epoch}_seed_{self.seed}.pth")
    if (epoch == self.warmup_epochs and self.model_name == 'baseline')  and (not weights_path_baseline.exists()) and self.save_model: 
      print('saving_model: ')
      torch.save({'model_state_dict': self.model.state_dict(),
              'optimizer_state_dict': self.optimizer.state_dict(),'loss': loss}, weights_path_baseline)
    elif (epoch in self.save_points) and (not weights_path_model.exists()) and self.save_model:
      print('saving_model: ')
      torch.save({'model_state_dict': self.model.state_dict(),
              'optimizer_state_dict': self.optimizer.state_dict(),'loss': loss}, weights_path_model) 
      
  def load_checkpoint(self,weights_path,epoch):
    checkpoint = torch.load(weights_path)
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    self.model.to(self.device)
    loss = checkpoint['loss']
    print(f"Uploaded weights succesfuly at epoch number {epoch}")
    return loss

# 4. Utils
Utility functions for the code

In [0]:
class Config:  
  def __init__(self, **kwargs):
    for key, value in kwargs.items():
      setattr(self, key, value)

  def add_attributes(self,**kwargs):
    for key, value in kwargs.items():
      setattr(self, key, value)

def create_directories(l):
  for directory_path in l:
    if not (isdir(directory_path)):
      mkdir(directory_path)

def get_train_test_loaders(batch_size):
  # Define a transform to normalize the data
  transform_train = transforms.Compose([
      transforms.RandomCrop(32, padding=4),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor(),
      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])

  transform_test = transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
  
  # Download and load the training data
  trainset = datasets.CIFAR10(root='./data', download=True, train=True, transform=transform_train)
  # Download and load the test data
  testset = datasets.CIFAR10(root='./data', download=True, train=False, transform=transform_test)
  trainloader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle=True)
  testloader = torch.utils.data.DataLoader(testset, batch_size= batch_size, shuffle=True)
  return trainloader, testloader

# 5. Configurations 

In [0]:
ROOT_PATH = '/content/drive/My Drive/university/projects/DL/Project/project_204249239_308402163/experiments/cifar/resnet18/'
MODEL_WEIGHTS_DIR = 'model_weights'
GRAPHS_FOLDER_NAME = 'graphs'
PER_SAMPLE_RESULTS_DIR = 'per_samples_results'
model_weights_dir = f"{ROOT_PATH}{MODEL_WEIGHTS_DIR}" 
graphs_dir = f"{ROOT_PATH}{GRAPHS_FOLDER_NAME}" 
sample_results_dir = f"{ROOT_PATH}{PER_SAMPLE_RESULTS_DIR}" 
SAVE_FIGS = True
BATCH_SIZE = 128

def get_base_config():
  ####################################################################
  # model consistency options
  SAVE_TO_CHECKPOINTS = False # if ture, saves model.name_epcoch file into the weights folder
  LOAD_CHECKPOINTS = True # # if ture, every epoch tries to load pretrained weights
  ####################################################################
  # if needed, can be modified to upload the 'best model'
  return Config(lr = 0.1, 
                epochs = 170,
                warmup_epochs = 150,
                eps= 0.00001, 
                step_size=2, 
                gamma=0.001, 
                weight_decay=5e-4,
                dropout_std_n_times = 15,
                momentum = 0.9, 
                milestones=[150],
                save_points=[100,150,170],
                save_model = SAVE_TO_CHECKPOINTS,
                upload_model = LOAD_CHECKPOINTS,
                model_weights_path = model_weights_dir,
                batch_size = BATCH_SIZE)
EXPERIMENTS = ['baseline', 'SGD-WPV', 'MCdropout', 'entropy'] 

# 6. Run experiments

In [16]:
def run_exp(exp_name,seed):
  # create weights directory
  create_directories([graphs_dir,model_weights_dir,sample_results_dir])
  print(f"{'#'*50} \n running {exp_name} experiment\n{'#'*50} \n") 
  config.add_attributes(model_name = exp_name,seed=seed)
  trainer = Trainer(ResnetAll(),config)
  trainer.fit(trainloader, testloader, exp_name)
  convergence_results = trainer.get_results()
  per_sample_results = trainer.get_test_per_sample_predictions()
  return convergence_results, per_sample_results

config = get_base_config()
trainloader, testloader = get_train_test_loaders(BATCH_SIZE)
for seed in np.arange(5): 
  for exp in EXPERIMENTS:
    convergence_path = Path(f"{sample_results_dir}/convergence_results_{exp}_seed_{seed}.csv")
    per_sample_path = Path(f"{sample_results_dir}/per_sample_results_{exp}_seed_{seed}.csv")
    if not (convergence_path.exists() and per_sample_path.exists()):
      convergence_results, per_sample_results = run_exp(exp,seed)
      convergence_results.to_csv(convergence_path, index=False)
      per_sample_results.to_csv(per_sample_path, index=False)
    else:
      display(pd.read_csv(convergence_path).tail(1))

Files already downloaded and verified
Files already downloaded and verified


Unnamed: 0,baseline_train_accuracy,baseline_train_losss,baseline_test_loss,baseline_test_accuracy
19,tensor(0.9901),11.377878,17.383656,tensor(0.9416)


Unnamed: 0,SGD-WPV_train_accuracy,SGD-WPV_train_losss,SGD-WPV_test_loss,SGD-WPV_test_accuracy
19,tensor(0.9835),23.618552,12.937264,tensor(0.9450)


Unnamed: 0,MCdropout_train_accuracy,MCdropout_train_losss,MCdropout_test_loss,MCdropout_test_accuracy
19,tensor(0.9442),70.549129,18.177092,tensor(0.9235)


Unnamed: 0,entropy_train_accuracy,entropy_train_losss,entropy_test_loss,entropy_test_accuracy
19,tensor(0.9925),10.902049,15.31978,tensor(0.9436)


Unnamed: 0,baseline_train_accuracy,baseline_train_losss,baseline_test_loss,baseline_test_accuracy
19,tensor(0.9900),11.799405,19.720841,tensor(0.9350)


Unnamed: 0,SGD-WPV_train_accuracy,SGD-WPV_train_losss,SGD-WPV_test_loss,SGD-WPV_test_accuracy
19,tensor(0.9850),23.079705,13.71232,tensor(0.9430)


Unnamed: 0,MCdropout_train_accuracy,MCdropout_train_losss,MCdropout_test_loss,MCdropout_test_accuracy
19,tensor(0.9453),70.166269,19.107746,tensor(0.9189)


Unnamed: 0,entropy_train_accuracy,entropy_train_losss,entropy_test_loss,entropy_test_accuracy
19,tensor(0.9914),11.763421,15.719599,tensor(0.9437)


Unnamed: 0,baseline_train_accuracy,baseline_train_losss,baseline_test_loss,baseline_test_accuracy
19,tensor(0.8977),119.260359,31.054033,tensor(0.8751)


Unnamed: 0,SGD-WPV_train_accuracy,SGD-WPV_train_losss,SGD-WPV_test_loss,SGD-WPV_test_accuracy
19,tensor(0.8922),125.189,41.583127,tensor(0.8362)


Unnamed: 0,MCdropout_train_accuracy,MCdropout_train_losss,MCdropout_test_loss,MCdropout_test_accuracy
19,tensor(0.6746),382.887725,70.592286,tensor(0.6925)


Unnamed: 0,entropy_train_accuracy,entropy_train_losss,entropy_test_loss,entropy_test_accuracy
19,tensor(0.8840),135.927006,36.94471,tensor(0.8514)


Unnamed: 0,baseline_train_accuracy,baseline_train_losss,baseline_test_loss,baseline_test_accuracy
19,tensor(0.8969),119.294657,36.398441,tensor(0.8505)


Unnamed: 0,SGD-WPV_train_accuracy,SGD-WPV_train_losss,SGD-WPV_test_loss,SGD-WPV_test_accuracy
19,tensor(0.8899),125.685914,38.131341,tensor(0.8412)


Unnamed: 0,MCdropout_train_accuracy,MCdropout_train_losss,MCdropout_test_loss,MCdropout_test_accuracy
19,tensor(0.6959),361.926371,74.036198,tensor(0.6981)


Unnamed: 0,entropy_train_accuracy,entropy_train_losss,entropy_test_loss,entropy_test_accuracy
19,tensor(0.8831),134.302769,44.228288,tensor(0.8278)


Unnamed: 0,baseline_train_accuracy,baseline_train_losss,baseline_test_loss,baseline_test_accuracy
19,tensor(0.8971),118.01405,38.498119,tensor(0.8432)


Unnamed: 0,SGD-WPV_train_accuracy,SGD-WPV_train_losss,SGD-WPV_test_loss,SGD-WPV_test_accuracy
19,tensor(0.8931),123.9795,40.719777,tensor(0.8353)


Unnamed: 0,MCdropout_train_accuracy,MCdropout_train_losss,MCdropout_test_loss,MCdropout_test_accuracy
19,tensor(0.6604),397.741775,82.663507,tensor(0.6734)


Unnamed: 0,entropy_train_accuracy,entropy_train_losss,entropy_test_loss,entropy_test_accuracy
19,tensor(0.8859),134.673092,88.356617,tensor(0.7260)
