### Or Wolkimir (308402163) and Afek Adler (204249239) 


In [10]:
import numpy as np
!pip install tensorboardcolab
import torch
from torchvision import datasets, transforms
import helper
from torch import nn, optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import summary
import datetime
import pandas as pd
from collections import OrderedDict
from os import mkdir
from os.path import isdir
from glob import glob
from pathlib import Path
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from glob import glob



In [11]:
# Mount Google Drive inside the Colab runtime. The experiment directories
# configured later in this notebook (ROOT_PATH) live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 2. Model


In [0]:
class LeNet5(nn.Module):
  """LeNet-5 style CNN for single-channel images, with dropout in the classifier head.

  The network is split in two stages so that callers can re-run only the
  (stochastic) classifier head on cached features:

    * ``forward_1``: conv/pool feature extractor followed by ``fc1`` + ReLU.
    * ``forward_2``: dropout -> ``fc2`` + ReLU -> dropout -> ``fc3`` -> log-softmax.

  ``forward`` returns both the log-probabilities and the ``fc1`` features.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5,padding=2)
    self.max_pool_1 = nn.MaxPool2d(kernel_size=2)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    self.max_pool_2 = nn.MaxPool2d(kernel_size=2)
    # The flatten in forward_1 hard-codes 16*5*5 features per sample.
    # assumes 28x28 inputs (28 -> pool 14 -> conv 10 -> pool 5) -- TODO confirm
    self.fc1 = nn.Linear(16*5*5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)
    self.softmax = nn.LogSoftmax(dim=1)
    self.dropout1 = nn.Dropout(0.1)
    self.dropout2 = nn.Dropout(0.1)


  def forward_1(self, x):
    """Feature extractor: returns the post-ReLU ``fc1`` activations (120 per sample)."""
    x = F.relu(self.conv1(x))
    x = self.max_pool_1(x)
    x = F.relu(self.conv2(x))
    x = self.max_pool_2(x)
    x = x.view(-1, 16*5*5)
    x = F.relu(self.fc1(x))
    return x

  def forward_2(self,x):
    """Classifier head: maps ``forward_1`` features to per-class log-probabilities.

    Dropout is only active while the module is in training mode, so repeated
    calls on the same features give different outputs only in that mode.
    """
    x = self.dropout1(x)
    x = F.relu(self.fc2(x))
    # Second dropout layer: the original applied dropout1 twice and left
    # dropout2 unused. Both use p=0.1, so the computation is unchanged.
    x = self.dropout2(x)
    x = self.fc3(x)
    return self.softmax(x)


  def forward(self, x):
    """Return ``(log_probabilities, fc1_features)`` for a batch of images."""
    x1 = self.forward_1(x)
    x2 = self.forward_2(x1)
    return x2, x1

# 3. Trainer

In [0]:
class Trainer:
  """Trains and evaluates a model under one of several per-sample loss-weighting schemes.

  The scheme is selected by ``config.model_name``:

    * ``'baseline'``  -- plain mean NLL loss for every epoch.
    * ``'MCdropout'`` -- weight = std of the true-label probability over
      ``dropout_std_n_times`` stochastic passes through ``model.forward_2``.
    * ``'entropy'``   -- weight = entropy of the predicted distribution.
    * ``'SGD-WPV'``   -- weight derived from the variance of the true-label
      probability recorded over previous weighted epochs.

  For every scheme the first ``warmup_epochs`` epochs use the unweighted loss.

  The model must return ``(log_probabilities, hidden)`` from ``forward`` and
  expose ``forward_2(hidden)`` returning log-probabilities.

  ``config`` must provide: model_name, seed, lr, epochs, warmup_epochs,
  save_model, upload_model, model_weights_path, batch_size,
  dropout_std_n_times, momentum, eps.
  """

  def __init__(self,model, config):
    self.model = model
    self.model_name = config.model_name
    self.seed = config.seed
    self.lr = config.lr
    self.epochs = config.epochs
    self.warmup_epochs = config.warmup_epochs
    self.save_model = config.save_model
    self.upload_model = config.upload_model
    self.model_weights_path = config.model_weights_path
    self.batch_size =  config.batch_size
    self.dropout_std_n_times = config.dropout_std_n_times
    # Stored for reference only: the Adam optimizer below does not use it.
    self.momentum = config.momentum
    self.optimizer = optim.Adam(self.model.parameters(), lr = self.lr)
    # NOTE: this seeds the global torch RNG *after* `model` was constructed by
    # the caller, so it does not influence the model's initial weights.
    torch.manual_seed(self.seed)
    # reduction='none' keeps one loss value per sample so it can be re-weighted.
    self.criterion = nn.NLLLoss(reduction='none')
    self.results = {}
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # SGD-WPV history: batch index -> (batch_size, n_weighted_epochs) tensor of
    # true-label probabilities.
    self.sgd_spv_matrix = {}
    self.eps = config.eps
    # Loader used by run_epoch when none is passed explicitly; set by fit().
    self.trainloader = None
    if torch.cuda.is_available():
      self.model.to(self.device)

  def get_results(self):
    """Return the per-epoch metrics collected by ``record`` as a DataFrame."""
    return pd.DataFrame.from_dict(self.results)

  def get_test_per_sample_predictions(self):
    """Return the per-sample DataFrame built by ``test`` on the final epoch.

    The attribute only exists once ``test`` has been called with
    ``epoch == self.epochs``.
    """
    return self.per_sample_prediction

  def record(self,epoch,**kwargs):
    """Append each metric to ``self.results`` and print a one-line epoch summary.

    Metrics are stored under ``f"{model_name}_{metric}"`` exactly as passed in;
    only the printed representation is rounded.
    """
    epoch = "{:02d}".format(epoch)
    temp = f"| epoch   # {epoch} :"
    for key, value in kwargs.items():
      key = f"{self.model_name}_{key}"
      self.results.setdefault(key, []).append(value)
      # float() first so 0-dim tensors and Python floats format the same way.
      val = '{:.4f}'.format(np.round(float(value),4))
      temp += f"{key} : {val}      |       "
    print(temp)

  def fit(self,trainloader, testloader, exp_name):
    """Run the full training schedule, optionally resuming from the baseline warm-up checkpoint.

    Args:
      trainloader: iterable of ``(images, labels)`` training batches; must support ``len``.
      testloader: iterable of ``(images, labels)`` test batches; must support ``len``.
      exp_name: unused; kept for interface compatibility.

    If ``upload_model`` is set and the warm-up checkpoint
    ``baseline_{warmup_epochs}_seed_{seed}.pth`` exists, model and optimizer
    state are restored from it and training starts at epoch
    ``warmup_epochs + 1``. Otherwise training starts at epoch 1.
    """
    self.trainloader = trainloader
    weights_path = Path(f"{self.model_weights_path}/baseline_{self.warmup_epochs}_seed_{self.seed}.pth")
    epoch = 1
    resumed = weights_path.exists() and self.upload_model
    if resumed:
      self.load_checkpoint(weights_path,epoch)
      print('uplaoded model succesfuly')
      epoch += self.warmup_epochs
    # Tell the scheduler how many epochs have already been consumed when resuming.
    starting_epoch =  self.warmup_epochs -1 if resumed else -1
    self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[10,20,30], gamma=0.5,last_epoch = starting_epoch)
    while epoch <= self.epochs:
      epoch_train_accuracy, train_loss = self.run_epoch(epoch, trainloader)
      epoch_test_accuracy, test_loss = self.test(testloader,epoch)
      self.record(epoch,train_accuracy = epoch_train_accuracy,
                  train_losss = train_loss,
                  test_loss = test_loss,
                  test_accuracy = epoch_test_accuracy)
      self.save_checkpoint(weights_path,epoch_train_accuracy, epoch)
      self.scheduler.step()
      epoch += 1

  def run_epoch(self, epoch, trainloader=None):
    """Train for one epoch.

    Args:
      epoch: 1-based epoch number; decides whether the loss is weighted.
      trainloader: batches to train on. Defaults to the loader given to ``fit``.
        (Previously this method silently read a module-level ``trainloader``.)

    Returns:
      ``(mean batch accuracy, sum over batches of the mean unweighted loss)``.

    Raises:
      ValueError: if no loader was passed and ``fit`` has not set one.
    """
    if trainloader is None:
      trainloader = self.trainloader
    if trainloader is None:
      raise ValueError("run_epoch needs a trainloader: pass one or call fit() first")
    self.model.train()
    train_loss_unweighted , train_accuracy= 0, 0
    for i , (images, labels) in enumerate(trainloader):
      images = images.to(self.device)
      labels = labels.to(self.device)
      log_ps, hidden = self.model(images)
      accuracy, _ , __ = self.calc_accuracy(log_ps,labels)
      loss = self.get_weighted_loss(log_ps,hidden, images, labels, i, epoch)
      self.optimizer.zero_grad()
      loss.backward()
      self.optimizer.step()
      # Reported loss is always the unweighted one, computed from the
      # pre-update predictions. It is accumulated, not averaged over batches.
      train_loss_unweighted += self.criterion(log_ps, labels).mean().item()
      train_accuracy += accuracy
    epoch_train_accuracy = train_accuracy/len(trainloader)
    return epoch_train_accuracy,train_loss_unweighted

  def test(self, test_loader, epoch):
    """Evaluate on ``test_loader`` with the model in eval mode.

    On the final epoch (``epoch == self.epochs``) this also collects, per
    sample: true label, predicted probability of the true label, the
    MC-dropout spread returned by ``weighted_loss(..., is_test=True)``
    (stored under the column name ``'variance'``), and the predicted label.
    The result is stored in ``self.per_sample_prediction``.

    Returns:
      ``(mean batch accuracy, sum over batches of the mean loss)``.
    """
    self.model.eval()
    test_loss,test_accuracy = 0,0
    true_labels ,predicted_labels, softmax_p , variance = [], [],[],[]
    with torch.no_grad():
      for i , (images, labels) in enumerate(test_loader):
        images = images.to(self.device)
        labels = labels.to(self.device)
        log_ps, hidden = self.model(images)
        acc, predicted_l, ps_labels = self.calc_accuracy(log_ps,labels)
        test_accuracy+=acc
        test_loss += self.criterion(log_ps, labels).mean().item()
        if epoch==self.epochs:
          predicted_labels += predicted_l
          true_labels += labels.cpu().tolist()
          softmax_p += ps_labels
          variance += self.weighted_loss(log_ps, hidden, images, labels, i, epoch, True).view(-1).cpu().tolist()
    if epoch==self.epochs:
      colnames = ['true_label','p_true_label', 'variance', 'predicted_label']
      per_sample_prediction = pd.DataFrame(np.array([true_labels,softmax_p,variance,predicted_labels]).T, columns = colnames)
      self.per_sample_prediction = per_sample_prediction
    return test_accuracy/len(test_loader), test_loss

  def get_weighted_loss(self,log_ps,hidden, x, y, batch, epoch):
    """Return the scalar training loss for one batch.

    During warm-up, or for the ``'baseline'`` scheme, this is the plain mean
    NLL. Afterwards each sample's loss is multiplied by its weight from
    ``weighted_loss`` before averaging.
    """
    loss = self.criterion(log_ps, y)
    if (epoch <= self.warmup_epochs) or self.model_name == 'baseline':
      return loss.mean()
    # 0-based index of the current epoch within the weighted phase.
    epoch_of_weighted_training = epoch - (self.warmup_epochs + 1)
    weights = self.weighted_loss(log_ps,hidden,x, y,batch, epoch_of_weighted_training)
    return (loss*weights).mean()


  def weighted_loss(self,log_p, hidden, x, labels, batch, epoch_weighted, is_test=False):
    """Compute per-sample weights (no gradient) for the configured scheme.

    Args:
      log_p: log-probabilities for the batch, one row per sample.
      hidden: features accepted by ``model.forward_2``.
      x: the input batch; only its first dimension (batch size) is read.
      labels: true class indices.
      batch: index of the batch within the epoch (key for SGD-WPV history).
      epoch_weighted: 0-based epoch index within the weighted phase
        (column index for SGD-WPV history).
      is_test: force the MC-dropout estimate regardless of ``model_name``.

    Returns:
      1-D tensor of ``batch_size`` weights with ``self.eps`` added. Weights
      stay at 1 for schemes/epochs where no estimate is available.

    The model's train/eval mode is restored to what it was on entry.
    """
    total_weighted_epochs = self.epochs - self.warmup_epochs
    batch_size = x.size()[0]
    with torch.no_grad():
      weights = torch.ones(batch_size, device= self.device)
      if self.model_name == 'MCdropout' or is_test:
        # Dropout must be active for the stochastic passes. Remember the
        # caller's mode and restore it afterwards; the original always left
        # the model in eval mode, which disabled dropout for the rest of a
        # training epoch.
        was_training = self.model.training
        self.model.train()
        try:
          all_p =  torch.zeros((batch_size ,self.dropout_std_n_times), device= self.device)
          for i in range(self.dropout_std_n_times):
            mc_log_p = self.model.forward_2(hidden)
            p_true_labels = torch.gather(torch.exp(mc_log_p), 1, labels.view(-1,1))
            all_p[:,i] = p_true_labels.view(-1)
        finally:
          self.model.train(was_training)
        # Only the spread over all passes matters, so compute it once.
        if self.dropout_std_n_times > 0:
          weights = all_p.std(dim = 1)
      elif self.model_name == 'entropy':
        weights = -torch.mul(log_p, torch.exp(log_p)).sum(dim = 1)
      elif self.model_name == 'SGD-WPV':
        # NOTE(review): history is keyed by batch index, so it only tracks the
        # same samples across epochs if the loader yields batches in a fixed
        # order with a fixed batch size -- confirm against the loader used.
        self.sgd_spv_matrix.setdefault(batch, torch.zeros((batch_size ,total_weighted_epochs), device= self.device))
        p_true_labels = torch.gather(torch.exp(log_p), 1, labels.view(-1,1))
        self.sgd_spv_matrix[batch][:,epoch_weighted] = p_true_labels.view(-1)
        # At least two previous weighted epochs are needed for a variance.
        if epoch_weighted not in [0,1]:
          var = self.sgd_spv_matrix[batch][:, :epoch_weighted].var(dim = 1)
          var +=  var.pow(2)/(epoch_weighted -1)
          weights = var.sqrt()
      weights += self.eps
      return (weights)

  def calc_accuracy(self, log_ps, labels):
    """Compute batch accuracy and per-sample prediction details.

    Pure tensor arithmetic on already-computed outputs: it does not touch the
    model's train/eval mode. (The original toggled eval()/train() here, which
    left the model in training mode in the middle of evaluation.)

    Returns:
      ``(accuracy, predicted_labels, p_true_labels)`` where ``accuracy`` is a
      0-dim CPU float tensor and the other two are Python lists.
    """
    ps = torch.exp(log_ps)
    top_p, top_class = ps.topk(1, dim=1)
    equals = top_class == labels.view(*top_class.shape)
    acc = torch.mean(equals.type(torch.FloatTensor))
    predicted_lables = top_class.view(-1).cpu().tolist()
    ps_labels = torch.gather(ps, 1, labels.view(-1,1))
    ps_labels = ps_labels.view(-1).cpu().tolist()
    return acc, predicted_lables, ps_labels


  def save_checkpoint(self,weights_path,loss, epoch):
    """Save model/optimizer state once, at the end of the baseline warm-up.

    Nothing is written unless all of these hold: the scheme is ``'baseline'``,
    ``epoch == warmup_epochs``, ``save_model`` is set, and ``weights_path``
    does not exist yet. ``loss`` is stored verbatim under the ``'loss'`` key.
    """
    if (epoch == self.warmup_epochs and self.model_name == 'baseline')  and (not weights_path.exists()) and self.save_model:
      print('saving_model: ')
      torch.save({'model_state_dict': self.model.state_dict(),
              'optimizer_state_dict': self.optimizer.state_dict(),'loss': loss}, weights_path)

  def load_checkpoint(self,weights_path,epoch):
    """Restore model and optimizer state from ``weights_path``.

    ``epoch`` is unused. Returns the value stored under ``'loss'``.
    """
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    checkpoint = torch.load(weights_path, map_location=self.device)
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    self.model.to(self.device)
    loss = checkpoint['loss']
    return loss

# 4. Utils
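Utility helpers used by the experiments: a simple configuration container, directory creation, and the Fashion-MNIST data loaders.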

In [0]:
class Config:
  """Plain attribute container: every keyword argument becomes an instance attribute."""

  def __init__(self, **kwargs):
    # Construction is just the first round of attribute assignment.
    self.add_attributes(**kwargs)

  def add_attributes(self,**kwargs):
    """Set (or overwrite) one attribute per keyword argument."""
    for name in kwargs:
      setattr(self, name, kwargs[name])

def create_directories(l):
  """Ensure every directory path in ``l`` exists.

  Missing parent directories are created as well, and paths that already
  exist as directories are left untouched. The original used ``mkdir``,
  which fails when a parent is missing and can race between the existence
  check and the creation.

  Raises:
    FileExistsError: if a path exists but is not a directory.
  """
  from os import makedirs
  for directory_path in l:
    makedirs(directory_path, exist_ok=True)

def get_train_test_loaders(batch_size):
  """Build Fashion-MNIST train and test ``DataLoader`` objects.

  The dataset is downloaded to ``~/.pytorch/F_MNIST_data/`` if it is not
  already there. Images are only converted to tensors; no normalization is
  applied.

  Args:
    batch_size: batch size used for both loaders.

  Returns:
    ``(trainloader, testloader)``. The training loader is shuffled. The test
    loader is not, so evaluation is deterministic and per-sample outputs come
    out in dataset order and can be aligned across runs.
  """
  transform = transforms.Compose([
    transforms.ToTensor()])
  # Download and load the training data
  trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
  # Download and load the test data
  testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
  trainloader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle=True)
  # The original shuffled the test set as well, which scrambled the row order
  # of the per-sample results on every run.
  testloader = torch.utils.data.DataLoader(testset, batch_size= batch_size, shuffle=False)
  return trainloader, testloader


# 5. Configurations 

In [0]:
# Root folder (on the mounted Drive) under which all experiment artefacts are kept.
ROOT_PATH = '/content/drive/My Drive/DL/Project/experiments/fminst/'

# Sub-folder names, relative to ROOT_PATH.
MODEL_WEIGHTS_DIR = 'model_weights'
GRAPHS_FOLDER_NAME = 'graphs'
PER_SAMPLE_RESULTS_DIR = 'per_samples_results'

# Full paths. ROOT_PATH already ends with a slash, so plain concatenation is enough.
model_weights_dir = ROOT_PATH + MODEL_WEIGHTS_DIR
graphs_dir = ROOT_PATH + GRAPHS_FOLDER_NAME
sample_results_dir = ROOT_PATH + PER_SAMPLE_RESULTS_DIR

SAVE_FIGS = True
BATCH_SIZE = 32

def get_base_config():
  """Return the ``Config`` shared by all experiments.

  ``model_name`` and ``seed`` are not set here; they are added per run.
  """
  # Checkpoint switches, passed on as `save_model` / `upload_model`.
  # Trainer reads them to decide whether to write the baseline warm-up
  # checkpoint and whether to resume from it when it already exists.
  save_to_checkpoints = True
  load_checkpoints = True

  settings = dict(
    # NOTE(review): Trainer passes this to optim.Adam; lr=1 is unusually
    # large for Adam -- confirm it is intended.
    lr = 1,
    epochs = 150,          # total number of epochs
    warmup_epochs = 100,   # epochs trained with the unweighted loss
    eps = 0.00001,         # added to every per-sample weight
    dropout_std_n_times = 15,  # stochastic forward passes for MC dropout
    momentum = 0.9,
    save_model = save_to_checkpoints,
    upload_model = load_checkpoints,
    model_weights_path = model_weights_dir,
    batch_size = BATCH_SIZE,
  )
  return Config(**settings)
# Loss-weighting schemes to run; each entry is used as the Trainer's model_name.
EXPERIMENTS = ['baseline','SGD-WPV','MCdropout','entropy'] # e.g. ['MCdropout'] to run a single scheme


# 6. Run experiments

In [16]:
def run_exp(exp_name,seed):
  """Train and evaluate one experiment for one seed.

  Uses the module-level ``config``, ``trainloader`` and ``testloader``;
  ``config`` is updated in place with ``model_name`` and ``seed``.

  Args:
    exp_name: name of the loss-weighting scheme (becomes ``config.model_name``).
    seed: random seed for this run.

  Returns:
    ``(convergence_results, per_sample_results)`` as returned by the trainer's
    ``get_results`` and ``get_test_per_sample_predictions``.
  """
  # Make sure the output directories exist before anything is written.
  create_directories([graphs_dir,model_weights_dir,sample_results_dir])
  print(f"{'#'*50} \n running {exp_name} experiment\n{'#'*50} \n") 
  config.add_attributes(model_name = exp_name,seed=seed)
  # Seed *before* building the model: Trainer only seeds after it has received
  # the already-initialized model, so without this the initial weights would
  # not be reproducible for a given seed.
  torch.manual_seed(int(seed))
  trainer = Trainer(LeNet5(),config)
  trainer.fit(trainloader, testloader, exp_name)
  convergence_results = trainer.get_results()
  per_sample_results = trainer.get_test_per_sample_predictions()
  return convergence_results, per_sample_results

# Shared configuration and data loaders for every (seed, experiment) run.
config = get_base_config()
trainloader, testloader = get_train_test_loaders(BATCH_SIZE)

# Run every experiment for every seed; a run is skipped only when both of its
# result files are already on disk, so an interrupted sweep can be resumed.
for seed in np.arange(10): 
  for exp in EXPERIMENTS:
    convergence_path = Path(f"{sample_results_dir}/convergence_results_{exp}_seed_{seed}.csv")
    per_sample_path = Path(f"{sample_results_dir}/per_sample_results_{exp}_seed_{seed}.csv")
    if convergence_path.exists() and per_sample_path.exists():
      continue
    convergence_results, per_sample_results = run_exp(exp,seed)
    convergence_results.to_csv(convergence_path, index=False)
    per_sample_results.to_csv(per_sample_path, index=False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw
Processing...
Done!
