Downloading third-party code for the ResNet model

In [1]:
# Fetch resnet.py from a URL that embeds a fixed commit hash, so the imported
# model definitions do not change between runs.
!wget https://raw.githubusercontent.com/akamaster/pytorch_resnet_cifar10/d1872999394aa0c234e8d855e3c853eb061f7c06/resnet.py


--2020-03-15 18:25:49--  https://raw.githubusercontent.com/akamaster/pytorch_resnet_cifar10/d1872999394aa0c234e8d855e3c853eb061f7c06/resnet.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5001 (4.9K) [text/plain]
Saving to: ‘resnet.py’


2020-03-15 18:25:49 (38.9 MB/s) - ‘resnet.py’ saved [5001/5001]



Downloading/uploading models and logs in a Kaggle kernel

In [2]:
def download_model(source_name, saving_name):
    """Copy a stored model file from the Kaggle input dataset to a local path.

    Arguments:
        source_name {string} -- file name inside "../input/cinic10-models"
        saving_name {string} -- destination path (file or existing directory)

    Raises:
        FileNotFoundError -- if the source file does not exist
    """
    # Function-scope import keeps this cell runnable on its own.
    import shutil

    # Pure-Python copy instead of the `!cp` shell magic: it works outside
    # IPython, handles paths containing whitespace, and fails loudly when the
    # source is missing instead of printing an error and carrying on.
    shutil.copy(f"../input/cinic10-models/{source_name}", saving_name)

def upload_model(source_name, saving_name):
    """Placeholder: does nothing and returns None.

    Arguments:
        source_name {string} -- ignored
        saving_name {string} -- ignored
    """
    return None

def upload_logs():
    """Placeholder: does nothing and returns None."""
    return None

In [3]:
# Copy the train/test/valid folders from ../input/cinic10 into the working
# directory; the ImageFolder datasets are created from the relative roots
# 'train', 'valid' and 'test'.
!cp -r '../input/cinic10/train' train
!cp -r '../input/cinic10/test' test
!cp -r '../input/cinic10/valid' valid

Importing libraries

In [4]:
import gc
import os
import datetime

import numpy as np
np.random.seed(42)

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils as utils
from torch.utils.data import Subset, ConcatDataset, DataLoader

from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from sklearn.metrics import accuracy_score

from resnet import resnet20, BasicBlock, _weights_init, resnet32, resnet56, resnet44, ResNet

def resnet7():
    """Build a ResNet from BasicBlock with the layer configuration [1, 1, 1].

    NOTE(review): the list is presumably the number of blocks per stage, as
    defined by ResNet in resnet.py -- confirm against that file.
    """
    layer_config = [1, 1, 1]
    return ResNet(BasicBlock, layer_config)

Declaration of utils

In [5]:
class AverageMeter:
    """Running accumulator: a total sum divided by a total count."""

    def __init__(self):
        """Start with an empty accumulator (sum and count both zero)."""
        self.sum_, self.count = 0, 0

    def update(self, val, count=1):
        """Accumulate one observation.

        ``val`` is added to the running sum as-is; it is NOT multiplied by
        ``count``. To get a sample-weighted mean, pass the already weighted
        value, e.g. ``update(batch_mean * batch_size, batch_size)``.

        Arguments:
            val {float} -- amount added to the running sum

        Keyword Arguments:
            count {int} -- amount added to the running count (default: {1})
        """
        self.sum_ += val
        self.count += count

    def average(self):
        """Return the accumulated sum divided by the accumulated count.

        Raises ZeroDivisionError if nothing has been accumulated yet.
        """
        total, seen = self.sum_, self.count
        return total / seen

In [6]:
class TensorBoardLogger:
    """Wrapper around a TensorFlow summary writer with its own step counter."""

    def __init__(self, log_dir, dataset, net, experiment_name):
        """Create a summary writer in a timestamped run directory.

        The run directory is
        ``log_dir/dataset/net/experiment_name/<YYYYmmdd-HHMMSS>``.

        Arguments:
            log_dir {string} -- root directory for logs
            dataset {string} -- name of dataset
            net {string} -- name of the network
            experiment_name {string} -- name of experiment
        """
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        run_dir = os.path.join(log_dir, dataset, net, experiment_name, timestamp)
        self.writer = tf.summary.create_file_writer(run_dir)
        self.step_ = 0

    def log_scalar(self, tag, value, step=None, description=None):
        """Write one scalar summary.

        Arguments:
            tag {string} -- name of variable to log
            value {float} -- value of variable

        Keyword Arguments:
            step {int} -- step to log at; the internal counter is used when
                          None (default: {None})
            description {string} -- passed through to tf.summary.scalar
                                    (default: {None})
        """
        effective_step = self.step_ if step is None else step
        with self.writer.as_default():
            tf.summary.scalar(tag, value, step=effective_step, description=description)

    def step(self):
        """Advance the internal step counter by one."""
        self.step_ += 1

    def log_hparams(self, hparams):
        """Record a hyper-parameter dict through the hparams plugin.

        Arguments:
            hparams {dict} -- dict to log
        """
        with self.writer.as_default():
            hp.hparams(hparams)
        

Dataset declaration

In [7]:
# Number of samples per mini-batch for every DataLoader.
batch_size = 64

# Human-readable class names (ten entries).
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

# Dataset identifier used when naming log directories and hparams entries.
dataset_name = "cinic_aug_cached"


In [8]:
def collate_fn(batch):
  """Split a batch of (image, label, ...) samples into two parallel lists.

  Unlike the default collate function, nothing is stacked into tensors: the
  images are returned exactly as the dataset produced them.

  Arguments:
      batch {list} -- samples, each indexable with the image at position 0
                      and the label at position 1

  Returns:
      (list, list) -- images and labels, in batch order
  """
  imgs, labels = [], []
  for sample in batch:
    imgs.append(sample[0])
    labels.append(sample[1])
  return imgs, labels

In [9]:
# Per-channel normalization applied after ToTensor().
# NOTE(review): these values match the widely used ImageNet statistics --
# confirm they are the intended ones for this dataset / teacher checkpoint.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random horizontal flip and random 32x32 crop with
# padding 4, followed by tensor conversion and normalization.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: tensor conversion and normalization only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

# The datasets are deliberately created WITHOUT a transform: the loaders hand
# out untransformed images (see collate_fn) and the transforms above are
# applied per batch inside the training / validation / caching functions.
trainset = torchvision.datasets.ImageFolder(root='train')
valset = torchvision.datasets.ImageFolder(root='valid')

trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    collate_fn=collate_fn,
)
valloader = DataLoader(
    valset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    collate_fn=collate_fn,
)

testset = torchvision.datasets.ImageFolder(root='test')
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
    collate_fn=collate_fn,
)

Training functions definition

In [10]:
def accuracy_minibatch(outputs, labels):
  """Fraction of samples whose arg-max prediction equals the label.

  Arguments:
      outputs {list or np.array or torch.Tensor} -- per-class scores, one row per sample
      labels {list or np.array or torch.Tensor} -- labels (one number for each sample)

  Returns:
      float -- accuracy for minibatch
  """
  def _to_numpy(values):
    # Tensors are moved to host memory and cut from the autograd graph first.
    if isinstance(values, torch.Tensor):
      return values.cpu().detach().numpy()
    return values

  scores = _to_numpy(outputs)
  targets = _to_numpy(labels)

  # The predicted class is the index of the highest score in each row.
  predicted = np.argmax(scores, axis=1)
  # Accuracy is symmetric in its two arguments, so naming them explicitly
  # yields the same value as the positional call.
  return accuracy_score(y_true=targets, y_pred=predicted)

In [11]:
def validate(net, testloader, logger=None, verbose=True, prename="val",
             cuda=True,
             transform_tensor=transform_test,
             transform_repeats=1
             ):
  """Compute accuracy and cross-entropy loss of a model on a data loader.

  The loader is expected to yield (images, labels) pairs where images is a
  list of untransformed samples and labels a list of ints (see collate_fn).
  Each batch is transformed ``transform_repeats`` times and the model
  outputs are averaged before computing the metrics.

  Arguments:
      net {torch net} -- model
      testloader {DataLoader} -- set to validate on

  Keyword Arguments:
      logger {TensorBoardLogger} -- logger (default: {None})
      verbose {bool} -- print metrics (default: {True})
      prename {string} -- prefix of metric names (default: {"val"})
      cuda {bool} -- move inputs and labels to the GPU (default: {True})
      transform_tensor {callable} -- per-image transform producing a tensor
                                     (default: {transform_test})
      transform_repeats {int} -- number of transformed copies whose outputs
                                 are averaged (default: {1})

  Returns:
      dict -- {"<prename>_accuracy": float, "<prename>_overall_loss": float}
  """
  # Remember the mode so the model is handed back the way it came in; an
  # eval-mode model (e.g. a frozen teacher) must not be flipped to train mode.
  was_training = net.training
  net.eval()
  ce_loss_avg = AverageMeter()
  accuracy_score_avg = AverageMeter()
  criterion = nn.CrossEntropyLoss()

  # Gradients are never needed here; no_grad avoids building autograd graphs.
  with torch.no_grad():
    for inputs, labels in testloader:
      labels = torch.tensor(labels)
      if cuda:
        labels = labels.cuda()
      current_batch_size = len(labels)

      # Average the model outputs over the transformed copies of the batch.
      outputs = 0
      for _ in range(transform_repeats):
        batch = torch.stack([transform_tensor(img) for img in inputs])
        outputs += net(batch.cuda() if cuda else batch)
      outputs /= transform_repeats

      loss = criterion(outputs, labels).item()
      micro_acc_score = accuracy_minibatch(outputs, labels)

      # AverageMeter adds values unweighted, so weight by batch size here.
      accuracy_score_avg.update(micro_acc_score*current_batch_size, current_batch_size)
      ce_loss_avg.update(loss*current_batch_size, current_batch_size)

  scores = {
      "%s_accuracy"%prename: accuracy_score_avg.average(),
      "%s_overall_loss"%prename: ce_loss_avg.average(),
       }

  # log scores
  for name, score in scores.items():
    if logger:
      logger.log_scalar(name, score)
    if verbose:
      print(name, score)

  if verbose:
    print("__________________")
  # restore the mode the model had on entry
  net.train(was_training)
  return scores

In [12]:
def train_distillation_cached(
    net_student, 
    trainloader, 
    testloader, 
    logger=None,
    cuda=True,
    epoches=150,
    verbose=True, 
    return_best=False,
    init_lr=0.1,
    temperature=1,
    cos_alpha=0,
    l_alpha=0,
    p=2,
    shuffle=True,
    patience=5,
    wd=1e-4,
    transform_tensor=transform_train
    ):
  """Train a student model against cached teacher outputs (knowledge distillation).

  ``trainloader`` is a list of ``[images, teacher_outputs, labels]`` batches
  as produced by ``cache_loader``. The cached teacher outputs are used
  directly as the KL-divergence target, i.e. they are expected to already be
  probabilities; ``temperature`` is applied to the student logits only.
  NOTE(review): the cosine and L^p terms compare raw student logits with the
  same cached teacher values -- confirm this is intended before using
  non-zero ``cos_alpha`` / ``l_alpha``.

  A checkpoint is written to "models/model_epoch<N>.dms" after every epoch.

  Arguments:
      net_student {torch model} -- student model
      trainloader {list} -- cached train set; shuffled IN PLACE when shuffle=True
      testloader {DataLoader} -- set evaluated after every epoch

  Keyword Arguments:
      logger {TensorBoardLogger} -- logger (default: {None})
      cuda {bool} -- train on the GPU (default: {True})
      epoches {int} -- epochs to train (default: {150})
      verbose {bool} -- show metrics (default: {True})
      return_best {bool} -- reload the checkpoint with the best validation
                            accuracy instead of the last one (default: {False})
      init_lr {float} -- initial learning rate (default: {0.1})
      temperature {int} -- divisor applied to the student logits (default: {1})
      cos_alpha {float} -- weight of the cosine dissimilarity loss (default: {0})
      l_alpha {float} -- weight of the L^p loss (default: {0})
      p {int} -- parameter of the L^p loss (default: {2})
      shuffle {bool} -- shuffle the cached batches each epoch (default: {True})
      patience {int} -- currently unused (default: {5})
      wd {float} -- SGD weight decay (default: {1e-4})
      transform_tensor {callable} -- per-image transform producing a tensor
                                     (default: {transform_train})

  Returns:
      torch model -- student with the best or the last checkpoint loaded
  """
  # The teacher is not needed here: its outputs come from the cache, so this
  # function no longer touches a global teacher model.
  net_student.train()
  if cuda:
    net_student.cuda()

  criterion_kl = nn.KLDivLoss(reduction="batchmean")
  cosine_similarity = nn.CosineSimilarity(dim=1, eps=1e-6)

  optimizer = torch.optim.SGD(
      net_student.parameters(), 
      lr=init_lr,
      momentum=0.9,
      weight_decay=wd
      )
  scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [80, 105, 125, 140])

  validation_scores = []
  os.makedirs("models", exist_ok=True)
  saving_path_template = "models/model_epoch%s.dms"

  for epoch in range(epoches):  # loop over the dataset multiple times
    saving_name = saving_path_template%epoch

    loss_avg = AverageMeter()
    accuracy_score_avg = AverageMeter()
    loss_kl_avg = AverageMeter()
    loss_cos_dis_avg = AverageMeter()

    if shuffle:
      # Reorders the cached batches (not the samples inside a batch).
      np.random.shuffle(trainloader)

    for inputs_pil, out_teacher, labels in trainloader:
      current_batch_size = len(out_teacher)

      # A fresh augmentation is drawn for every image on every epoch.
      inputs = torch.stack([transform_tensor(img) for img in inputs_pil])
      # Teacher values are constants; never backpropagate into them.
      out_teacher = out_teacher.detach()
      if cuda:
        inputs, out_teacher = inputs.cuda(), out_teacher.cuda()

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize
      out_student = net_student(inputs)
      soft_log_probs = F.log_softmax(out_student / temperature, dim=1)
      soft_targets = out_teacher

      kl_loss = criterion_kl(soft_log_probs, soft_targets)
      # Cosine dissimilarity between the mean-centred student/teacher rows.
      student_centered = out_student - torch.mean(out_student, dim=1, keepdim=True)
      teacher_centered = out_teacher - torch.mean(out_teacher, dim=1, keepdim=True)
      cos_dis_loss = (1 - cosine_similarity(student_centered, teacher_centered)).mean()

      l_loss = (torch.abs(out_student - out_teacher)**p).mean()**(1/p)

      loss = (1 - cos_alpha - l_alpha) * kl_loss + cos_alpha * cos_dis_loss + l_alpha * l_loss
      loss.backward()
      optimizer.step()

      # accuracy_minibatch moves tensors to the CPU itself.
      micro_acc_score = accuracy_minibatch(out_student, labels)

      # AverageMeter adds values unweighted, so weight by batch size here.
      loss_avg.update(loss.item()*current_batch_size, current_batch_size)
      accuracy_score_avg.update(micro_acc_score*current_batch_size, current_batch_size)
      loss_kl_avg.update(kl_loss.item()*current_batch_size, current_batch_size)
      loss_cos_dis_avg.update(cos_dis_loss.item()*current_batch_size, current_batch_size)

    if verbose:
        print(saving_name)
        print('overall loss {:.3}'.format(loss_avg.average()))
        print('current lr {:.3e}'.format(optimizer.param_groups[0]['lr']))
        print("__________________")
    # clear memory 
    gc.collect()
    torch.cuda.empty_cache()

    # Evaluate on the loader that was passed in (not on a notebook global)
    # and keep the score to pick the best checkpoint later.
    validation_score = validate(net_student, testloader,
                                logger=logger,
                                verbose=verbose,
                                cuda=cuda,
                                )
    # validate() may have changed the mode; make sure training continues.
    net_student.train()
    accuracy = validation_score['val_accuracy']
    validation_scores.append(accuracy)
    # save model
    torch.save(net_student.state_dict(), saving_name)

    if logger:
        logger.log_scalar("overall_loss", loss_avg.average())
        logger.log_scalar("accuracy", accuracy_score_avg.average())
        logger.log_scalar("kl_loss", loss_kl_avg.average())
        logger.log_scalar("cos_dis_loss", loss_cos_dis_avg.average())
        logger.step()
    scheduler.step()

  # Nothing was trained (epoches <= 0): there is no checkpoint to reload.
  if not validation_scores:
    return net_student

  best_epoch = np.argmax(validation_scores)
  if return_best:
    choosen_epoch = best_epoch
  else:
    choosen_epoch = len(validation_scores) - 1
  if verbose:
    print("choosen epoch:", choosen_epoch, ", score:", validation_scores[choosen_epoch])
    print("best epoch:", best_epoch, ", score:", validation_scores[best_epoch])
  model_name = saving_path_template%choosen_epoch
  net_student.load_state_dict(torch.load(model_name))
  return net_student

In [13]:
def cache_loader(net_teacher, loader, cuda=True,
                 transform_tensor=transform_train,
                 transform_repeats=4
                 ):
  """Precompute teacher outputs for every batch of a loader.

  For each batch the images are transformed ``transform_repeats`` times, the
  teacher's softmax probabilities are computed for every copy and averaged.
  The untransformed images are stored alongside, so the consumer can apply
  its own transforms later.

  Arguments:
      net_teacher {torch model} -- teacher model (switched to eval mode)
      loader {DataLoader} -- yields (images, labels) with images as a list
                             of untransformed samples and labels as ints

  Keyword Arguments:
      cuda {bool} -- run the teacher on GPU inputs (default: {True})
      transform_tensor {func} -- per-image transformation (default: {transform_train})
      transform_repeats {int} -- number of transformed copies to average (default: {4})

  Returns:
      List -- one [images, averaged teacher probabilities (CPU tensor),
              labels (CPU tensor)] entry per batch
  """
  net_teacher.eval()
  cached = []
  # The teacher is only queried, never trained: skip autograd bookkeeping.
  with torch.no_grad():
    for inputs_pil, labels in loader:
      # Labels are stored on the CPU, so they never need to visit the GPU.
      labels = torch.tensor(labels)

      # One transformed copy at a time keeps peak memory at a single batch.
      outputs = 0
      for _ in range(transform_repeats):
        batch = torch.stack([transform_tensor(img) for img in inputs_pil])
        if cuda:
          batch = batch.cuda()
        outputs += torch.softmax(net_teacher(batch), dim=1)
      outputs /= transform_repeats

      cached.append([inputs_pil, outputs.cpu().detach(), labels])
  return cached

In [16]:
# Experiment driver: distil each (teacher, student) pair, evaluate both models
# on every split and record the results as TensorBoard hparams.
for i in range(1):
  experiment_grid = [
      (resnet20(), resnet7(), "resnet20_classic.pt", ""),
  ]
  for net_teacher, net_student, teacher_name, student_name in experiment_grid:

    epochs = 150

    # Fetch the teacher checkpoint and load it into the model on the GPU.
    download_model(teacher_name, teacher_name)
    state_dict_teacher = torch.load(teacher_name)
    net_teacher.cuda()
    net_teacher.load_state_dict(state_dict_teacher)

    # Teacher outputs are computed once up front and reused on every epoch.
    loader_cached = cache_loader(net_teacher, trainloader, transform_repeats=8)

    experiment_name = teacher_name[:8]+"_kl_soft_aug.pt"
    logger = TensorBoardLogger("logs", dataset_name, "resnet7", experiment_name)
    net_student = train_distillation_cached(
        net_student,
        loader_cached,
        valloader,
        epoches=epochs,
        init_lr=1e-1,
        logger=logger,
        return_best=True,
        cos_alpha=0.0,
        l_alpha=0.0,
        p=2,
        patience=5,
        wd=1e-4,
        temperature=1,
    )

    print("-----------------")
    print("*****************")
    print(experiment_name)
    # Student metrics on all three splits.
    score_test = validate(net_student, testloader, prename='test')
    score_val = validate(net_student, valloader, prename='val')
    score_train = validate(net_student, trainloader, prename='train')

    print("----------------")
    # Teacher metrics on the same splits, for reference.
    score_teacher_test = validate(net_teacher, testloader, prename='test')
    score_teacher_val = validate(net_teacher, valloader, prename='val')
    score_teacher_train = validate(net_teacher, trainloader, prename='train')
    print("----------------")

    hparams = {"experiment_name":experiment_name, "teacher":teacher_name, "dataset":dataset_name}
    # Flatten every score dict into hparams under a "student/" or "teacher/" prefix.
    for prefix, score_sets in (
        ("student/", (score_val, score_test, score_train)),
        ("teacher/", (score_teacher_test, score_teacher_val, score_teacher_train)),
    ):
      for scores in score_sets:
        for key, value in scores.items():
          hparams[prefix+key] = value

    logger.log_hparams(hparams)

    # Final student weights are stored under the experiment name.
    torch.save(net_student.state_dict(), experiment_name)

    saving_path_template = "models/model_epoch%s.dms"
    logger.step_ = 0

models/model_epoch0.dms
overall loss 1.22
current lr 1.000e-01
__________________
val_accuracy 0.48054444444444444
val_overall_loss 1.4785727812449136
__________________
models/model_epoch1.dms
overall loss 0.928
current lr 1.000e-01
__________________
val_accuracy 0.5389888888888889
val_overall_loss 1.2933208312352498
__________________
models/model_epoch2.dms
overall loss 0.826
current lr 1.000e-01
__________________
val_accuracy 0.5961888888888889
val_overall_loss 1.1390091682857937
__________________
models/model_epoch3.dms
overall loss 0.751
current lr 1.000e-01
__________________
val_accuracy 0.6045555555555555
val_overall_loss 1.0903941322114732
__________________
models/model_epoch4.dms
overall loss 0.704
current lr 1.000e-01
__________________
val_accuracy 0.6401888888888889
val_overall_loss 1.020443856853909
__________________
models/model_epoch5.dms
overall loss 0.668
current lr 1.000e-01
__________________
val_accuracy 0.6532333333333333
val_overall_loss 0.9688951449712118


Saving logs and removing extra files for the Kaggle kernel

In [17]:
# Pack the logs directory into a single gzip-compressed archive, logs.tar.gz.
!tar -czf logs.tar.gz logs 

In [18]:
# Delete the copied dataset folders, the downloaded resnet.py, the per-epoch
# checkpoints and the raw logs directory; logs.tar.gz is not in the list and
# is therefore kept.
!rm -rf test train valid resnet.py models logs