Downloading third-party code for the ResNet model

In [0]:
# Fetch resnet.py (the third-party ResNet definitions imported later in this notebook).
# The URL contains a full commit hash, so the same revision of the file is downloaded on every run.
!wget https://raw.githubusercontent.com/akamaster/pytorch_resnet_cifar10/d1872999394aa0c234e8d855e3c853eb061f7c06/resnet.py


--2020-02-08 09:17:35--  https://raw.githubusercontent.com/akamaster/pytorch_resnet_cifar10/d1872999394aa0c234e8d855e3c853eb061f7c06/resnet.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5001 (4.9K) [text/plain]
Saving to: ‘resnet.py’


2020-02-08 09:17:36 (59.4 MB/s) - ‘resnet.py’ saved [5001/5001]



Downloading/uploading helpers for the Kaggle kernel

In [0]:
# Create the local "models" directory that upload_model copies files into.
!mkdir -p models

# copy a saved model file into the local "models" directory
def upload_model(source_name, saving_name):
  """Copy a model file into the local ``models`` directory.

  The copy is done in pure Python instead of an interpolated ``!cp`` shell
  command, so paths containing spaces or shell metacharacters are handled
  correctly and a failed copy raises instead of only printing a message.

  Arguments:
      source_name {string} -- path of the file to copy
      saving_name {string} -- file name to store it under inside ``models``

  Raises:
      OSError -- if the source cannot be read or the target cannot be written
  """
  # Local imports: this cell is defined before the notebook's import cell.
  import os
  import shutil

  # Make the helper self-sufficient even if the directory was removed meanwhile.
  os.makedirs("models", exist_ok=True)
  shutil.copy(source_name, os.path.join("models", saving_name))

def upload_logs():
    """Placeholder hook for uploading logs; currently does nothing."""
    return None

In [None]:
# Copy the train / test / valid folders from ../input/cinic10 into the working
# directory; the dataset cells below read them through the relative roots
# 'train', 'test' and 'valid'.
!cp -r '../input/cinic10/train' train
!cp -r '../input/cinic10/test' test
!cp -r '../input/cinic10/valid' valid

Importing libraries

In [0]:
import gc
import os
import datetime

import numpy as np
np.random.seed(42)

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils as utils
from torch.utils.data import Subset, ConcatDataset, DataLoader

from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import accuracy_score

from resnet import resnet20, BasicBlock, _weights_init, resnet32, resnet56, resnet44, ResNet

def resnet7():
    """Build a ResNet made of BasicBlock units with the block-count list [1, 1, 1]."""
    return ResNet(BasicBlock, [1, 1, 1])

Utility class declarations

In [0]:
class AverageMeter:
  """Accumulate a weighted running average.

  Callers add an already-weighted contribution (for example
  ``batch_mean * batch_size``) together with its weight; ``average`` returns
  the total sum divided by the total weight.
  """
  def __init__(self):
    """Start with an empty accumulator.
    """
    self.sum_ = 0
    self.count = 0

  def update(self, val, count=1):
    """Add a contribution to the running totals

    Arguments:
        val {float} -- amount added to the sum as-is; it is NOT multiplied
            by ``count`` here, so pass the already-weighted value

    Keyword Arguments:
        count {int} -- weight added to the denominator (default: {1})
    """
    self.sum_ += val
    self.count += count

  def average(self):
    """Return the accumulated sum divided by the accumulated weight

    Returns 0.0 when nothing has been accumulated yet (total weight is 0)
    instead of raising ZeroDivisionError.
    """
    if self.count == 0:
      return 0.0
    return self.sum_ / self.count

In [0]:
class TensorBoardLogger:
    """Write scalar summaries through a TensorFlow file writer.

    The logger keeps its own step counter, which is used whenever
    ``log_scalar`` is called without an explicit step.
    """
    def __init__(self, log_dir, dataset, experiment_name):
        """Create the summary writer

        The events are written under
        ``<log_dir>/<dataset>/<creation timestamp>/<experiment_name>``.

        Arguments:
            log_dir {string} -- root directory for logs
            dataset {string} -- name of dataset
            experiment_name {string} -- name of experiment
        """
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        self.writer = tf.summary.create_file_writer(
            os.path.join(log_dir, dataset, timestamp, experiment_name)
        )
        self.step_ = 0

    def log_scalar(self, tag, value, step=None, description=None):
        """Record one scalar value

        Arguments:
            tag {string} -- name of variable to log
            value {float} -- value of variable

        Keyword Arguments:
            step {int} -- step to log at; the internal counter is used
                when omitted (default: {None})
            description {string} -- passed through to the summary (default: {None})
        """
        effective_step = self.step_ if step is None else step
        with self.writer.as_default():
            tf.summary.scalar(tag, value, step=effective_step, description=description)

    def step(self):
        """Advance the internal step counter by 1
        """
        self.step_ = self.step_ + 1
        

Dataset declaration

In [0]:
# Mini-batch size used by every data loader created below.
batch_size = 64

# Human-readable class names.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)



In [0]:
# Per-channel normalisation shared by all splits.
# NOTE(review): these values look like the commonly used ImageNet statistics
# rather than statistics computed on this dataset -- confirm this is intended.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random horizontal flip and random 32-pixel crop
# (second argument 4), followed by tensor conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: tensor conversion and normalisation only.
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

# One ImageFolder dataset per split directory copied into the working directory.
trainset = torchvision.datasets.ImageFolder(root='train', transform=transform_train)
valset = torchvision.datasets.ImageFolder(root='valid', transform=transform_test)
testset = torchvision.datasets.ImageFolder(root='test', transform=transform_test)

# Loaders: training and validation batches are shuffled, test batches are not.
trainloader = DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
valloader = DataLoader(
    valset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
testloader = DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

In [0]:
# Number of mini-batches in the test, validation and training loaders.
len(testloader), len(valloader), len(trainloader)

(1407, 1407, 1407)

Training functions definition

In [0]:
def accuracy_minibatch(outputs, labels):
  """Compute accuracy of arg-max predictions on one mini-batch

  Arguments:
      outputs {torch.Tensor or array} -- per-class scores; the predicted
          class is the arg-max along axis 1
      labels {torch.Tensor or array} -- ground-truth class indices

  Returns:
      float -- value returned by sklearn's ``accuracy_score``
  """
  # Tensors are detached and moved to host memory so numpy/sklearn can use them.
  if isinstance(outputs, torch.Tensor):
    outputs = outputs.cpu().detach().numpy()
  if isinstance(labels, torch.Tensor):
    labels = labels.cpu().detach().numpy()

  predicted_labels = np.argmax(outputs, axis=1)
  # sklearn's signature is accuracy_score(y_true, y_pred): ground truth first.
  return accuracy_score(labels, predicted_labels)

In [0]:
def validate(net, testloader, logger=None, compression_f=None, verbose=True, prename="val"):
  """Compute accuracy and cross-entropy loss of a model on a data loader

  The model is switched to evaluation mode for the pass and is left in
  training mode when the function returns. Inputs and labels are moved to
  the GPU with ``.cuda()``, so the model is expected to live on the GPU.

  Arguments:
      net {torch net} -- model
      testloader {DataLoader} -- batches of (inputs, labels) to evaluate on

  Keyword Arguments:
      logger {TensorBoardLogger} -- if given, every score is logged at the
          logger's current step (default: {None})
      compression_f {function} -- applied to the inputs after they were
          moved to the GPU (default: {None})
      verbose {bool} -- print the scores (default: {True})
      prename {string} -- prefix of the score names (default: {"val"})

  Returns:
      dict -- ``{"<prename>_accuracy": ..., "<prename>_ce_loss": ...}``,
          both averaged over all samples
  """
  # change net to evaluation mode
  net.eval()
  ce_loss_avg = AverageMeter()
  accuracy_score_avg = AverageMeter()
  criterion = nn.CrossEntropyLoss()

  # Evaluation needs no gradients: without no_grad() every forward pass would
  # build an autograd graph and keep activations alive for nothing.
  with torch.no_grad():
    for inputs, labels in testloader:
      inputs, labels = inputs.cuda(), labels.cuda()
      current_batch_size = len(labels)

      if compression_f is not None:
        inputs = compression_f(inputs)
      outputs = net(inputs)
      loss = criterion(outputs, labels).item()

      micro_acc_score = accuracy_minibatch(outputs, labels)

      # Per-batch means are re-weighted by batch size so that a smaller last
      # batch does not skew the per-sample average.
      accuracy_score_avg.update(micro_acc_score*current_batch_size, current_batch_size)
      ce_loss_avg.update(loss*current_batch_size, current_batch_size)

  accuracy = accuracy_score_avg.average()
  ce_loss = ce_loss_avg.average()
  scores = {"%s_accuracy"%prename:accuracy, "%s_ce_loss"%prename:ce_loss}

  # log scores
  for name, score in scores.items():
    if logger:
      logger.log_scalar(name, score)
    if verbose:
      print(name, score)

  if verbose:
    print("__________________")
  # change net to training mode
  net.train()
  return scores

In [0]:
def train_classic_approach(
    net, 
    trainloader, 
    testloader, 
    epoches=300,
    compression_f=None, 
    verbose=True, 
    return_best=True,
    logger=None,
    init_lr=0.1
    ):
  """Train a model with SGD and a reduce-on-plateau learning-rate schedule

  After every epoch the model is evaluated on ``testloader`` with
  ``validate`` and its weights are saved to ``models/model_epoch<N>.dms``.
  At the end the weights of the chosen epoch are loaded back into ``net``.

  Arguments:
      net {torch model} -- model; it is moved to the GPU
      trainloader {DataLoader} -- batches used for optimisation
      testloader {DataLoader} -- batches used for the per-epoch evaluation
          that selects the best epoch

  Keyword Arguments:
      epoches {int} -- epochs to train (default: {300})
      compression_f {function} -- function to preprocess input (default: {None})
      verbose {bool} -- show metrics (default: {True})
      return_best {bool} -- load the epoch with the highest evaluation
          accuracy instead of the last one (default: {True})
      logger {TensorBoardLogger} -- logger (default: {None})
      init_lr {float} -- initial learning rate (default: {0.1})

  Returns:
      torch model -- ``net`` with the weights of the chosen epoch loaded
          (returned unchanged if no epoch was run)
  """
  # change net to training mode
  net.train()
  # use gpu to train
  net.cuda()
  # loss of training
  criterion = nn.CrossEntropyLoss()

  optimizer = torch.optim.SGD(
      net.parameters(),
      lr=init_lr,
      momentum=0.9,
      weight_decay=1e-4)
  scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, threshold=0.001, patience=4)

  validation_scores = []

  os.makedirs("models", exist_ok=True)
  saving_path_template = "models/model_epoch%s.dms"

  for epoch in range(epoches):  # loop over the dataset multiple times
    saving_name = saving_path_template%epoch

    loss_avg = AverageMeter()
    accuracy_score_avg = AverageMeter()

    for inputs, labels in trainloader:
      current_batch_size = len(labels)
      # NOTE(review): here compression_f sees the tensors before they are
      # moved to the GPU, whereas validate applies it after .cuda() --
      # confirm compression_f is meant to handle both.
      if compression_f:
        inputs = compression_f(inputs)
      inputs, labels = inputs.cuda(), labels.cuda()

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize
      outputs = net(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      micro_acc_score = accuracy_minibatch(outputs, labels)

      # weight per-batch means by batch size to get per-sample averages
      loss_avg.update(loss.item()*current_batch_size, current_batch_size)
      accuracy_score_avg.update(micro_acc_score*current_batch_size, current_batch_size)

    if verbose:
      print(saving_name)
      print('loss {:.3}'.format(loss_avg.average()))
      print('current lr {:.3e}'.format(optimizer.param_groups[0]['lr']))
      print("__________________")

    # clear memory
    gc.collect()
    torch.cuda.empty_cache()

    # Evaluate on the loader passed by the caller (previously a notebook-level
    # global was used here and the testloader argument was silently ignored).
    validation_score = validate(net, testloader, logger=logger, verbose=verbose, compression_f=compression_f)
    accuracy = validation_score['val_accuracy']

    validation_scores.append(accuracy)
    # one checkpoint per epoch, so any epoch can be restored at the end
    torch.save(net.state_dict(), saving_name)

    if logger:
      logger.log_scalar("ce_loss", loss_avg.average())
      logger.log_scalar("accuracy", accuracy_score_avg.average())
      logger.step()
    # the plateau scheduler is driven by the epoch's average training loss
    scheduler.step(loss_avg.average())

  # Nothing was trained (epoches <= 0): there is no checkpoint to restore.
  if not validation_scores:
    return net

  best_epoch = np.argmax(validation_scores)
  if return_best:
    choosen_epoch = best_epoch
  else:
    choosen_epoch = epoch
  if verbose:
    print("choosen epoch:", choosen_epoch, ",", "score:", validation_scores[choosen_epoch])
    print("best epoch:", best_epoch, ",", "score:", validation_scores[best_epoch])
  model_name = saving_path_template%choosen_epoch
  net.load_state_dict(torch.load(model_name))
  return net

In [0]:
# Train each architecture, report its scores on all three splits, then store
# the selected weights under the experiment name.
for net, name in [
    (resnet20(), "resnet20_classic.pt"),
    (resnet32(), "resnet32_classic.pt"),
]:
  logger = TensorBoardLogger("logs", "cinic10", name)
  net = train_classic_approach(
      net,
      trainloader,
      valloader,
      epoches=150,
      return_best=True,
      logger=logger,
  )
  print("-----------------")
  print(name)
  # Final scores of the returned model, in the order test, val, train.
  for split_name, split_loader in (
      ("test", testloader),
      ("val", valloader),
      ("train", trainloader),
  ):
    validate(net, split_loader, prename=split_name)
  print("----------------")
  torch.save(net.state_dict(), name)
  upload_model(name, name)
  upload_logs()

models/model_epoch0.dms
loss 1.66
current lr 1.000e-01
__________________
val_accuracy 0.4469666666666667
val_ce_loss 1.5165933913760714
__________________
models/model_epoch1.dms
loss 1.31
current lr 1.000e-01
__________________
val_accuracy 0.49194444444444446
val_ce_loss 1.4608890915764703
__________________
models/model_epoch2.dms
loss 1.16
current lr 1.000e-01
__________________
val_accuracy 0.6122555555555556
val_ce_loss 1.1029332371817695
__________________
models/model_epoch3.dms
loss 1.07
current lr 1.000e-01
__________________
val_accuracy 0.6263222222222222
val_ce_loss 1.0366073227140638
__________________
models/model_epoch4.dms
loss 1.01
current lr 1.000e-01
__________________
val_accuracy 0.5937666666666667
val_ce_loss 1.2511868357552423
__________________
models/model_epoch5.dms
loss 0.965
current lr 1.000e-01
__________________
val_accuracy 0.6193666666666666
val_ce_loss 1.1084816803826225
__________________
models/model_epoch6.dms
loss 0.937
current lr 1.000e-01
______

Saving logs and removing extra files for the Kaggle kernel

In [0]:
# Pack the "logs" directory into the gzip-compressed archive logs.tar.gz.
!tar -czf logs.tar.gz logs 

In [0]:
# Delete the copied dataset splits, the downloaded resnet.py, the "models"
# directory (per-epoch checkpoints and model copies) and the raw "logs" directory.
# logs.tar.gz and the *.pt files saved in the working directory are not listed here.
!rm -rf test train valid resnet.py models logs