In [2]:
# Environment setup: fetch the installer script served from colab.chainer.org and
# run it; the recorded output shows it installing cupy-cuda100 and chainer.
# NOTE(review): this pipes a remotely fetched script straight into `sh` --
# inspect the script before trusting it.
!curl https://colab.chainer.org/install | sh -

# Graphviz system package plus the chaineripy and chainercv Python packages.
# NOTE(review): none of these installs is version-pinned, so a fresh run may
# pick up different releases than the ones that produced the recorded output.
!apt-get install graphviz
!pip install 'chaineripy'
!pip install 'chainercv'

import sys
import os
from functools import partial
import json

from chainer.links.caffe.caffe_function import CaffeFunction
from chainer.serializers import npz
from chainer.datasets import cifar, split_dataset_random, TransformDataset, tuple_dataset
from chainer.dataset.convert import concat_examples
from chainer import links as L
from chainer import initializers, serializers, iterators
from chainer.training import extensions

from google.colab import drive
import numpy as np



# Mount Google Drive and put the project directory on the import path so the
# project-local modules imported below can be resolved.
drive.mount('/content/drive')
base_dir = './drive/My Drive/study'
sys.path.append(base_dir)

# Convert each Caffe checkpoint into an uncompressed Chainer .npz file written to
# the current working directory. After the loop `caffemodel` refers to the last
# model converted (the residual variant).
for caffe_file, npz_file in (
        ('/squeezenet/caffe/squeezenet_v1.1.caffemodel', 'squeezenet_v1_1.npz'),
        ('/squeezenet/caffe/squeezeNet_residual.caffemodel', 'squeeze_resnet.npz'),
):
    caffemodel = CaffeFunction(base_dir + caffe_file)
    npz.save_npz(npz_file, caffemodel, compression=False)

from resnet.networks.resnet101 import ResNet101
from resnet.networks.resnet152 import ResNet152
from squeezenet.networks.squeezenet import SqueezeNet
from squeezenet.networks.squeezenet_bn import SqueezeNetBN
from squeezenet.networks.squeeze_resnet import SqueezeResNet
from squeezenet.networks.squeeze_pre_resnet import SqueezePreResNet
from distill.knowledge_distill import DistillClassifier, softmax_cross_entropy_softlabel
from distill.utils import save_softlabels, generate_softlabel
from execute import run_train
from transform import transform, transform_with_softlabel

# Dataset setup: load CIFAR-10 (requested with scale=255.) and hold out 10% of
# the training data for validation, using a fixed seed so the split is reproducible.
train_val, test = cifar.get_cifar10(scale=255.)
train_size      = int(len(train_val) * 0.9)
train, valid    = split_dataset_random(train_val, train_size, seed=0)

# Normalisation statistics of the training split, reduced over every axis except
# axis 1 (assumed to be the channel axis of N x C x H x W images -- TODO confirm).
# The images are stacked once and reused for both statistics instead of walking
# the dataset twice; the temporary array is dropped afterwards to free memory.
train_images = np.asarray([x for x, _ in train])
mean = np.mean(train_images, axis=(0, 2, 3))
std  = np.std(train_images, axis=(0, 2, 3))
del train_images

# Hyper-parameters shared by soft-label generation, the transforms and training.
# `with` guarantees the file handles are closed once the JSON is parsed.
with open(base_dir + '/params.json', "r") as params_file:
    params = json.load(params_file)
with open(base_dir + '/distill/distill_params.json', "r") as distill_params_file:
    distill_params = json.load(distill_params_file)

# Locations of the cached teacher soft labels for the train / validation splits.
train_soft_path = base_dir + '/soft_labels.npy'
valid_soft_path = base_dir + '/soft_labels_valid.npy'

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  1580  100  1580    0     0  14766      0 --:--:-- --:--:-- --:--:-- 14766
+ apt -y -q install cuda-libraries-dev-10-0
Reading package lists...
Building dependency tree...
Reading state information...
cuda-libraries-dev-10-0 is already the newest version (10.0.130-1).
0 upgraded, 0 newly installed, 0 to remove and 8 not upgraded.
+ pip install -q cupy-cuda100  chainer 
+ set +ex
Installation succeeded!
Reading package lists... Done
Building dependency tree       
Reading state information... Done
graphviz is already the newest version (2.40.1-2).
0 upgraded, 0 newly installed, 0 to remove and 8 not upgraded.
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleuser

  'support %s layer' % (layer.name, layer.type))
  'support %s layer' % (layer.name, layer.type))
  'support %s layer' % (layer.name, layer.type))
Downloading from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz...


## Generating soft labels

In [0]:
# Build the ResNet-101 teacher with 10 outputs and restore its weights from a
# trainer snapshot; `path` selects the predictor's entries inside the snapshot.
teacher = ResNet101(10, pretrained_model=None)
teacher_snapshot = 'drive/My Drive/study/result/snapshot_epoch-33'
serializers.load_npz(teacher_snapshot, teacher, path='updater/model:main/predictor/')
teacher.to_gpu()

# Soft-label generator bound to the teacher and the training-set statistics.
fun_generate_soft = partial(generate_softlabel, model=teacher, mean=mean, std=std, **params)

# Soft labels are cached on Drive: a split is only processed when its .npy file
# does not exist yet.
for soft_path, split, out_prefix in (
        (train_soft_path, train, base_dir + '/soft_labels'),
        (valid_soft_path, valid, base_dir + '/soft_labels_valid'),
):
    if os.path.exists(soft_path):
        continue
    save_softlabels(split, fun_generate_soft, out_prefix)

## Main

In [0]:
# Attach the cached teacher soft labels to each split, producing datasets of
# (image, soft_label, label) tuples. The cache paths are the ones defined with
# the dataset setup, so generation and loading cannot drift apart.
img_t, lab_t = concat_examples(train)
soft_labels_t = np.load(train_soft_path)
train_soft = tuple_dataset.TupleDataset(img_t, soft_labels_t, lab_t)

img_v, lab_v = concat_examples(valid)
soft_labels_v = np.load(valid_soft_path)
valid_soft = tuple_dataset.TupleDataset(img_v, soft_labels_v, lab_v)

# DATA AUGMENTATION
# The same transform is used for both splits; only the `train` flag differs.
transform_train = partial(transform_with_softlabel, mean=mean, std=std, train=True, **params)
transform_valid = partial(transform_with_softlabel, mean=mean, std=std, train=False, **params)
processed_train = TransformDataset(train_soft, transform_train)
processed_valid = TransformDataset(valid_soft, transform_valid)

# RUN CONFIGURATION
# Overrides for this run. They are applied *before* the iterators are built so
# that the batch size used for iteration is the one handed to the training
# entry point; previously the iterators read the JSON value on the first
# execution and 128 only when the cell was re-executed. They are applied
# *after* the transform partials so those still receive the parameters as loaded.
params['save_trainer_interval'] = 6
params['batchsize']     = 128
params['result_dir']    = 'drive/My Drive/study/result'

train_iter = iterators.SerialIterator(processed_train, params['batchsize'])
valid_iter = iterators.SerialIterator(processed_valid, params['batchsize'], repeat=False, shuffle=False)

# MODEL DEFINITION AND FINETUNING
# The student starts from a previously saved trainer snapshot; `path` selects
# the predictor's entries inside the snapshot.
student = SqueezeNet(10, init_param=initializers.HeNormal())
serializers.load_npz(
    'drive/My Drive/study/result/snapshot_epoch-40',
    student, path='updater/model:main/predictor/')

# DISTILL
# Wrap the student in the distillation classifier, configured from distill_params.json.
alpha = distill_params['alpha']
t = distill_params['temperature']
net = DistillClassifier(student, lossfun_soft=softmax_cross_entropy_softlabel, alpha=alpha, t=t)

# Evaluator
evaluator = extensions.Evaluator(valid_iter, net, device=params['gpu_id'])

# RUN!
# `run_train` is the entry point imported from `execute`; the bare name `run`
# used before is not defined anywhere in this notebook and raises NameError on
# a fresh kernel.
# NOTE(review): confirm run_train accepts (train_iter, net, evaluator, **params).
run_train(train_iter, net, evaluator, **params)

epoch       main/loss   main/accuracy  val/main/loss  val/main/accuracy  lr          elapsed_time
[J1           24.8915     0.139138       23.1447        0.193359           0.001       175.15        
[J2           21.9046     0.2504         18.4107        0.345508           0.001       351.456       
[J3           18.8533     0.350739       15.8025        0.45625            0.001       527.89        
[J4           15.4474     0.486728       12.8443        0.574023           0.001       705.171       
[J5           12.3497     0.609731       10.6911        0.674609           0.001       882.474       
[J6           9.55761     0.711648       7.22654        0.786914           0.001       1060.41       
[J7           7.53711     0.780783       5.84999        0.838086           0.001       1238.89       
[J8           6.56787     0.81068        5.3367         0.857813           0.001       1416.59       
[J9           6.03087     0.826682       4.80809        0.874219           0.