## train VGG model with PAC-Bayes information bottleneck.

In [1]:
import numpy as np
import torch
import os
import pdb

from src.dataset import load_data
from src.utils import img_preprocess, setup_seed, predict, eval_metric, feature_map_size
from src.utils import train
from src.models import VGG
from src.pib_utils import train_pib

In [2]:
__data_set__ = 'cifar10'

__prior_ckpt__ = './checkpoints/{}/vgg_prior.pt'.format(__data_set__)
__save_ckpt__ = './checkpoints/{}/vgg_pib.pt'.format(__data_set__)

opt = {
    'num_epoch':100,
    'batch_size':32,
    'lr':1e-4, 
    'weight_decay':0,
    'beta':1e-1,
    'noise_scale':1e-10,
    'schedule': [50, 80],
    'early_stop': 10,
}
if not os.path.exists('./checkpoints/{}'.format(__data_set__)):
    os.makedirs('./checkpoints/{}'.format(__data_set__))

# set random seed
setup_seed(2020)

## load data & preprocess

In [3]:
x_tr, y_tr, x_va, y_va, x_te, y_te = load_data(__data_set__)

all_tr_idx = np.arange(len(x_tr))
num_class = np.unique(y_va).shape[0]

x_tr, y_tr = img_preprocess(x_tr, y_tr,)
x_va, y_va = img_preprocess(x_va, y_va,)
x_te, y_te = img_preprocess(x_te, y_te,)

load from CIFAR-10.


## train PIB-based VGG model

In [4]:
# load model
model = VGG(num_classes=num_class, dropout_rate=0.0, last_feature_map_size=feature_map_size(__data_set__))
model.cuda()

VGG(
  (extract_feature): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU()
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_feature

In [5]:
# get prior on the validation set
if os.path.exists(__prior_ckpt__):
    print("load prior.")
    model.load_state_dict(torch.load(__prior_ckpt__))
else:
    train(model, np.arange(len(y_va)), x_va, y_va, x_va, y_va, 10, 32, 5e-5, 0, __prior_ckpt__, 5)
w0_dict = dict()
for param in model.named_parameters():
    w0_dict[param[0]] = param[1].clone().detach() # detach but still on gpu
model.w0_dict = w0_dict
model._initialize_weights()
print("done get prior weights")

load prior.
initialize model weights.
done get prior weights


In [6]:
# start training model
info_dict, loss_acc_dict = train_pib(model, all_tr_idx,
    x_tr, y_tr, x_va, y_va, 
    num_epoch=opt['num_epoch'],
    batch_size=opt['batch_size'],
    lr=opt['lr'],
    weight_decay=opt['weight_decay'],
    beta=opt['beta'],
    early_stop_ckpt_path=__save_ckpt__,
    early_stop_tolerance=opt['early_stop'],
    noise_scale=opt['noise_scale'],
    schedule=opt['schedule'],
    )

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:1025.)
  p.data.add_(-group['lr'], d_p)


epoch: 0, info: {'extract_feature.0.weight': tensor(3.6269e-06, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(2.5803e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(9.1582e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(5.9072e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(7.7142e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(7.5887e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(9.1890e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(1.3563e-06, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(9.2734e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(9.8919e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 0, tr loss: 0.027935729106155428, lr: 0.0001, e_decay: 1.1113430446130224e-05


  1%|▉                                                                                         | 1/100 [00:18<29:47, 18.06s/it]

epoch: 0, va acc: 0.8075999617576599
epoch: 1, info: {'extract_feature.0.weight': tensor(1.0749e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(2.6885e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(6.0926e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(6.9039e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(4.3970e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(5.7491e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(8.2988e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(8.3694e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(6.8296e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(8.9229e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 1, tr loss: 0.016418349326268174, lr: 0.0001, e_decay: 5.9326

  2%|█▊                                                                                        | 2/100 [00:36<29:35, 18.12s/it]

epoch: 1, va acc: 0.8077999949455261
epoch: 2, info: {'extract_feature.0.weight': tensor(1.7195e-06, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(7.2301e-09, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(1.9345e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(1.1030e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(1.7715e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(3.6482e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(7.6794e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(7.6276e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(5.6456e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(4.5528e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 2, tr loss: 0.011348072461196668, lr: 0.0001, e_decay: 5.1229

  3%|██▋                                                                                       | 3/100 [00:54<29:21, 18.16s/it]

epoch: 2, va acc: 0.8079999685287476
epoch: 3, info: {'extract_feature.0.weight': tensor(8.0290e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(7.3828e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(7.1980e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(3.6335e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(3.1333e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(4.1059e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(5.3655e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(4.2526e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(3.8709e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(4.7218e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 3, tr loss: 0.008637504745757777, lr: 0.0001, e_decay: 5.1693

  4%|███▌                                                                                      | 4/100 [01:12<28:51, 18.03s/it]

epoch: 3, va acc: 0.8082000017166138
epoch: 4, info: {'extract_feature.0.weight': tensor(1.4442e-06, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(5.8814e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(9.8539e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(4.5269e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(2.1504e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(2.6243e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(3.7067e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(2.8635e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(2.7926e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(3.8497e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 4, tr loss: 0.00703760145727012, lr: 0.0001, e_decay: 4.38228

  5%|████▌                                                                                     | 5/100 [01:29<28:20, 17.90s/it]

epoch: 4, va acc: 0.8075999617576599
epoch: 5, info: {'extract_feature.0.weight': tensor(1.5320e-06, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(4.1105e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(1.6433e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(3.2256e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(1.9278e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(2.9345e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(2.4463e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(3.2504e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(2.3115e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(3.5107e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 5, tr loss: 0.005617788851103892, lr: 0.0001, e_decay: 4.0680

  6%|█████▍                                                                                    | 6/100 [01:47<27:53, 17.80s/it]

epoch: 5, va acc: 0.8079999685287476
epoch: 6, info: {'extract_feature.0.weight': tensor(7.4037e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(3.4044e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(9.5743e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(1.0320e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(1.3083e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(9.7484e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(1.4233e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(2.1150e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(1.5491e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(1.3699e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 6, tr loss: 0.0046768031552514725, lr: 0.0001, e_decay: 1.847

  7%|██████▎                                                                                   | 7/100 [02:05<27:38, 17.83s/it]

epoch: 6, va acc: 0.8082000017166138
epoch: 7, info: {'extract_feature.0.weight': tensor(1.0159e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(1.6380e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(6.6610e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(7.6061e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(7.1090e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(1.1268e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(1.3695e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(1.3392e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(1.4565e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(1.2916e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 7, tr loss: 0.003995783957970366, lr: 0.0001, e_decay: 9.9009

  8%|███████▏                                                                                  | 8/100 [02:23<27:36, 18.00s/it]

epoch: 7, va acc: 0.8083999752998352
epoch: 8, info: {'extract_feature.0.weight': tensor(1.0610e-06, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(1.4661e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(5.3935e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(7.7831e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(5.1174e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(8.9589e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(1.0874e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(1.5383e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(1.4675e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(1.2853e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 8, tr loss: 0.003457858437012232, lr: 0.0001, e_decay: 1.8860

  9%|████████                                                                                  | 9/100 [02:42<27:23, 18.06s/it]

epoch: 8, va acc: 0.8087999820709229
epoch: 9, info: {'extract_feature.0.weight': tensor(3.1340e-11, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(1.2467e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(9.9016e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(6.8468e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(5.3269e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(2.0001e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(1.1074e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(1.4707e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(1.3023e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(1.0829e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 9, tr loss: 0.0030516785294881074, lr: 0.0001, e_decay: 1.041

 10%|████████▉                                                                                | 10/100 [02:59<27:00, 18.01s/it]

epoch: 9, va acc: 0.8065999746322632
epoch: 10, info: {'extract_feature.0.weight': tensor(1.7619e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(7.9625e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(1.2105e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(9.8592e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(9.0488e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(4.7998e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(9.0422e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(9.3600e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(8.1775e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(9.0230e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 10, tr loss: 0.0026741414986563303, lr: 0.0001, e_decay: 8.1

 11%|█████████▊                                                                               | 11/100 [03:18<26:46, 18.05s/it]

epoch: 10, va acc: 0.8068000078201294
epoch: 11, info: {'extract_feature.0.weight': tensor(5.0358e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(1.3053e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(7.6545e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(8.6383e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(6.4748e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(7.4786e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(6.3328e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(9.4890e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(6.9934e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(9.2716e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 11, tr loss: 0.002352243481530861, lr: 0.0001, e_decay: 1.2

 12%|██████████▋                                                                              | 12/100 [03:36<26:26, 18.03s/it]

epoch: 11, va acc: 0.8047999739646912
epoch: 12, info: {'extract_feature.0.weight': tensor(1.0690e-06, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(1.9843e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(1.1100e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(1.3773e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(7.9274e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(8.1656e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(7.2513e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(1.1701e-07, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(9.6662e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(1.1362e-07, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 12, tr loss: 0.002109021629888209, lr: 0.0001, e_decay: 2.0

 13%|███████████▌                                                                             | 13/100 [03:53<26:05, 18.00s/it]

epoch: 12, va acc: 0.8069999814033508
epoch: 13, info: {'extract_feature.0.weight': tensor(2.4711e-09, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(3.9272e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(2.5752e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(5.7371e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(3.2324e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(5.5729e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(3.0857e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(7.6772e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(6.1762e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(5.9133e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 13, tr loss: 0.0019043863307165315, lr: 0.0001, e_decay: 4.

 14%|████████████▍                                                                            | 14/100 [04:12<25:54, 18.08s/it]

epoch: 13, va acc: 0.8057999610900879
epoch: 14, info: {'extract_feature.0.weight': tensor(3.9987e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(1.6150e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(3.1180e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(3.1791e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(4.8599e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(3.9306e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(6.4064e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(5.0017e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(5.0270e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(4.6382e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 14, tr loss: 0.0017193997835093222, lr: 0.0001, e_decay: 7.

 15%|█████████████▎                                                                           | 15/100 [04:30<25:35, 18.07s/it]

epoch: 14, va acc: 0.8051999807357788
epoch: 15, info: {'extract_feature.0.weight': tensor(1.6645e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(4.0766e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(3.3827e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(5.2978e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(1.5316e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(3.8095e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(6.6978e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(6.1674e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(6.4675e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(4.7283e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 15, tr loss: 0.0015213592719182396, lr: 0.0001, e_decay: 5.

 16%|██████████████▏                                                                          | 16/100 [04:48<25:15, 18.05s/it]

epoch: 15, va acc: 0.8069999814033508
epoch: 16, info: {'extract_feature.0.weight': tensor(1.0852e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(6.3073e-09, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(1.5155e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(2.3850e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(1.8594e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(2.4377e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(2.1956e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(3.1862e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(2.6284e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(3.2692e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 16, tr loss: 0.0014166381018200862, lr: 0.0001, e_decay: 2.

 17%|███████████████▏                                                                         | 17/100 [05:06<24:55, 18.02s/it]

epoch: 16, va acc: 0.8057999610900879
epoch: 17, info: {'extract_feature.0.weight': tensor(1.1708e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(3.3227e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(3.2608e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(2.8221e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(2.4811e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(2.0055e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(2.9486e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(4.0754e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(2.6790e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(3.0805e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 17, tr loss: 0.0012668993128090184, lr: 0.0001, e_decay: 3.

 18%|████████████████                                                                         | 18/100 [05:24<24:37, 18.02s/it]

epoch: 17, va acc: 0.8057999610900879
epoch: 18, info: {'extract_feature.0.weight': tensor(1.8303e-07, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.2.weight': tensor(7.4612e-09, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.5.weight': tensor(1.5474e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.7.weight': tensor(1.7633e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.10.weight': tensor(2.4692e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.12.weight': tensor(2.2989e-08, device='cuda:0', grad_fn=<PowBackward0>), 'extract_feature.14.weight': tensor(2.0370e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.0.weight': tensor(5.2487e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.3.weight': tensor(2.9942e-08, device='cuda:0', grad_fn=<PowBackward0>), 'classifier.6.weight': tensor(2.6500e-08, device='cuda:0', grad_fn=<PowBackward0>)}
epoch: 18, tr loss: 0.0011806567544521713, lr: 0.0001, e_decay: 4.

 18%|████████████████                                                                         | 18/100 [05:42<25:58, 19.00s/it]

epoch: 18, va acc: 0.8068000078201294
early stop on epoch 18, val acc 0.8087999820709229





## bootstrap on test set

In [7]:
# do bootstrapping
stats = []
for i in range(10):
    # sample x_te
    sub_idx = np.random.choice(np.arange(len(x_te)), len(x_te), replace=True)
    x_te_sub, y_te_sub = x_te[sub_idx], y_te[sub_idx]
    pred_te = predict(model, x_te_sub)
    acc_te = eval_metric(pred_te, y_te_sub, num_class)
    stats.append(acc_te)
    print("test acc:", acc_te)

# compute confidence interveal 95%
alpha = 0.95
p = ((1-alpha)/2) * 100
lower = max(0, np.percentile(stats, p))
p = (alpha+((1.0-alpha)/2.0)) * 100
upper = min(1.0, np.percentile(stats, p))
print('%.1f confidence interval %.2f%% and %.2f%%' % (alpha*100, lower*100, upper*100))
print('average:', (upper+lower)/2)
print('interval:', (upper-lower)/2)

test acc: tensor(0.8002, device='cuda:0')
test acc: tensor(0.8009, device='cuda:0')
test acc: tensor(0.8128, device='cuda:0')
test acc: tensor(0.8011, device='cuda:0')
test acc: tensor(0.8118, device='cuda:0')
test acc: tensor(0.8077, device='cuda:0')
test acc: tensor(0.8032, device='cuda:0')
test acc: tensor(0.7972, device='cuda:0')
test acc: tensor(0.8063, device='cuda:0')
test acc: tensor(0.8019, device='cuda:0')
95.0 confidence interval 79.79% and 81.26%
average: tensor(0.8052, device='cuda:0')
interval: tensor(0.0073, device='cuda:0')
