## Imports

In [1]:
import sys
import os
import argparse
import time
import random
import math
import numpy as np
from scipy.special import logsumexp

import torch
import torchvision
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as tfs

from tensorboardX import SummaryWriter


from utils import kNN, AverageMeter, py_softmax

In [2]:
# model_name = "VGG"
# magic_dim = 2048

model_name = "ResNet"
magic_dim = 512

In [3]:
dataset_name = "PenDigits/PenDigits"

## Training parameters

In [4]:
#data
datadir = "/root/data/Multivariate_ts"

# optimization
lamb = 1      # SK lambda-parameter
nopts = 400    # number of SK-optimizations
epochs = 400   # numbers of epochs
momentum = 0.9 # sgd momentum
exp = './resnet1d_exp' # experiments results dir


# other
devc='0'  # cuda device
batch_size = 500
lr=0.003     #learning rate
alr=0.003    #starting learning rate

knn_dim = 20
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

In [5]:
device = torch.device('cuda:' + devc) if torch.cuda.is_available() else torch.device('cpu')
print(f"GPU device: {torch.cuda.current_device()}")

GPU device: 0


## Data Preparation

In [6]:
import pandas as pd
import numpy as np
from scipy.io import arff
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from sktime.utils.load_data import load_from_tsfile_to_dataframe

from tqdm import tqdm
import os
from datetime import datetime

In [7]:
def features_to_torch(X):
    X = X.applymap(np.array)
    dimensions_lst = []

    for dim in X.columns:
        dimensions_lst.append(np.dstack(list(X[dim].values))[0])

    dimensions_lst = np.array(dimensions_lst)
    X = torch.from_numpy(np.array(dimensions_lst, dtype=np.float64))
    X = X.transpose(0, 2)
    X = X.transpose(1, 2)
    X = F.normalize(X, dim=1)
    return X.float()

def answers_to_torch(y):
    le = preprocessing.LabelEncoder()
    y = le.fit_transform(y)
    y = torch.from_numpy(np.array(y, dtype=np.int32))
    y = y.long()
    return y

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt], excerpt

In [8]:
X_train, y_train = load_from_tsfile_to_dataframe(datadir + f'/{dataset_name}_TRAIN.ts')
X_test, y_test = load_from_tsfile_to_dataframe(datadir + f'/{dataset_name}_TEST.ts')

X_train = features_to_torch(X_train)
X_test = features_to_torch(X_test)

y_train = answers_to_torch(y_train)
y_test = answers_to_torch(y_test)

In [9]:
N = X_train.shape[0]
time_steps = X_train.shape[2]
dims_num = X_train.shape[1]
num_classes = len(np.unique(y_train))

print('time_steps:', time_steps)
print('train samples_num:', N)
print('dims_num:', dims_num)
print('num_classes:', num_classes)

time_steps: 8
train samples_num: 7494
dims_num: 2
num_classes: 10


## Model parameters (AlexNet in that case)

In [10]:
hc=10                 # number of heads
ncl=num_classes       # number of clusters

numc = [ncl] * hc
# # (number of filters, kernel size, stride, pad) for AlexNet, two vesions
# CFG = {
#     'big': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M'],
#     'small': [(64, 11, 4, 2), 'M', (192, 5, 1, 2), 'M', (384, 3, 1, 1), (256, 3, 1, 1), (256, 3, 1, 1), 'M']
# }

## Models

### ResNet

In [11]:
__all__ = ['resnetv1','resnetv1_18']

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv1d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

class Normalize(nn.Module):
    def __init__(self, power=2):
        super(Normalize, self).__init__()
        self.power = power

    def forward(self, x):
        norm = x.pow(self.power).sum(1, keepdim=True).pow(1. / self.power)
        out = x.div(norm)
        return out

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm1d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm1d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv1d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(planes)
        self.conv3 = nn.Conv1d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm1d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, in_channel=3, width=1, num_classes=[1000]):
        self.inplanes = 16
        super(ResNet, self).__init__()
        self.headcount = len(num_classes)
        self.base = int(16 * width)
        self.features = nn.Sequential(*[                                                     # [100, 8, 18]
                            nn.Conv1d(in_channel, 16, kernel_size=3, padding=1, bias=False), # [100, 16, 36]
                            nn.BatchNorm1d(16),
                            nn.ReLU(inplace=True),
                            self._make_layer(block, self.base, layers[0]),                   # [100, 16, 36]
                            self._make_layer(block, self.base * 2, layers[1]),               # [100, 32, 36]
                            self._make_layer(block, self.base * 4, layers[2]),               # [100, 64, 36]
                            self._make_layer(block, self.base * 8, layers[3]),               # [100, 128, 36]
                            nn.AvgPool1d(2),                                                 # [100, 128, 18]
        ])
    
        if len(num_classes) == 1:
            self.top_layer = nn.Sequential(nn.Linear(magic_dim, num_classes[0]))
        else:
            for a, i in enumerate(num_classes):
                setattr(self, "top_layer%d" % a, nn.Linear(magic_dim, i))
            self.top_layer = None
        for m in self.features.modules():
            if isinstance(m, nn.Conv1d):
                n = m.kernel_size[0] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv1d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.features(x.float())
        out = out.view(out.size(0), -1)
        if self.headcount == 1:
            if self.top_layer:
                out = self.top_layer(out)
            return out
        else:
            outp = []
            for i in range(self.headcount):
                outp.append(getattr(self, "top_layer%d" % i)(out))
            return outp

def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def resnetv1_18(num_classes=[1000]):
    """Encoder for instance discrimination and MoCo"""
    return resnet18(num_classes=num_classes)

### VGG

In [12]:
class VGG(nn.Module):
    def __init__(self, num_classes):
        super(VGG, self).__init__()
        self.headcount = len(num_classes)
        
        self.features = nn.Sequential(*[                                                     # [100, 8, 18]
                            nn.Conv1d(dims_num, 64, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(256, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),

#                             nn.Flatten(),
#                             nn.Linear(in_features=512 * (time_steps // 2**5), out_features=fc_hidden_dim, bias=True),

#                             nn.ReLU(inplace=True),
#                             nn.Dropout(p=0.5, inplace=False),
#                             nn.Linear(in_features=fc_hidden_dim, out_features=fc_hidden_dim, bias=True),
#                             nn.ReLU(inplace=True),
#                             nn.Dropout(p=0.5, inplace=False),
#                             nn.Linear(in_features=fc_hidden_dim, out_features=num_classes, bias=True),
#                             nn.Softmax()
        ])
        
        if len(num_classes) == 1:
            self.top_layer = nn.Sequential(nn.Linear(magic_dim, num_classes[0]))
        else:
            for a, i in enumerate(num_classes):
                setattr(self, "top_layer%d" % a, nn.Linear(magic_dim, i))
            self.top_layer = None
    
    def forward(self, x):
        out = self.features(x.float())  # [50, 10, 400] -> [50, 512, 12]
        out = out.view(out.size(0), -1) # [50, magic_dim]
        if self.headcount == 1:
            if self.top_layer:
                out = self.top_layer(out)
                print (out.size())
            return out
        else:
            outp = []
            for i in range(self.headcount):
                outp.append(getattr(self, "top_layer%d" % i)(out))
            return outp

## Sinkhorn-Knopp optimization

In [13]:
def optimize_L_sk(PS):
    N, K = PS.shape
    tt = time.time()
    PS = PS.T  # now it is K x N
    r = np.ones((K, 1)) / K
    c = np.ones((N, 1)) / N
    PS **= lamb  # K x N
    inv_K = 1. / K
    inv_N = 1. / N
    err = 1e3
    _counter = 0
    while err > 1e-2:
        r = inv_K / (PS @ c)  # (KxN)@(N,1) = K x 1
        c_new = inv_N / (r.T @ PS).T  # ((1,K)@(KxN)).t() = N x 1
        if _counter % 10 == 0:
            err = np.nansum(np.abs(c / c_new - 1))
        c = c_new
        _counter += 1
        
    print("error: ", err, 'step ', _counter, flush=True)  # " nonneg: ", sum(I), flush=True)
    # inplace calculations.
    PS *= np.squeeze(c)
    PS = PS.T
    PS *= np.squeeze(r)
    PS = PS.T
    argmaxes = np.nanargmax(PS, 0)  # size N
    newL = torch.LongTensor(argmaxes)
    selflabels = newL.to(device)
    PS = PS.T
    PS /= np.squeeze(r)
    PS = PS.T
    PS /= np.squeeze(c)
    sol = PS[argmaxes, np.arange(N)]
    np.log(sol, sol)
    cost = -(1. / lamb) * np.nansum(sol) / N
    print('cost: ', cost, flush=True)
    print('opt took {0:.2f}min, {1:4d}iters'.format(((time.time() - tt) / 60.), _counter), flush=True)
    return cost, selflabels

def opt_sk(model, selflabels_in, epoch):
    if hc == 1:
        PS = np.zeros((N, ncl))
    else:
        PS_pre = np.zeros((N, magic_dim)) # knn_dim
    
    for batch_idx, (data, _, _selected) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        data = data.to(device)#cuda()
        if hc == 1:
            p = nn.functional.softmax(model(data), 1)
            PS[_selected, :] = p.detach().cpu().numpy()
        else:
            p = model(data.float())
            PS_pre[_selected, :] = p.detach().cpu().numpy() # p: [20, magic_dim]
    if hc == 1:
        cost, selflabels = optimize_L_sk(PS)
        _costs = [cost]
    else:
        _nmis = np.zeros(hc)
        _costs = np.zeros(hc)
        nh = epoch % hc  # np.random.randint(args.hc)
        print("computing head %s " % nh, end="\r", flush=True)
        tl = getattr(model, "top_layer%d" % nh)
        # do the forward pass:
        PS = (PS_pre @ tl.weight.cpu().numpy().T
                   + tl.bias.cpu().numpy())
        PS = py_softmax(PS, 1)
        c, selflabels_ = optimize_L_sk(PS)
        _costs[nh] = c
        selflabels_in[nh] = selflabels_
        selflabels = selflabels_in
    return selflabels

## Training utils

In [14]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = alr
    if epochs == 200:
        if epoch >= 80:
            lr = alr * (0.1 ** ((epoch - 80) // 40))  # i.e. 120, 160
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 400:
        if epoch >= 160:
            lr = alr * (0.1 ** ((epoch - 160) // 80))  # i.e. 240,320
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 800:
        if epoch >= 320:
            lr = alr * (0.1 ** ((epoch - 320) // 160))  # i.e. 480, 640
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 1600:
        if epoch >= 640:
            lr = alr * (0.1 ** ((epoch - 640) // 320))
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

In [15]:
def feature_return_switch(model, bool=True):
    """
    switch between network output or conv5features
        if True: changes switch s.t. forward pass returns post-conv5 features
        if False: changes switch s.t. forward will give full network output
    """
    if bool:
        model.headcount = 1
    else:
        model.headcount = hc
    model.return_feature = bool

In [16]:
def train(epoch, selflabels):
    print('\nEpoch: %d' % epoch)
    print(model_name)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    
    for batch_idx, (inputs, targets, indexes) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        inputs = inputs.float().to(device)
        niter = epoch * N // batch_size + batch_idx
        if niter * batch_size >= optimize_times[-1]:
            with torch.no_grad():
                _ = optimize_times.pop()
                if hc >1:
                    feature_return_switch(model, True)
                selflabels = opt_sk(model, selflabels, epoch)
                if hc >1:
                    feature_return_switch(model, False)
        data_time.update(time.time() - end)
        inputs, targets = inputs.to(device), targets.to(device)#, indexes.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        if hc == 1:
            loss = criterion(outputs, selflabels[indexes])
        else:
            loss = torch.mean(torch.stack([criterion(outputs[h], selflabels[h, indexes]) for h in range(hc)]))

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
#         if True:
        if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}]'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                epoch, batch_idx, N // batch_size, batch_time=batch_time, data_time=data_time, train_loss=train_loss))
#             writer.add_scalar("loss", loss.item(), batch_idx*512 +epoch*N/batch_size)
    return selflabels

## Model initialization

In [17]:
if model_name == "ResNet":
    model = resnet18(num_classes=numc, in_channel=dims_num)
else:
    model = VGG(num_classes=numc)
print (model_name, "created")

ResNet created


In [18]:
optimize_times = ((epochs + 1.0001)*N*(np.linspace(0, 1, nopts))[::-1]).tolist()
optimize_times = [(epochs +10)*N] + optimize_times
print('We will optimize L at epochs:', [np.round(1.0*t/N, 2) for t in optimize_times], flush=True)

We will optimize L at epochs: [410.0, 401.0, 400.0, 398.99, 397.99, 396.98, 395.98, 394.97, 393.97, 392.96, 391.95, 390.95, 389.94, 388.94, 387.93, 386.93, 385.92, 384.92, 383.91, 382.91, 381.9, 380.9, 379.89, 378.89, 377.88, 376.88, 375.87, 374.87, 373.86, 372.86, 371.85, 370.85, 369.84, 368.84, 367.83, 366.83, 365.82, 364.82, 363.81, 362.81, 361.8, 360.8, 359.79, 358.79, 357.78, 356.78, 355.77, 354.77, 353.76, 352.76, 351.75, 350.75, 349.74, 348.74, 347.73, 346.73, 345.72, 344.72, 343.71, 342.71, 341.7, 340.7, 339.69, 338.69, 337.68, 336.68, 335.67, 334.67, 333.66, 332.66, 331.65, 330.65, 329.64, 328.64, 327.63, 326.63, 325.62, 324.62, 323.61, 322.61, 321.6, 320.6, 319.59, 318.59, 317.58, 316.58, 315.57, 314.57, 313.56, 312.56, 311.55, 310.55, 309.54, 308.54, 307.53, 306.53, 305.52, 304.52, 303.51, 302.51, 301.5, 300.5, 299.49, 298.49, 297.48, 296.48, 295.47, 294.47, 293.46, 292.46, 291.45, 290.45, 289.44, 288.44, 287.43, 286.43, 285.42, 284.42, 283.41, 282.41, 281.4, 280.4, 279.39, 

In [19]:
# init selflabels randomly
if hc == 1:
    selflabels = np.zeros(N, dtype=np.int32)
    for qq in range(N):
        selflabels[qq] = qq % ncl
    selflabels = np.random.permutation(selflabels)
    selflabels = torch.LongTensor(selflabels).to(device)
else:
    selflabels = np.zeros((hc, N), dtype=np.int32)
    for nh in range(hc):
        for _i in range(N):
            selflabels[nh, _i] = _i % numc[nh]
        selflabels[nh] = np.random.permutation(selflabels[nh])
    selflabels = torch.LongTensor(selflabels).to(device)

In [20]:
# optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

In [21]:
writer = SummaryWriter(f'./runs/{dataset_name}')

## Training! 
Takes a couple of minutes per epoch

In [22]:
def my_kNN(net, K, sigma=0.1, dim=128, use_pca=False):
    net.eval()
    # this part is ugly but made to be backwards-compatible. there was a change in cifar dataset's structure.
    trainLabels = y_train
    LEN = N
    C = trainLabels.max() + 1

    trainFeatures = torch.zeros((magic_dim, LEN))  # , device='cuda:0') # dim
    normalize = Normalize()
    for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=False)):
        batchSize = batch_size
        inputs = inputs.cuda()
        features = net(inputs.float())
        if not use_pca:
            features = normalize(features)
        trainFeatures[:, batch_idx * batchSize:batch_idx * batchSize + batchSize] = features.data.t().cpu()
        
    if use_pca:
        comps = 128
        print('doing PCA with %s components'%comps, end=' ')
        from sklearn.decomposition import PCA
        pca = PCA(n_components=comps, whiten=False)
        trainFeatures = pca.fit_transform(trainFeatures.numpy().T)
        trainFeatures = torch.Tensor(trainFeatures)
        trainFeatures = normalize(trainFeatures).t()
        print('..done')
    def eval_k_s(K_,sigma_):
        total = 0
        top1 = 0.
#         top5 = 0.

        with torch.no_grad():
            retrieval_one_hot = torch.zeros(K_, C)# .cuda()
            for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_test, y_test, batch_size, shuffle=False)):
                targets = targets # .cuda(async=True) # or without async for py3.7
                inputs = inputs.cuda()
                batchSize = batch_size
                features = net(inputs)
                if use_pca:
                    features = pca.transform(features.cpu().numpy())
                    features = torch.Tensor(features).cuda()
                features = normalize(features).cpu()

                dist = torch.mm(features, trainFeatures)

                yd, yi = dist.topk(K_, dim=1, largest=True, sorted=True)
                candidates = trainLabels.view(1, -1).expand(batchSize, -1)
                retrieval = torch.gather(candidates, 1, yi).long()

                retrieval_one_hot.resize_(batchSize * K_, C).zero_()
                retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1.)
                
                yd_transform = yd.clone().div_(sigma_).exp_()
                probs = torch.sum(torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                                            yd_transform.view(batchSize, -1, 1)),
                                  1)
                _, predictions = probs.sort(1, True)

                # Find which predictions match the target
                correct = predictions.eq(targets.data.view(-1, 1))

                top1 = top1 + correct.narrow(1, 0, 1).sum().item()
#                 top5 = top5 + correct.narrow(1, 0, 5).sum().item()

                total += targets.size(0)

        print(f"{K_}-NN,s={sigma_}: TOP1: ", top1 * 100. / total)
        return top1 / total

    if isinstance(K, list):
        res = []
        for K_ in K:
            for sigma_ in sigma:
                res.append(eval_k_s(K_, sigma_))
        return res
    else:
        res = eval_k_s(K, sigma)
        return res

In [23]:
import time

start = time.time()
for epoch in range(start_epoch, start_epoch + epochs):
    selflabels = train(epoch, selflabels)
    feature_return_switch(model, True)
    
    acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim)
    feature_return_switch(model, False)
#     writer.add_scalar("accuracy kNN", acc, epoch)
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'opt': optimizer.state_dict(),
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/best_ckpt.t7' % (exp))
        best_acc = acc
    if epoch % 100 == 0:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'opt': optimizer.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/ep%s.t7' % (exp, epoch))
    if epoch % 50 == 0:
        feature_return_switch(model, True)
        acc = my_kNN(model, K=[50, 10], sigma=[0.1, 0.5], dim=knn_dim, use_pca=True)
        i = 0
#         for num_nn in [50, 10]:
#             for sig in [0.1, 0.5]:
#                 writer.add_scalar('knn%s-%s' % (num_nn, sig), acc[i], epoch)
#                 i += 1
        feature_return_switch(model, False)
    print('best accuracy: {:.2f}'.format(best_acc * 100))
end = time.time()

checkpoint = torch.load('%s'%exp+'/best_ckpt.t7' )
model.load_state_dict(checkpoint['net'])
feature_return_switch(model, True)
acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim, use_pca=True)


Epoch: 0
ResNet
error:  2.198907722572585e-12 step  11
cost:  1.8309918499843867
opt took 0.00min,   11iters
Epoch: [0][0/14]Time: 0.242 (0.242) Data: 0.166 (0.166) Loss: 2.4481 (2.4481)
Epoch: [0][10/14]Time: 0.033 (0.054) Data: 0.000 (0.016) Loss: 2.3291 (2.4108)
10-NN,s=0.1: TOP1:  90.43333333333334
Saving..
Saving..
doing PCA with 128 components ..done
50-NN,s=0.1: TOP1:  88.36666666666666
50-NN,s=0.5: TOP1:  85.1
10-NN,s=0.1: TOP1:  90.9
10-NN,s=0.5: TOP1:  90.0
best accuracy: 90.43

Epoch: 1
ResNet
Epoch: [1][0/14]Time: 0.033 (0.033) Data: 0.001 (0.001) Loss: 2.2700 (2.2700)
error:  2.540190280342358e-13 step  11
cost:  2.0304588050026444
opt took 0.00min,   11iters
Epoch: [1][10/14]Time: 0.026 (0.044) Data: 0.000 (0.014) Loss: 2.1883 (2.2219)
10-NN,s=0.1: TOP1:  90.23333333333333
best accuracy: 90.43

Epoch: 2
ResNet
Epoch: [2][0/14]Time: 0.027 (0.027) Data: 0.001 (0.001) Loss: 2.1442 (2.1442)
error:  3.239630785856207e-13 step  11
cost:  1.9740387419476242
opt took 0.00min,   

10-NN,s=0.1: TOP1:  91.53333333333333
best accuracy: 91.83

Epoch: 25
ResNet
Epoch: [25][0/14]Time: 0.033 (0.033) Data: 0.001 (0.001) Loss: 0.4093 (0.4093)
error:  0.0009347666629488938 step  41
cost:  0.3614874941491231
opt took 0.00min,   41iters
Epoch: [25][10/14]Time: 0.027 (0.044) Data: 0.000 (0.011) Loss: 0.4791 (0.4525)
10-NN,s=0.1: TOP1:  91.46666666666667
best accuracy: 91.83

Epoch: 26
ResNet
Epoch: [26][0/14]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.3788 (0.3788)
error:  0.002172176825272931 step  41
cost:  0.39722994005044127
opt took 0.00min,   41iters
Epoch: [26][10/14]Time: 0.029 (0.044) Data: 0.000 (0.012) Loss: 0.4770 (0.4465)
10-NN,s=0.1: TOP1:  91.56666666666666
best accuracy: 91.83

Epoch: 27
ResNet
Epoch: [27][0/14]Time: 0.036 (0.036) Data: 0.002 (0.002) Loss: 0.4019 (0.4019)
error:  0.0009022594514704885 step  41
cost:  0.378622813870011
opt took 0.00min,   41iters
Epoch: [27][10/14]Time: 0.026 (0.041) Data: 0.000 (0.011) Loss: 0.4844 (0.4577)
10-NN,s=0.1: T

error:  0.005148393146534147 step  61
cost:  0.3447144059996935
opt took 0.00min,   61iters
Epoch: [50][10/14]Time: 0.026 (0.044) Data: 0.000 (0.011) Loss: 0.3961 (0.3817)
10-NN,s=0.1: TOP1:  92.0
Saving..
doing PCA with 128 components ..done
50-NN,s=0.1: TOP1:  91.43333333333334
50-NN,s=0.5: TOP1:  88.26666666666667
10-NN,s=0.1: TOP1:  92.0
10-NN,s=0.5: TOP1:  91.2
best accuracy: 92.00

Epoch: 51
ResNet
Epoch: [51][0/14]Time: 0.025 (0.025) Data: 0.000 (0.000) Loss: 0.3438 (0.3438)
error:  0.0037355490285561066 step  61
cost:  0.28045033764155164
opt took 0.00min,   61iters
Epoch: [51][10/14]Time: 0.029 (0.040) Data: 0.000 (0.011) Loss: 0.4089 (0.3737)
10-NN,s=0.1: TOP1:  91.63333333333334
best accuracy: 92.00

Epoch: 52
ResNet
Epoch: [52][0/14]Time: 0.025 (0.025) Data: 0.001 (0.001) Loss: 0.3253 (0.3253)
error:  0.007414812827390205 step  61
cost:  0.3604442449746548
opt took 0.00min,   61iters
Epoch: [52][10/14]Time: 0.025 (0.040) Data: 0.000 (0.010) Loss: 0.4462 (0.3718)
10-NN,s=0.1

error:  0.006752546034266671 step  51
cost:  0.3386933764928414
opt took 0.00min,   51iters
Epoch: [75][10/14]Time: 0.028 (0.039) Data: 0.000 (0.010) Loss: 0.4205 (0.3667)
10-NN,s=0.1: TOP1:  92.1
best accuracy: 92.20

Epoch: 76
ResNet
Epoch: [76][0/14]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.3310 (0.3310)
error:  0.0018670471228658947 step  61
cost:  0.3075058156901782
opt took 0.00min,   61iters
Epoch: [76][10/14]Time: 0.029 (0.042) Data: 0.000 (0.012) Loss: 0.3656 (0.3467)
10-NN,s=0.1: TOP1:  91.7
best accuracy: 92.20

Epoch: 77
ResNet
Epoch: [77][0/14]Time: 0.035 (0.035) Data: 0.001 (0.001) Loss: 0.3367 (0.3367)
error:  0.0019264120935602591 step  51
cost:  0.29955323587117744
opt took 0.00min,   51iters
Epoch: [77][10/14]Time: 0.028 (0.044) Data: 0.000 (0.011) Loss: 0.3841 (0.3430)
10-NN,s=0.1: TOP1:  92.1
best accuracy: 92.20

Epoch: 78
ResNet
Epoch: [78][0/14]Time: 0.043 (0.043) Data: 0.001 (0.001) Loss: 0.2981 (0.2981)
error:  0.0036132586067197936 step  51
cost:  0.3233

Epoch: [100][10/14]Time: 0.031 (0.036) Data: 0.000 (0.010) Loss: 0.4145 (0.3355)
10-NN,s=0.1: TOP1:  91.36666666666666
Saving..
doing PCA with 128 components ..done
50-NN,s=0.1: TOP1:  91.23333333333333
50-NN,s=0.5: TOP1:  88.33333333333333
10-NN,s=0.1: TOP1:  91.56666666666666
10-NN,s=0.5: TOP1:  90.23333333333333
best accuracy: 92.33

Epoch: 101
ResNet
Epoch: [101][0/14]Time: 0.025 (0.025) Data: 0.001 (0.001) Loss: 0.3028 (0.3028)
error:  0.003216293524476388 step  71
cost:  0.2752938780967142
opt took 0.00min,   71iters
Epoch: [101][10/14]Time: 0.033 (0.039) Data: 0.000 (0.014) Loss: 0.3635 (0.3236)
10-NN,s=0.1: TOP1:  91.76666666666667
best accuracy: 92.33

Epoch: 102
ResNet
Epoch: [102][0/14]Time: 0.025 (0.025) Data: 0.001 (0.001) Loss: 0.2837 (0.2837)
error:  0.009005086695802578 step  61
cost:  0.3216443318318398
opt took 0.00min,   61iters
Epoch: [102][10/14]Time: 0.029 (0.036) Data: 0.000 (0.010) Loss: 0.3864 (0.3225)
10-NN,s=0.1: TOP1:  91.56666666666666
best accuracy: 92.33


error:  0.007411465062156175 step  51
cost:  0.28770299651677167
opt took 0.00min,   51iters
Epoch: [125][10/14]Time: 0.155 (0.040) Data: 0.119 (0.011) Loss: 0.4349 (0.3365)
10-NN,s=0.1: TOP1:  91.16666666666667
best accuracy: 92.33

Epoch: 126
ResNet
Epoch: [126][0/14]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.2949 (0.2949)
error:  0.002998945670608877 step  51
cost:  0.30162929021905166
opt took 0.00min,   51iters
Epoch: [126][10/14]Time: 0.150 (0.040) Data: 0.112 (0.011) Loss: 0.3867 (0.3304)
10-NN,s=0.1: TOP1:  91.53333333333333
best accuracy: 92.33

Epoch: 127
ResNet
Epoch: [127][0/14]Time: 0.030 (0.030) Data: 0.001 (0.001) Loss: 0.3185 (0.3185)
Epoch: [127][10/14]Time: 0.032 (0.029) Data: 0.000 (0.000) Loss: 0.3906 (0.3418)
error:  0.00497027377463688 step  51
cost:  0.3046205298156236
opt took 0.00min,   51iters
10-NN,s=0.1: TOP1:  91.93333333333334
best accuracy: 92.33

Epoch: 128
ResNet
Epoch: [128][0/14]Time: 0.030 (0.030) Data: 0.001 (0.001) Loss: 0.3014 (0.3014)
Epoch:

error:  0.00205480829841842 step  61
cost:  0.31182026859410444
opt took 0.00min,   61iters
10-NN,s=0.1: TOP1:  91.56666666666666
doing PCA with 128 components ..done
50-NN,s=0.1: TOP1:  90.7
50-NN,s=0.5: TOP1:  88.3
10-NN,s=0.1: TOP1:  91.2
10-NN,s=0.5: TOP1:  90.36666666666666
best accuracy: 92.33

Epoch: 151
ResNet
Epoch: [151][0/14]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.3108 (0.3108)
Epoch: [151][10/14]Time: 0.025 (0.026) Data: 0.000 (0.000) Loss: 0.3372 (0.3333)
error:  0.007477776459692831 step  71
cost:  0.29037501755251527
opt took 0.00min,   71iters
10-NN,s=0.1: TOP1:  91.6
best accuracy: 92.33

Epoch: 152
ResNet
Epoch: [152][0/14]Time: 0.025 (0.025) Data: 0.001 (0.001) Loss: 0.2935 (0.2935)
Epoch: [152][10/14]Time: 0.025 (0.025) Data: 0.000 (0.000) Loss: 0.3546 (0.3295)
error:  0.00441029115443603 step  51
cost:  0.28480646550765876
opt took 0.00min,   51iters
10-NN,s=0.1: TOP1:  91.3
best accuracy: 92.33

Epoch: 153
ResNet
Epoch: [153][0/14]Time: 0.025 (0.025) Data:

error:  0.002494882264269749 step  51
cost:  0.3012658503987256
opt took 0.00min,   51iters
Epoch: [175][0/14]Time: 0.147 (0.147) Data: 0.107 (0.107) Loss: 0.3173 (0.3173)
Epoch: [175][10/14]Time: 0.025 (0.038) Data: 0.000 (0.010) Loss: 0.3430 (0.3146)
10-NN,s=0.1: TOP1:  91.23333333333333
best accuracy: 92.33

Epoch: 176
ResNet
0.003
error:  0.007733262654044948 step  51
cost:  0.3215927834385026
opt took 0.00min,   51iters
Epoch: [176][0/14]Time: 0.163 (0.163) Data: 0.124 (0.124) Loss: 0.2916 (0.2916)
Epoch: [176][10/14]Time: 0.025 (0.039) Data: 0.000 (0.012) Loss: 0.3114 (0.3140)
10-NN,s=0.1: TOP1:  91.4
best accuracy: 92.33

Epoch: 177
ResNet
0.003
error:  0.002506954764264435 step  61
cost:  0.29787617570584607
opt took 0.00min,   61iters
Epoch: [177][0/14]Time: 0.166 (0.166) Data: 0.130 (0.130) Loss: 0.3011 (0.3011)
Epoch: [177][10/14]Time: 0.028 (0.042) Data: 0.000 (0.012) Loss: 0.3288 (0.3138)
10-NN,s=0.1: TOP1:  91.1
best accuracy: 92.33

Epoch: 178
ResNet
0.003
error:  0.0038

error:  0.004904018901487062 step  51
cost:  0.31797674414212657
opt took 0.00min,   51iters
Epoch: [200][10/14]Time: 0.030 (0.047) Data: 0.000 (0.013) Loss: 0.4041 (0.3634)
10-NN,s=0.1: TOP1:  91.4
Saving..
doing PCA with 128 components ..done
50-NN,s=0.1: TOP1:  90.13333333333334
50-NN,s=0.5: TOP1:  87.86666666666666
10-NN,s=0.1: TOP1:  90.6
10-NN,s=0.5: TOP1:  89.8
best accuracy: 92.33

Epoch: 201
ResNet
0.003
Epoch: [201][0/14]Time: 0.025 (0.025) Data: 0.001 (0.001) Loss: 0.2691 (0.2691)
error:  0.0018094229189777877 step  61
cost:  0.3029298649519557
opt took 0.00min,   61iters
Epoch: [201][10/14]Time: 0.024 (0.039) Data: 0.000 (0.011) Loss: 0.3868 (0.3499)
10-NN,s=0.1: TOP1:  91.1
best accuracy: 92.33

Epoch: 202
ResNet
0.003
Epoch: [202][0/14]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.2698 (0.2698)
error:  0.0046750247013612745 step  51
cost:  0.3057319560589804
opt took 0.00min,   51iters
Epoch: [202][10/14]Time: 0.028 (0.040) Data: 0.000 (0.010) Loss: 0.3692 (0.3466)
10-N

Epoch: [224][10/14]Time: 0.030 (0.039) Data: 0.000 (0.010) Loss: 0.3572 (0.3196)
10-NN,s=0.1: TOP1:  91.4
best accuracy: 92.33

Epoch: 225
ResNet
0.003
Epoch: [225][0/14]Time: 0.034 (0.034) Data: 0.001 (0.001) Loss: 0.2405 (0.2405)
error:  0.0035753076955579166 step  51
cost:  0.2996226808285213
opt took 0.00min,   51iters
Epoch: [225][10/14]Time: 0.026 (0.042) Data: 0.000 (0.011) Loss: 0.3279 (0.3067)
10-NN,s=0.1: TOP1:  91.4
best accuracy: 92.33

Epoch: 226
ResNet
0.003
Epoch: [226][0/14]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.2593 (0.2593)
error:  0.0048759629591292875 step  51
cost:  0.336519994192355
opt took 0.00min,   51iters
Epoch: [226][10/14]Time: 0.025 (0.038) Data: 0.000 (0.010) Loss: 0.3196 (0.3062)
10-NN,s=0.1: TOP1:  91.43333333333334
best accuracy: 92.33

Epoch: 227
ResNet
0.003
Epoch: [227][0/14]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.2744 (0.2744)
error:  0.006222198611916641 step  51
cost:  0.28075532368053024
opt took 0.00min,   51iters
Epoch: [227][

10-NN,s=0.1: TOP1:  91.3
best accuracy: 92.33

Epoch: 249
ResNet
0.00030000000000000003
Epoch: [249][0/14]Time: 0.030 (0.030) Data: 0.001 (0.001) Loss: 0.3927 (0.3927)
error:  0.005695443976102665 step  51
cost:  0.30556763084752825
opt took 0.00min,   51iters
Epoch: [249][10/14]Time: 0.025 (0.044) Data: 0.000 (0.013) Loss: 0.4111 (0.4123)
10-NN,s=0.1: TOP1:  91.46666666666667
best accuracy: 92.33

Epoch: 250
ResNet
0.00030000000000000003
Epoch: [250][0/14]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.3923 (0.3923)
error:  0.0021228849862532373 step  61
cost:  0.2776446334997077
opt took 0.00min,   61iters
Epoch: [250][10/14]Time: 0.027 (0.039) Data: 0.000 (0.011) Loss: 0.4451 (0.3998)
10-NN,s=0.1: TOP1:  91.06666666666666
doing PCA with 128 components ..done
50-NN,s=0.1: TOP1:  90.46666666666667
50-NN,s=0.5: TOP1:  87.2
10-NN,s=0.1: TOP1:  90.7
10-NN,s=0.5: TOP1:  90.06666666666666
best accuracy: 92.33

Epoch: 251
ResNet
0.00030000000000000003
Epoch: [251][0/14]Time: 0.028 (0.028) D

10-NN,s=0.1: TOP1:  91.13333333333334
best accuracy: 92.33

Epoch: 272
ResNet
0.00030000000000000003
Epoch: [272][0/14]Time: 0.033 (0.033) Data: 0.001 (0.001) Loss: 0.3846 (0.3846)
error:  0.004127501231825703 step  61
cost:  0.27978680391679916
opt took 0.00min,   61iters
Epoch: [272][10/14]Time: 0.031 (0.046) Data: 0.000 (0.013) Loss: 0.4337 (0.3794)
10-NN,s=0.1: TOP1:  91.16666666666667
best accuracy: 92.33

Epoch: 273
ResNet
0.00030000000000000003
Epoch: [273][0/14]Time: 0.027 (0.027) Data: 0.001 (0.001) Loss: 0.3731 (0.3731)
error:  0.004501032349469192 step  61
cost:  0.28355751188502903
opt took 0.00min,   61iters
Epoch: [273][10/14]Time: 0.025 (0.040) Data: 0.000 (0.011) Loss: 0.3764 (0.3818)
10-NN,s=0.1: TOP1:  91.23333333333333
best accuracy: 92.33

Epoch: 274
ResNet
0.00030000000000000003
Epoch: [274][0/14]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.3696 (0.3696)
error:  0.0019222251995107964 step  61
cost:  0.2868969891365834
opt took 0.00min,   61iters
Epoch: [274][10/

error:  0.0015553165471404728 step  61
cost:  0.2850630219527449
opt took 0.00min,   61iters
Epoch: [295][10/14]Time: 0.044 (0.039) Data: 0.000 (0.011) Loss: 0.3974 (0.3485)
10-NN,s=0.1: TOP1:  91.23333333333333
best accuracy: 92.33

Epoch: 296
ResNet
0.00030000000000000003
Epoch: [296][0/14]Time: 0.032 (0.032) Data: 0.001 (0.001) Loss: 0.3540 (0.3540)
error:  0.0026644919555683932 step  61
cost:  0.28077146166698536
opt took 0.00min,   61iters
Epoch: [296][10/14]Time: 0.039 (0.047) Data: 0.000 (0.014) Loss: 0.3943 (0.3544)
10-NN,s=0.1: TOP1:  91.4
best accuracy: 92.33

Epoch: 297
ResNet
0.00030000000000000003
Epoch: [297][0/14]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.3657 (0.3657)
error:  0.005641892992788056 step  51
cost:  0.28325124266111845
opt took 0.00min,   51iters
Epoch: [297][10/14]Time: 0.033 (0.039) Data: 0.000 (0.011) Loss: 0.3723 (0.3602)
10-NN,s=0.1: TOP1:  91.2
best accuracy: 92.33

Epoch: 298
ResNet
0.00030000000000000003
Epoch: [298][0/14]Time: 0.030 (0.030) Da

error:  0.0017660190282490218 step  61
cost:  0.27044680062156823
opt took 0.00min,   61iters
Epoch: [318][10/14]Time: 0.143 (0.037) Data: 0.109 (0.010) Loss: 0.3860 (0.3432)
10-NN,s=0.1: TOP1:  91.06666666666666
best accuracy: 92.33

Epoch: 319
ResNet
0.00030000000000000003
Epoch: [319][0/14]Time: 0.031 (0.031) Data: 0.002 (0.002) Loss: 0.3280 (0.3280)
error:  0.008351360773628302 step  51
cost:  0.2937300209042977
opt took 0.00min,   51iters
Epoch: [319][10/14]Time: 0.171 (0.041) Data: 0.125 (0.012) Loss: 0.3635 (0.3438)
10-NN,s=0.1: TOP1:  91.2
best accuracy: 92.33

Epoch: 320
ResNet
3.0000000000000008e-05
Epoch: [320][0/14]Time: 0.027 (0.027) Data: 0.001 (0.001) Loss: 0.3428 (0.3428)
error:  0.003423353980988808 step  61
cost:  0.2777131657980529
opt took 0.00min,   61iters
Epoch: [320][10/14]Time: 0.153 (0.041) Data: 0.121 (0.011) Loss: 0.3969 (0.3514)
10-NN,s=0.1: TOP1:  91.2
best accuracy: 92.33

Epoch: 321
ResNet
3.0000000000000008e-05
Epoch: [321][0/14]Time: 0.029 (0.029) Data

Epoch: [342][10/14]Time: 0.024 (0.025) Data: 0.000 (0.000) Loss: 0.4458 (0.4739)
error:  0.005871620894301155 step  81
cost:  0.27551968809111305
opt took 0.00min,   81iters
10-NN,s=0.1: TOP1:  91.3
best accuracy: 92.33

Epoch: 343
ResNet
3.0000000000000008e-05
Epoch: [343][0/14]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.4842 (0.4842)
Epoch: [343][10/14]Time: 0.033 (0.029) Data: 0.000 (0.000) Loss: 0.4546 (0.4689)
error:  0.0069183696383608995 step  91
cost:  0.2794524909093491
opt took 0.00min,   91iters
10-NN,s=0.1: TOP1:  91.23333333333333
best accuracy: 92.33

Epoch: 344
ResNet
3.0000000000000008e-05
Epoch: [344][0/14]Time: 0.030 (0.030) Data: 0.001 (0.001) Loss: 0.4910 (0.4910)
Epoch: [344][10/14]Time: 0.034 (0.031) Data: 0.000 (0.000) Loss: 0.4515 (0.4524)
error:  0.006310756788409977 step  61
cost:  0.2733570136235062
opt took 0.00min,   61iters
10-NN,s=0.1: TOP1:  91.3
best accuracy: 92.33

Epoch: 345
ResNet
3.0000000000000008e-05
Epoch: [345][0/14]Time: 0.027 (0.027) Data

Epoch: [365][10/14]Time: 0.025 (0.026) Data: 0.000 (0.000) Loss: 0.4421 (0.4510)
error:  0.005347178979924316 step  71
cost:  0.27208878423752514
opt took 0.00min,   71iters
10-NN,s=0.1: TOP1:  91.23333333333333
best accuracy: 92.33

Epoch: 366
ResNet
3.0000000000000008e-05
Epoch: [366][0/14]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.4495 (0.4495)
Epoch: [366][10/14]Time: 0.030 (0.028) Data: 0.000 (0.000) Loss: 0.4819 (0.4509)
10-NN,s=0.1: TOP1:  91.26666666666667
best accuracy: 92.33

Epoch: 367
ResNet
3.0000000000000008e-05
error:  0.0023623632613387624 step  81
cost:  0.27774631308983094
opt took 0.00min,   81iters
Epoch: [367][0/14]Time: 0.192 (0.192) Data: 0.151 (0.151) Loss: 0.4199 (0.4199)
Epoch: [367][10/14]Time: 0.031 (0.050) Data: 0.000 (0.014) Loss: 0.4950 (0.4580)
10-NN,s=0.1: TOP1:  91.26666666666667
best accuracy: 92.33

Epoch: 368
ResNet
3.0000000000000008e-05
error:  0.007793321068916281 step  71
cost:  0.2713796938509746
opt took 0.00min,   71iters
Epoch: [368][0/

error:  0.0024078123960702413 step  81
cost:  0.2732226627849644
opt took 0.00min,   81iters
Epoch: [389][0/14]Time: 0.421 (0.421) Data: 0.366 (0.366) Loss: 0.4816 (0.4816)
Epoch: [389][10/14]Time: 0.032 (0.070) Data: 0.000 (0.034) Loss: 0.4417 (0.4596)
10-NN,s=0.1: TOP1:  91.43333333333334
best accuracy: 92.33

Epoch: 390
ResNet
3.0000000000000008e-05
error:  0.004654838614108536 step  71
cost:  0.26581286031465806
opt took 0.00min,   71iters
Epoch: [390][0/14]Time: 0.202 (0.202) Data: 0.137 (0.137) Loss: 0.4623 (0.4623)
Epoch: [390][10/14]Time: 0.024 (0.044) Data: 0.000 (0.013) Loss: 0.4352 (0.4494)
10-NN,s=0.1: TOP1:  91.43333333333334
best accuracy: 92.33

Epoch: 391
ResNet
3.0000000000000008e-05
error:  0.004075086013492046 step  81
cost:  0.25839084234923426
opt took 0.00min,   81iters
Epoch: [391][0/14]Time: 0.146 (0.146) Data: 0.108 (0.108) Loss: 0.4664 (0.4664)
Epoch: [391][10/14]Time: 0.029 (0.042) Data: 0.000 (0.010) Loss: 0.4088 (0.4428)
10-NN,s=0.1: TOP1:  91.2666666666666

In [24]:
print (end-start)

309.9845130443573
