## Imports

In [1]:
import sys
import os
import argparse
import time
import random
import math
import numpy as np
from scipy.special import logsumexp

import torch
import torchvision
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as tfs

from tensorboardX import SummaryWriter


from utils import kNN, AverageMeter, py_softmax

In [2]:
model_name = "VGG"
magic_dim = 9216 #2048 

# model_name = "ResNet"
# magic_dim = 512

In [3]:
dataset_name = "LSST/LSST"

## Training parameters

In [4]:
#data
datadir = "/root/data/Multivariate_ts"

# optimization
lamb = 0.1      # SK lambda-parameter
nopts = 400    # number of SK-optimizations
epochs = 400   # numbers of epochs
momentum = 0.9 # sgd momentum
exp = './resnet1d_exp' # experiments results dir


# other
devc='0'  # cuda device
batch_size = 500
lr=0.0005*2     #learning rate
alr=0.0005*2    #starting learning rate

knn_dim = 100
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

In [5]:
device = torch.device('cuda:' + devc) if torch.cuda.is_available() else torch.device('cpu')
print(f"GPU device: {torch.cuda.current_device()}")

GPU device: 0


## Data Preparation

In [6]:
import pandas as pd
import numpy as np
from scipy.io import arff
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from sktime.utils.load_data import load_from_tsfile_to_dataframe

from tqdm import tqdm
import os
from datetime import datetime

In [7]:
def features_to_torch(X):
    X = X.applymap(np.array)
    dimensions_lst = []

    for dim in X.columns:
        dimensions_lst.append(np.dstack(list(X[dim].values))[0])

    dimensions_lst = np.array(dimensions_lst)
    X = torch.from_numpy(np.array(dimensions_lst, dtype=np.float64))
    X = X.transpose(0, 2)
    X = X.transpose(1, 2)
    X = F.normalize(X, dim=1)
    return X.float()

def answers_to_torch(y):
    le = preprocessing.LabelEncoder()
    y = le.fit_transform(y)
    y = torch.from_numpy(np.array(y, dtype=np.int32))
    y = y.long()
    return y

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt], excerpt

In [8]:
X_train, y_train = load_from_tsfile_to_dataframe(datadir + f'/{dataset_name}_TRAIN.ts')
X_test, y_test = load_from_tsfile_to_dataframe(datadir + f'/{dataset_name}_TEST.ts')

X_train = features_to_torch(X_train)
X_test = features_to_torch(X_test)

y_train = answers_to_torch(y_train)
y_test = answers_to_torch(y_test)

In [9]:
N = X_train.shape[0]
time_steps = X_train.shape[2]
dims_num = X_train.shape[1]
num_classes = len(np.unique(y_train))

print('time_steps:', time_steps)
print('train samples_num:', N)
print('dims_num:', dims_num)
print('num_classes:', num_classes)

time_steps: 36
train samples_num: 2459
dims_num: 6
num_classes: 14


## Model parameters (AlexNet in that case)

In [10]:
hc=10                 # number of heads
ncl=num_classes       # number of clusters

numc = [ncl] * hc
# # (number of filters, kernel size, stride, pad) for AlexNet, two vesions
# CFG = {
#     'big': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M'],
#     'small': [(64, 11, 4, 2), 'M', (192, 5, 1, 2), 'M', (384, 3, 1, 1), (256, 3, 1, 1), (256, 3, 1, 1), 'M']
# }

## Models

### ResNet

In [11]:
__all__ = ['resnetv1','resnetv1_18']

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv1d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

class Normalize(nn.Module):
    def __init__(self, power=2):
        super(Normalize, self).__init__()
        self.power = power

    def forward(self, x):
        norm = x.pow(self.power).sum(1, keepdim=True).pow(1. / self.power)
        out = x.div(norm)
        return out

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm1d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm1d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv1d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(planes)
        self.conv3 = nn.Conv1d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm1d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, in_channel=3, width=1, num_classes=[1000]):
        self.inplanes = 16
        super(ResNet, self).__init__()
        self.headcount = len(num_classes)
        self.base = int(16 * width)
        self.features = nn.Sequential(*[                                                     # [100, 8, 18]
                            nn.Conv1d(in_channel, 16, kernel_size=3, padding=1, bias=False), # [100, 16, 36]
                            nn.BatchNorm1d(16),
                            nn.ReLU(inplace=True),
                            self._make_layer(block, self.base, layers[0]),                   # [100, 16, 36]
                            self._make_layer(block, self.base * 2, layers[1]),               # [100, 32, 36]
                            self._make_layer(block, self.base * 4, layers[2]),               # [100, 64, 36]
                            self._make_layer(block, self.base * 8, layers[3]),               # [100, 128, 36]
                            nn.AvgPool1d(2),                                                 # [100, 128, 18]
        ])
    
        if len(num_classes) == 1:
            self.top_layer = nn.Sequential(nn.Linear(magic_dim, num_classes[0]))
        else:
            for a, i in enumerate(num_classes):
                setattr(self, "top_layer%d" % a, nn.Linear(magic_dim, i))
            self.top_layer = None
        for m in self.features.modules():
            if isinstance(m, nn.Conv1d):
                n = m.kernel_size[0] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv1d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.features(x.float())
        out = out.view(out.size(0), -1)
        if self.headcount == 1:
            if self.top_layer:
                out = self.top_layer(out)
            return out
        else:
            outp = []
            for i in range(self.headcount):
                outp.append(getattr(self, "top_layer%d" % i)(out))
            return outp

def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def resnetv1_18(num_classes=[1000]):
    """Encoder for instance discrimination and MoCo"""
    return resnet18(num_classes=num_classes)

### VGG

In [12]:
class VGG(nn.Module):
    def __init__(self, num_classes):
        super(VGG, self).__init__()
        self.headcount = len(num_classes)
        
        self.features = nn.Sequential(*[                                                     # [100, 8, 18]
                            nn.Conv1d(dims_num, 64, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(256, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
#                             nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                            nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True),
                            nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),

#                             nn.Flatten(),
#                             nn.Linear(in_features=512 * (time_steps // 2**5), out_features=fc_hidden_dim, bias=True),

#                             nn.ReLU(inplace=True),
#                             nn.Dropout(p=0.5, inplace=False),
#                             nn.Linear(in_features=fc_hidden_dim, out_features=fc_hidden_dim, bias=True),
#                             nn.ReLU(inplace=True),
#                             nn.Dropout(p=0.5, inplace=False),
#                             nn.Linear(in_features=fc_hidden_dim, out_features=num_classes, bias=True),
#                             nn.Softmax()
        ])
        
        if len(num_classes) == 1:
            self.top_layer = nn.Sequential(nn.Linear(magic_dim, num_classes[0]))
        else:
            for a, i in enumerate(num_classes):
                setattr(self, "top_layer%d" % a, nn.Linear(magic_dim, i))
            self.top_layer = None
    
    def forward(self, x):
        out = self.features(x.float())  # [50, 10, 400] -> [50, 512, 12]
        out = out.view(out.size(0), -1) # [50, magic_dim]
        if self.headcount == 1:
            if self.top_layer:
                out = self.top_layer(out)
                print (out.size())
            return out
        else:
            outp = []
            for i in range(self.headcount):
                outp.append(getattr(self, "top_layer%d" % i)(out))
            return outp

## Sinkhorn-Knopp optimization

In [13]:
def optimize_L_sk(PS):
    N, K = PS.shape
    tt = time.time()
    PS = PS.T  # now it is K x N
    r = np.ones((K, 1)) / K
    c = np.ones((N, 1)) / N
    PS **= lamb  # K x N
    inv_K = 1. / K
    inv_N = 1. / N
    err = 1e3
    _counter = 0
    while err > 1e-2:
        r = inv_K / (PS @ c)  # (KxN)@(N,1) = K x 1
        c_new = inv_N / (r.T @ PS).T  # ((1,K)@(KxN)).t() = N x 1
        if _counter % 10 == 0:
            err = np.nansum(np.abs(c / c_new - 1))
        c = c_new
        _counter += 1
        
    print("error: ", err, 'step ', _counter, flush=True)  # " nonneg: ", sum(I), flush=True)
    # inplace calculations.
    PS *= np.squeeze(c)
    PS = PS.T
    PS *= np.squeeze(r)
    PS = PS.T
    argmaxes = np.nanargmax(PS, 0)  # size N
    newL = torch.LongTensor(argmaxes)
    selflabels = newL.to(device)
    PS = PS.T
    PS /= np.squeeze(r)
    PS = PS.T
    PS /= np.squeeze(c)
    sol = PS[argmaxes, np.arange(N)]
    np.log(sol, sol)
    cost = -(1. / lamb) * np.nansum(sol) / N
    print('cost: ', cost, flush=True)
    print('opt took {0:.2f}min, {1:4d}iters'.format(((time.time() - tt) / 60.), _counter), flush=True)
    return cost, selflabels

def opt_sk(model, selflabels_in, epoch):
    if hc == 1:
        PS = np.zeros((N, ncl))
    else:
        PS_pre = np.zeros((N, magic_dim)) # knn_dim
    
    for batch_idx, (data, _, _selected) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        data = data.to(device)#cuda()
        if hc == 1:
            p = nn.functional.softmax(model(data), 1)
            PS[_selected, :] = p.detach().cpu().numpy()
        else:
            p = model(data.float())
            PS_pre[_selected, :] = p.detach().cpu().numpy() # p: [20, magic_dim]
    if hc == 1:
        cost, selflabels = optimize_L_sk(PS)
        _costs = [cost]
    else:
        _nmis = np.zeros(hc)
        _costs = np.zeros(hc)
        nh = epoch % hc  # np.random.randint(args.hc)
        print("computing head %s " % nh, end="\r", flush=True)
        tl = getattr(model, "top_layer%d" % nh)
        # do the forward pass:
        PS = (PS_pre @ tl.weight.cpu().numpy().T
                   + tl.bias.cpu().numpy())
        PS = py_softmax(PS, 1)
        c, selflabels_ = optimize_L_sk(PS)
        _costs[nh] = c
        selflabels_in[nh] = selflabels_
        selflabels = selflabels_in
    return selflabels

## Training utils

In [14]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = alr
    if epochs == 200:
        if epoch >= 80:
            lr = alr * (0.1 ** ((epoch - 80) // 40))  # i.e. 120, 160
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 400:
        if epoch >= 160:
            lr = alr * (0.1 ** ((epoch - 160) // 80))  # i.e. 240,320
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 800:
        if epoch >= 320:
            lr = alr * (0.1 ** ((epoch - 320) // 160))  # i.e. 480, 640
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 1600:
        if epoch >= 640:
            lr = alr * (0.1 ** ((epoch - 640) // 320))
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

In [15]:
def feature_return_switch(model, bool=True):
    """
    switch between network output or conv5features
        if True: changes switch s.t. forward pass returns post-conv5 features
        if False: changes switch s.t. forward will give full network output
    """
    if bool:
        model.headcount = 1
    else:
        model.headcount = hc
    model.return_feature = bool

In [16]:
def train(epoch, selflabels):
    print('\nEpoch: %d' % epoch)
    print(model_name)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    
    for batch_idx, (inputs, targets, indexes) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        inputs = inputs.float().to(device)
        niter = epoch * N // batch_size + batch_idx
        if niter * batch_size >= optimize_times[-1]:
            with torch.no_grad():
                _ = optimize_times.pop()
                if hc >1:
                    feature_return_switch(model, True)
                selflabels = opt_sk(model, selflabels, epoch)
                if hc >1:
                    feature_return_switch(model, False)
        data_time.update(time.time() - end)
        inputs, targets = inputs.to(device), targets.to(device)#, indexes.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        if hc == 1:
            loss = criterion(outputs, selflabels[indexes])
        else:
            loss = torch.mean(torch.stack([criterion(outputs[h], selflabels[h, indexes]) for h in range(hc)]))

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
#         if True:
        if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}]'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                epoch, batch_idx, N // batch_size, batch_time=batch_time, data_time=data_time, train_loss=train_loss))
#             writer.add_scalar("loss", loss.item(), batch_idx*512 +epoch*N/batch_size)
    return selflabels

## Model initialization

In [17]:
if model_name == "ResNet":
    model = resnet18(num_classes=numc, in_channel=dims_num)
else:
    model = VGG(num_classes=numc)
print (model_name, "created")

VGG created


In [18]:
optimize_times = ((epochs + 1.0001)*N*(np.linspace(0, 1, nopts))[::-1]).tolist()
optimize_times = [(epochs +10)*N] + optimize_times
print('We will optimize L at epochs:', [np.round(1.0*t/N, 2) for t in optimize_times], flush=True)

We will optimize L at epochs: [410.0, 401.0, 400.0, 398.99, 397.99, 396.98, 395.98, 394.97, 393.97, 392.96, 391.95, 390.95, 389.94, 388.94, 387.93, 386.93, 385.92, 384.92, 383.91, 382.91, 381.9, 380.9, 379.89, 378.89, 377.88, 376.88, 375.87, 374.87, 373.86, 372.86, 371.85, 370.85, 369.84, 368.84, 367.83, 366.83, 365.82, 364.82, 363.81, 362.81, 361.8, 360.8, 359.79, 358.79, 357.78, 356.78, 355.77, 354.77, 353.76, 352.76, 351.75, 350.75, 349.74, 348.74, 347.73, 346.73, 345.72, 344.72, 343.71, 342.71, 341.7, 340.7, 339.69, 338.69, 337.68, 336.68, 335.67, 334.67, 333.66, 332.66, 331.65, 330.65, 329.64, 328.64, 327.63, 326.63, 325.62, 324.62, 323.61, 322.61, 321.6, 320.6, 319.59, 318.59, 317.58, 316.58, 315.57, 314.57, 313.56, 312.56, 311.55, 310.55, 309.54, 308.54, 307.53, 306.53, 305.52, 304.52, 303.51, 302.51, 301.5, 300.5, 299.49, 298.49, 297.48, 296.48, 295.47, 294.47, 293.46, 292.46, 291.45, 290.45, 289.44, 288.44, 287.43, 286.43, 285.42, 284.42, 283.41, 282.41, 281.4, 280.4, 279.39, 

In [19]:
# init selflabels randomly
if hc == 1:
    selflabels = np.zeros(N, dtype=np.int32)
    for qq in range(N):
        selflabels[qq] = qq % ncl
    selflabels = np.random.permutation(selflabels)
    selflabels = torch.LongTensor(selflabels).to(device)
else:
    selflabels = np.zeros((hc, N), dtype=np.int32)
    for nh in range(hc):
        for _i in range(N):
            selflabels[nh, _i] = _i % numc[nh]
        selflabels[nh] = np.random.permutation(selflabels[nh])
    selflabels = torch.LongTensor(selflabels).to(device)

In [20]:
# optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

In [21]:
writer = SummaryWriter(f'./runs/{dataset_name}')

## Training! 
Takes a couple of minutes per epoch

In [22]:
def my_kNN(net, K, sigma=0.1, dim=128, use_pca=False):
    net.eval()
    # this part is ugly but made to be backwards-compatible. there was a change in cifar dataset's structure.
    trainLabels = y_train
    LEN = N
    C = trainLabels.max() + 1

    trainFeatures = torch.zeros((magic_dim, LEN))  # , device='cuda:0') # dim
    normalize = Normalize()
    for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=False)):
        batchSize = batch_size
        inputs = inputs.cuda()
        features = net(inputs.float())
        if not use_pca:
            features = normalize(features)
        trainFeatures[:, batch_idx * batchSize:batch_idx * batchSize + batchSize] = features.data.t().cpu()
        
    if use_pca:
        comps = 4
        print('doing PCA with %s components'%comps, end=' ')
        from sklearn.decomposition import PCA
        pca = PCA(n_components=comps, whiten=False)
        trainFeatures = pca.fit_transform(trainFeatures.numpy().T)
        trainFeatures = torch.Tensor(trainFeatures)
        trainFeatures = normalize(trainFeatures).t()
        print('..done')
    def eval_k_s(K_,sigma_):
        total = 0
        top1 = 0.
#         top5 = 0.

        with torch.no_grad():
            retrieval_one_hot = torch.zeros(K_, C)# .cuda()
            for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_test, y_test, batch_size, shuffle=False)):
                targets = targets # .cuda(async=True) # or without async for py3.7
                inputs = inputs.cuda()
                batchSize = batch_size
                features = net(inputs)
                if use_pca:
                    features = pca.transform(features.cpu().numpy())
                    features = torch.Tensor(features).cuda()
                features = normalize(features).cpu()

                dist = torch.mm(features, trainFeatures)

                yd, yi = dist.topk(K_, dim=1, largest=True, sorted=True)
                candidates = trainLabels.view(1, -1).expand(batchSize, -1)
                retrieval = torch.gather(candidates, 1, yi).long()

                retrieval_one_hot.resize_(batchSize * K_, C).zero_()
                retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1.)
                
                yd_transform = yd.clone().div_(sigma_).exp_()
                probs = torch.sum(torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                                            yd_transform.view(batchSize, -1, 1)),
                                  1)
                _, predictions = probs.sort(1, True)

                # Find which predictions match the target
                correct = predictions.eq(targets.data.view(-1, 1))

                top1 = top1 + correct.narrow(1, 0, 1).sum().item()
#                 top5 = top5 + correct.narrow(1, 0, 5).sum().item()

                total += targets.size(0)

        print(f"{K_}-NN,s={sigma_}: TOP1: ", top1 * 100. / total)
        return top1 / total

    if isinstance(K, list):
        res = []
        for K_ in K:
            for sigma_ in sigma:
                res.append(eval_k_s(K_, sigma_))
        return res
    else:
        res = eval_k_s(K, sigma)
        return res

In [23]:
import time

start = time.time()
for epoch in range(start_epoch, start_epoch + epochs):
    selflabels = train(epoch, selflabels)
    feature_return_switch(model, True)
    
    acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim)
    feature_return_switch(model, False)
#     writer.add_scalar("accuracy kNN", acc, epoch)
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'opt': optimizer.state_dict(),
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/best_ckpt.t7' % (exp))
        best_acc = acc
    if epoch % 100 == 0:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'opt': optimizer.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/ep%s.t7' % (exp, epoch))
    if epoch % 50 == 0:
        feature_return_switch(model, True)
        acc = my_kNN(model, K=[50, 10], sigma=[0.1, 0.5], dim=knn_dim, use_pca=True)
        i = 0
#         for num_nn in [50, 10]:
#             for sig in [0.1, 0.5]:
#                 writer.add_scalar('knn%s-%s' % (num_nn, sig), acc[i], epoch)
#                 i += 1
        feature_return_switch(model, False)
    print('best accuracy: {:.2f}'.format(best_acc * 100))
end = time.time()

checkpoint = torch.load('%s'%exp+'/best_ckpt.t7' )
model.load_state_dict(checkpoint['net'])
feature_return_switch(model, True)
acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim, use_pca=True)


Epoch: 0
VGG
error:  0.0016160950934341889 step  1
cost:  2.6250176852413056
opt took 0.00min,    1iters
Epoch: [0][0/4]Time: 0.260 (0.260) Data: 0.176 (0.176) Loss: 2.6378 (2.6378)
10-NN,s=0.1: TOP1:  20.05
Saving..
Saving..
doing PCA with 4 components ..done
50-NN,s=0.1: TOP1:  20.5
50-NN,s=0.5: TOP1:  20.5
10-NN,s=0.1: TOP1:  16.85
10-NN,s=0.5: TOP1:  16.85
best accuracy: 20.05

Epoch: 1
VGG
Epoch: [1][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 2.4578 (2.4578)
error:  0.0 step  11
cost:  2.5290816467742885
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  27.8
Saving..
best accuracy: 27.80

Epoch: 2
VGG
Epoch: [2][0/4]Time: 0.067 (0.067) Data: 0.001 (0.001) Loss: 2.4231 (2.4231)
error:  1.532107773982716e-14 step  11
cost:  2.312843616378358
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  23.95
best accuracy: 27.80

Epoch: 3
VGG
Epoch: [3][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 2.1260 (2.1260)
error:  0.0 step  11
cost:  2.2880568192096473
opt took 0.00min,   11iter

error:  2.395861287141088e-13 step  11
cost:  0.6342434987506834
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.65
Saving..
best accuracy: 36.65

Epoch: 36
VGG
Epoch: [36][0/4]Time: 0.068 (0.068) Data: 0.001 (0.001) Loss: 0.6428 (0.6428)
error:  3.0531133177191805e-14 step  11
cost:  0.6426954870787297
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.8
Saving..
best accuracy: 36.80

Epoch: 37
VGG
Epoch: [37][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.6414 (0.6414)
error:  1.149080830487037e-13 step  11
cost:  0.67711290457048
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  39.3
Saving..
best accuracy: 39.30

Epoch: 38
VGG
Epoch: [38][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.6438 (0.6438)
error:  0.0 step  11
cost:  0.6829747188678573
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  39.6
Saving..
best accuracy: 39.60

Epoch: 39
VGG
Epoch: [39][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 0.6292 (0.6292)
error:  0.0 step  11
cost:  0.6689291314163356
opt too

cost:  0.6244876784948751
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  26.5
best accuracy: 39.60

Epoch: 71
VGG
Epoch: [71][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 0.6895 (0.6895)
error:  1.3988810110276972e-13 step  11
cost:  0.619989612310105
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  25.95
best accuracy: 39.60

Epoch: 72
VGG
Epoch: [72][0/4]Time: 0.068 (0.068) Data: 0.001 (0.001) Loss: 0.6959 (0.6959)
error:  0.0 step  11
cost:  0.6624748424260856
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  29.45
best accuracy: 39.60

Epoch: 73
VGG
Epoch: [73][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 0.6677 (0.6677)
error:  2.036149027162537e-13 step  11
cost:  0.6953594784403954
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.25
best accuracy: 39.60

Epoch: 74
VGG
Epoch: [74][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 0.6628 (0.6628)
error:  0.0 step  11
cost:  0.6803345531513031
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.55
best accuracy: 39.60

Epo

cost:  0.6584738592543026
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  38.75
best accuracy: 40.95

Epoch: 106
VGG
Epoch: [106][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 0.6258 (0.6258)
error:  0.0 step  11
cost:  0.6909156422257584
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.5
best accuracy: 40.95

Epoch: 107
VGG
Epoch: [107][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.6995 (0.6995)
error:  4.6629367034256575e-14 step  11
cost:  0.7233143544905271
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.75
best accuracy: 40.95

Epoch: 108
VGG
Epoch: [108][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.7013 (0.7013)
error:  1.028066520802895e-13 step  11
cost:  0.6855696571644329
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  38.4
best accuracy: 40.95

Epoch: 109
VGG
Epoch: [109][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.7269 (0.7269)
error:  1.412203687323199e-13 step  11
cost:  0.6913522555703135
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  37.8


10-NN,s=0.1: TOP1:  11.0
best accuracy: 40.95

Epoch: 142
VGG
error:  2.489120021209601e-13 step  11
cost:  0.6689005990103979
opt took 0.00min,   11iters
Epoch: [142][0/4]Time: 0.217 (0.217) Data: 0.151 (0.151) Loss: 0.7304 (0.7304)
10-NN,s=0.1: TOP1:  10.0
best accuracy: 40.95

Epoch: 143
VGG
error:  1.8463008899516353e-13 step  11
cost:  0.6912774000667296
opt took 0.00min,   11iters
Epoch: [143][0/4]Time: 0.250 (0.250) Data: 0.185 (0.185) Loss: 0.7261 (0.7261)
10-NN,s=0.1: TOP1:  11.7
best accuracy: 40.95

Epoch: 144
VGG
error:  0.0 step  11
cost:  0.684969744323886
opt took 0.00min,   11iters
Epoch: [144][0/4]Time: 0.236 (0.236) Data: 0.170 (0.170) Loss: 0.7157 (0.7157)
10-NN,s=0.1: TOP1:  12.3
best accuracy: 40.95

Epoch: 145
VGG
error:  0.0 step  11
cost:  0.7035568368837732
opt took 0.00min,   11iters
Epoch: [145][0/4]Time: 0.221 (0.221) Data: 0.155 (0.155) Loss: 0.7789 (0.7789)
10-NN,s=0.1: TOP1:  12.2
best accuracy: 40.95

Epoch: 146
VGG
error:  0.0 step  11
cost:  0.67829372

10-NN,s=0.1: TOP1:  40.5
best accuracy: 40.95

Epoch: 177
VGG
0.001
error:  1.8418599978531347e-13 step  11
cost:  0.6653904488395175
opt took 0.00min,   11iters
Epoch: [177][0/4]Time: 0.221 (0.221) Data: 0.155 (0.155) Loss: 0.7092 (0.7092)
10-NN,s=0.1: TOP1:  38.65
best accuracy: 40.95

Epoch: 178
VGG
0.001
error:  1.766364832178624e-13 step  11
cost:  0.6797773152678519
opt took 0.00min,   11iters
Epoch: [178][0/4]Time: 0.224 (0.224) Data: 0.157 (0.157) Loss: 0.7419 (0.7419)
10-NN,s=0.1: TOP1:  26.1
best accuracy: 40.95

Epoch: 179
VGG
0.001
error:  4.030109579389318e-13 step  11
cost:  0.6839972140896621
opt took 0.00min,   11iters
Epoch: [179][0/4]Time: 0.227 (0.227) Data: 0.161 (0.161) Loss: 0.7355 (0.7355)
10-NN,s=0.1: TOP1:  25.35
best accuracy: 40.95

Epoch: 180
VGG
0.001
error:  0.0 step  11
cost:  0.7222781155074196
opt took 0.00min,   11iters
Epoch: [180][0/4]Time: 0.228 (0.228) Data: 0.157 (0.157) Loss: 0.7589 (0.7589)
10-NN,s=0.1: TOP1:  24.5
best accuracy: 40.95

Epoch: 1

error:  0.0 step  11
cost:  0.7121632304939135
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.45
best accuracy: 40.95

Epoch: 212
VGG
0.001
Epoch: [212][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 0.6904 (0.6904)
error:  3.1530333899354446e-14 step  11
cost:  0.6526241944778554
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  36.9
best accuracy: 40.95

Epoch: 213
VGG
0.001
Epoch: [213][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.6738 (0.6738)
error:  2.3281376826389533e-13 step  11
cost:  0.6944003641972
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  35.0
best accuracy: 40.95

Epoch: 214
VGG
0.001
Epoch: [214][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.6953 (0.6953)
error:  2.8310687127941492e-14 step  11
cost:  0.7019728774820804
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  33.8
best accuracy: 40.95

Epoch: 215
VGG
0.001
Epoch: [215][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 0.6693 (0.6693)
error:  7.549516567451064e-14 step  11
cost:  0.640489407

10-NN,s=0.1: TOP1:  24.05
best accuracy: 40.95

Epoch: 247
VGG
0.0001
Epoch: [247][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 1.0893 (1.0893)
error:  9.64783808399261e-14 step  11
cost:  0.7444711273230293
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  25.95
best accuracy: 40.95

Epoch: 248
VGG
0.0001
Epoch: [248][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 1.1261 (1.1261)
error:  2.853273173286652e-14 step  11
cost:  0.6825778054346366
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  26.75
best accuracy: 40.95

Epoch: 249
VGG
0.0001
Epoch: [249][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 1.0949 (1.0949)
error:  2.277067423506196e-13 step  11
cost:  0.6703055665735815
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  30.35
best accuracy: 40.95

Epoch: 250
VGG
0.0001
Epoch: [250][0/4]Time: 0.066 (0.066) Data: 0.002 (0.002) Loss: 1.0899 (1.0899)
error:  0.0 step  11
cost:  0.6660086323749715
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  32.85
doing PCA with 4 componen

error:  0.0 step  11
cost:  0.6551286564094382
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  22.4
best accuracy: 40.95

Epoch: 282
VGG
0.0001
Epoch: [282][0/4]Time: 0.066 (0.066) Data: 0.002 (0.002) Loss: 1.0161 (1.0161)
error:  0.0 step  11
cost:  0.6914690867352843
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  22.2
best accuracy: 40.95

Epoch: 283
VGG
0.0001
Epoch: [283][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.9827 (0.9827)
error:  1.7508217098338719e-13 step  11
cost:  0.6885741799918073
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  22.0
best accuracy: 40.95

Epoch: 284
VGG
0.0001
Epoch: [284][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 1.0232 (1.0232)
error:  2.360334150353083e-13 step  11
cost:  0.6875938829768439
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  22.2
best accuracy: 40.95

Epoch: 285
VGG
0.0001
Epoch: [285][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 0.9948 (0.9948)
error:  0.0 step  11
cost:  0.6915448305258277
opt took 0.00min,   11it

error:  0.0 step  11
cost:  0.6798805991268642
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  25.75
best accuracy: 40.95

Epoch: 316
VGG
0.0001
Epoch: [316][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 1.0145 (1.0145)
error:  0.0 step  11
cost:  0.7195649874819392
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  26.15
best accuracy: 40.95

Epoch: 317
VGG
0.0001
Epoch: [317][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 1.0090 (1.0090)
error:  0.0 step  11
cost:  0.7183577533674365
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  26.15
best accuracy: 40.95

Epoch: 318
VGG
0.0001
Epoch: [318][0/4]Time: 0.065 (0.065) Data: 0.001 (0.001) Loss: 1.0372 (1.0372)
10-NN,s=0.1: TOP1:  26.55
best accuracy: 40.95

Epoch: 319
VGG
0.0001
error:  1.482147737874584e-13 step  11
cost:  0.7081349374414078
opt took 0.00min,   11iters
Epoch: [319][0/4]Time: 0.230 (0.230) Data: 0.159 (0.159) Loss: 0.9844 (0.9844)
10-NN,s=0.1: TOP1:  25.4
best accuracy: 40.95

Epoch: 320
VGG
1.0000000000000003e-05
e

10-NN,s=0.1: TOP1:  28.7
best accuracy: 40.95

Epoch: 349
VGG
1.0000000000000003e-05
error:  0.0 step  11
cost:  0.6983430524417416
opt took 0.00min,   11iters
Epoch: [349][0/4]Time: 0.219 (0.219) Data: 0.152 (0.152) Loss: 1.0687 (1.0687)
10-NN,s=0.1: TOP1:  28.75
best accuracy: 40.95

Epoch: 350
VGG
1.0000000000000003e-05
error:  3.375077994860476e-14 step  11
cost:  0.6828419967560178
opt took 0.00min,   11iters
Epoch: [350][0/4]Time: 0.220 (0.220) Data: 0.155 (0.155) Loss: 1.0662 (1.0662)
10-NN,s=0.1: TOP1:  28.7
doing PCA with 4 components ..done
50-NN,s=0.1: TOP1:  27.05
50-NN,s=0.5: TOP1:  27.05
10-NN,s=0.1: TOP1:  23.85
10-NN,s=0.5: TOP1:  23.9
best accuracy: 40.95

Epoch: 351
VGG
1.0000000000000003e-05
error:  1.1080025785759062e-13 step  11
cost:  0.7270919136459121
opt took 0.00min,   11iters
Epoch: [351][0/4]Time: 0.234 (0.234) Data: 0.167 (0.167) Loss: 1.0551 (1.0551)
10-NN,s=0.1: TOP1:  28.85
best accuracy: 40.95

Epoch: 352
VGG
1.0000000000000003e-05
error:  1.65312208366

error:  7.038813976123492e-14 step  11
cost:  0.7194679252721403
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  28.2
best accuracy: 40.95

Epoch: 382
VGG
1.0000000000000003e-05
Epoch: [382][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 1.0549 (1.0549)
error:  4.884981308350689e-14 step  11
cost:  0.6710563827356143
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  27.65
best accuracy: 40.95

Epoch: 383
VGG
1.0000000000000003e-05
Epoch: [383][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 1.0213 (1.0213)
error:  0.0 step  11
cost:  0.6726559095792009
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  28.85
best accuracy: 40.95

Epoch: 384
VGG
1.0000000000000003e-05
Epoch: [384][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 1.0377 (1.0377)
error:  0.0 step  11
cost:  0.7073445535633903
opt took 0.00min,   11iters
10-NN,s=0.1: TOP1:  29.15
best accuracy: 40.95

Epoch: 385
VGG
1.0000000000000003e-05
Epoch: [385][0/4]Time: 0.066 (0.066) Data: 0.001 (0.001) Loss: 1.0167 (1.0167)
error

In [24]:
print (end-start)

335.8529739379883
