## Imports

In [50]:
import sys
import os
import time
import random
import math
import numpy as np
from scipy.special import logsumexp

import torch
import torch.nn as nn
import torch.optim as optim
from tensorboardX import SummaryWriter


from utils import kNN, AverageMeter, py_softmax

## Training parameters

In [79]:
#data
datadir = "/root/data/Multivariate_arff"

# optimization
lamb = 10      # SK lambda-parameter
nopts = 400    # number of SK-optimizations
epochs = 30   # numbers of epochs
exp = './resnet1d_exp' # experiments results dir


# other
devc='0'  # cuda device
batch_size = 300
lr=0.03     #learning rate
alr=0.03    #starting learning rate

knn_dim = 10
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch


In [52]:
device = torch.device('cuda:' + devc) if torch.cuda.is_available() else torch.device('cpu')
print(f"GPU device: {torch.cuda.current_device()}")

GPU device: 0


## Model parameters

In [53]:
ncl=6       # number of clusters

numc = [i for i in range(6,36,3)]

hc=len(numc)      # number of heads

# # (number of filters, kernel size, stride, pad) for AlexNet, two vesions
# CFG = {
#     'big': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M'],
#     'small': [(64, 11, 4, 2), 'M', (192, 5, 1, 2), 'M', (384, 3, 1, 1), (256, 3, 1, 1), (256, 3, 1, 1), 'M']
# }
numc

[6, 9, 12, 15, 18, 21, 24, 27, 30, 33]

## Data Preparation

In [54]:
import pandas as pd
import numpy as np
from scipy.io import arff
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing

from tqdm import tqdm
import os
from datetime import datetime

In [55]:
def load_file(filepath):
    data = arff.loadarff(filepath)
    data = pd.DataFrame(data[0])
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    return X.values, y.values

def load_group(prefix, filenames): 
    loaded = []
    for name in filenames: 
        X, y = load_file(prefix + "/" + name) 
        loaded.append(X)
    # stack group so that features are the 3rd dimension 
    loaded = np.dstack(loaded)
    return loaded, y

def load_dataset_group(folder_path, ds_path, dims_num, is_train=True, label_enc=False): 
    filenames = []
    if is_train:
        postfix = "_TRAIN.arff"
    else:
        postfix = "_TEST.arff"
    for dim_num in range(1, dims_num + 1):
        filenames.append(ds_path + str(dim_num) + postfix)

    X, y = load_group(folder_path, filenames)
    X = torch.from_numpy(np.array(X, dtype=np.float64))
    if label_enc:
        le = preprocessing.LabelEncoder()
        y = le.fit_transform(y)
        y = torch.from_numpy(np.array(y, dtype=np.int32))
    else:
        y = torch.from_numpy(np.array(y, dtype=np.int32)) - 1
    X = X.transpose(1, 2)
    return X, y

def load_dataset(folder_path, ds_path, dims_num, label_enc=False): 
    X_train, y_train = load_dataset_group(folder_path, ds_path, dims_num, 
                                          is_train=True, label_enc=label_enc) 
    X_test, y_test = load_dataset_group(folder_path, ds_path, dims_num, 
                                        is_train=False, label_enc=label_enc)
    X_train = F.normalize(X_train, dim=1)
    X_test = F.normalize(X_test, dim=1)
    return X_train, y_train, X_test, y_test

# from tqdm import trange
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt], excerpt

In [56]:


ds_path = "LSST/LSSTDimension"
dims_num = 6
num_classes = 14
magic_dim = 2304

X_train, y_train, X_test, y_test = load_dataset(datadir, ds_path, dims_num)
# X_train[0], y_train
print("X_train.shape:", X_train.shape, "\ny_train.shape:", y_train.shape)
print("X_test.shape:", X_test.shape, "\ny_test.shape:", y_test.shape)

X_train.shape: torch.Size([2459, 6, 36]) 
y_train.shape: torch.Size([2459])
X_test.shape: torch.Size([2466, 6, 36]) 
y_test.shape: torch.Size([2466])


In [57]:
N = X_train.shape[0]
N

2459

In [58]:
X_train.shape

torch.Size([2459, 6, 36])

## Model, ResNet

In [59]:
import torch.nn as nn
import math

__all__ = ['resnetv1','resnetv1_18']

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv1d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

class Normalize(nn.Module):
    def __init__(self, power=2):
        super(Normalize, self).__init__()
        self.power = power

    def forward(self, x):
        norm = x.pow(self.power).sum(1, keepdim=True).pow(1. / self.power)
        out = x.div(norm)
        return out

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm1d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm1d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv1d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(planes)
        self.conv3 = nn.Conv1d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm1d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, in_channel=3, width=1, num_classes=[1000]):
        self.inplanes = 16
        super(ResNet, self).__init__()
        self.headcount = len(num_classes)
        self.base = int(16 * width)
        self.features = nn.Sequential(*[                                                     # [100, 8, 18]
                            nn.Conv1d(in_channel, 16, kernel_size=3, padding=1, bias=False), # [100, 16, 36]
                            nn.BatchNorm1d(16),
                            nn.ReLU(inplace=True),
                            self._make_layer(block, self.base, layers[0]),                   # [100, 16, 36]
                            self._make_layer(block, self.base * 2, layers[1]),               # [100, 32, 36]
                            self._make_layer(block, self.base * 4, layers[2]),               # [100, 64, 36]
                            self._make_layer(block, self.base * 8, layers[3]),               # [100, 128, 36]
                            nn.AvgPool1d(2),                                                 # [100, 128, 18]
        ])
        self.return_features = False
    
        if len(num_classes) == 1:
            self.top_layer = nn.Sequential(nn.Linear(magic_dim, num_classes[0]))
        else:
            for a, i in enumerate(num_classes):
                setattr(self, "top_layer%d" % a, nn.Linear(magic_dim, i))
            self.top_layer = None
        for m in self.features.modules():
            if isinstance(m, nn.Conv1d):
                n = m.kernel_size[0] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv1d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.features(x.float())
        out = out.view(out.size(0), -1)
        if self.headcount == 1:
            if self.top_layer:
                out = self.top_layer(out)
            return out
        else:
            outp = []
            for i in range(self.headcount):
                outp.append(getattr(self, "top_layer%d" % i)(out))
            return outp

def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def resnetv1_18(num_classes=[1000]):
    """Encoder for instance discrimination and MoCo"""
    return resnet18(num_classes=num_classes)

## Sinkhorn-Knopp optimization

In [61]:
def optimize_L_sk(PS):
    N, K = PS.shape
    tt = time.time()
    PS = PS.T  # now it is K x N
    r = np.ones((K, 1)) / K
    c = np.ones((N, 1)) / N
    PS **= lamb  # K x N
    inv_K = 1. / K
    inv_N = 1. / N
    err = 1e3
    _counter = 0
    while err > 1e-3:
        r = inv_K / (PS @ c)  # (KxN)@(N,1) = K x 1
        c_new = inv_N / (r.T @ PS).T  # ((1,K)@(KxN)).t() = N x 1
        if _counter % 10 == 0:
            err = np.nansum(np.abs(c / c_new - 1))
        c = c_new
        _counter += 1
        
    print("error: ", err, 'step ', _counter, flush=True)  # " nonneg: ", sum(I), flush=True)
    # inplace calculations.
    PS *= np.squeeze(c)
    PS = PS.T
    PS *= np.squeeze(r)
    PS = PS.T
    argmaxes = np.nanargmax(PS, 0)  # size N
    newL = torch.LongTensor(argmaxes)
    selflabels = newL.to(device)
    PS = PS.T
    PS /= np.squeeze(r)
    PS = PS.T
    PS /= np.squeeze(c)
    sol = PS[argmaxes, np.arange(N)]
    np.log(sol, sol)
    cost = -(1. / lamb) * np.nansum(sol) / N
    print('cost: ', cost, flush=True)
    print('opt took {0:.2f}min, {1:4d}iters'.format(((time.time() - tt) / 60.), _counter), flush=True)
    return cost, selflabels

def opt_sk(model, selflabels_in, epoch):
    if hc == 1:
        PS = np.zeros((N, ncl))
    else:
        PS_pre = np.zeros((N, magic_dim)) # knn_dim
    
    for batch_idx, (data, _, _selected) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        data = data.to(device)#cuda()
        if hc == 1:
            p = nn.functional.softmax(model(data), 1)
            PS[_selected, :] = p.detach().cpu().numpy()
        else:
            p = model(data.float())
            PS_pre[_selected, :] = p.detach().cpu().numpy()
    if hc == 1:
        cost, selflabels = optimize_L_sk(PS)
        _costs = [cost]
    else:
        _nmis = np.zeros(hc)
        _costs = np.zeros(hc)
        nh = epoch % hc  # np.random.randint(args.hc)
        print("computing head %s " % nh, end="\r", flush=True)
        tl = getattr(model, "top_layer%d" % nh)
        # do the forward pass:
        PS = (PS_pre @ tl.weight.cpu().numpy().T
                   + tl.bias.cpu().numpy())
        PS = py_softmax(PS, 1)
        c, selflabels_ = optimize_L_sk(PS)
        _costs[nh] = c
        selflabels_in[nh] = selflabels_
        selflabels = selflabels_in
    return selflabels

## Training utils

In [62]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = alr
    if epochs == 200:
        if epoch >= 80:
            lr = alr * (0.1 ** ((epoch - 80) // 40))  # i.e. 120, 160
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 400:
        if epoch >= 160:
            lr = alr * (0.1 ** ((epoch - 160) // 80))  # i.e. 240,320
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 800:
        if epoch >= 320:
            lr = alr * (0.1 ** ((epoch - 320) // 160))  # i.e. 480, 640
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 1600:
        if epoch >= 640:
            lr = alr * (0.1 ** ((epoch - 640) // 320))
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

In [63]:
def feature_return_switch(model, bool=True):
    """
    switch between network output or conv5features
        if True: changes switch s.t. forward pass returns post-conv5 features
        if False: changes switch s.t. forward will give full network output
    """
    if bool:
        model.headcount = 1
    else:
        model.headcount = hc
    model.return_feature = bool

In [64]:
def train(epoch, selflabels):
    print('\nEpoch: %d' % epoch)
    print(name)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    
    for batch_idx, (inputs, targets, indexes) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        inputs = inputs.float().to(device)
        niter = epoch * N + batch_idx
        if len(optimize_times) > 0 and niter * batch_size >= optimize_times[-1]:
            with torch.no_grad():
                _ = optimize_times.pop()
                if hc >1:
                    feature_return_switch(model, True)
                selflabels = opt_sk(model, selflabels, epoch)
                if hc >1:
                    feature_return_switch(model, False)
        data_time.update(time.time() - end)
        inputs, targets = inputs.to(device), targets.to(device)#, indexes.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        if hc == 1:
            loss = criterion(outputs, selflabels[indexes])
        else:
            loss = torch.mean(torch.stack([criterion(outputs[h],
                                                     selflabels[h, indexes]) for h in range(hc)]))

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if True:
#         if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}]'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                epoch, batch_idx, N, batch_time=batch_time, data_time=data_time, train_loss=train_loss))
#             writer.add_scalar("loss", loss.item(), batch_idx*512 +epoch*len(trainloader.dataset))
    return selflabels

In [65]:
def my_kNN(net, K, sigma=0.1, dim=128, use_pca=False):
    net.eval()
    # this part is ugly but made to be backwards-compatible. there was a change in cifar dataset's structure.
    trainLabels = y_train
    LEN = N
    C = trainLabels.max() + 1

    trainFeatures = torch.zeros((magic_dim, LEN))  # , device='cuda:0') # dim
    normalize = Normalize()
    for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=False)):
        batchSize = batch_size
        inputs = inputs.cuda()
        features = net(inputs.float())
        if not use_pca:
            features = normalize(features)
        tmp = trainFeatures[:, batch_idx * batchSize:batch_idx * batchSize + batchSize]
        trainFeatures[:, batch_idx * batchSize:batch_idx * batchSize + batchSize] = features.data.t().cpu()
        
    if use_pca:
        comps = 128
        print('doing PCA with %s components'%comps, end=' ')
        from sklearn.decomposition import PCA
        pca = PCA(n_components=comps, whiten=False)
        trainFeatures = pca.fit_transform(trainFeatures.numpy().T)
        trainFeatures = torch.Tensor(trainFeatures)
        trainFeatures = normalize(trainFeatures).t()
        print('..done')
    def eval_k_s(K_,sigma_):
        total = 0
        top1 = 0.
        top5 = 0.

        with torch.no_grad():
            retrieval_one_hot = torch.zeros(K_, C)# .cuda()
            for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_test, y_test, batch_size, shuffle=False)):
                targets = targets # .cuda(async=True) # or without async for py3.7
                inputs = inputs.cuda()
                batchSize = batch_size
                features = net(inputs)
                if use_pca:
                    features = pca.transform(features.cpu().numpy())
                    features = torch.Tensor(features).cuda()
                features = normalize(features).cpu()

                dist = torch.mm(features, trainFeatures)

                yd, yi = dist.topk(K_, dim=1, largest=True, sorted=True)
                candidates = trainLabels.view(1, -1).expand(batchSize, -1)
                retrieval = torch.gather(candidates, 1, yi).long()

                retrieval_one_hot.resize_(batchSize * K_, C).zero_()
                retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1.)
                
                yd_transform = yd.clone().div_(sigma_).exp_()
                probs = torch.sum(torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                                            yd_transform.view(batchSize, -1, 1)),
                                  1)
                _, predictions = probs.sort(1, True)

                # Find which predictions match the target
                correct = predictions.eq(targets.data.view(-1, 1))

                top1 = top1 + correct.narrow(1, 0, 1).sum().item()
                top5 = top5 + correct.narrow(1, 0, 5).sum().item()

                total += targets.size(0)

        print(f"{K_}-NN,s={sigma_}: TOP1: ", top1 * 100. / total)
        return top1 / total

    if isinstance(K, list):
        res = []
        for K_ in K:
            for sigma_ in sigma:
                res.append(eval_k_s(K_, sigma_))
        return res
    else:
        res = eval_k_s(K, sigma)
        return res

## Model initialization

In [75]:
model = resnet18(num_classes=numc, in_channel=dims_num)

In [80]:
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=5e-4)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

In [74]:
optimize_times = ((epochs + 1.0001)*N*(np.linspace(0, 1, nopts))[::-1]).tolist()
optimize_times = [(epochs +10)*N] + optimize_times
print('We will optimize L at epochs:', [np.round(1.0*t/N, 2) for t in optimize_times], flush=True)

We will optimize L at epochs: [40.0, 31.0, 30.92, 30.84, 30.77, 30.69, 30.61, 30.53, 30.46, 30.38, 30.3, 30.22, 30.15, 30.07, 29.99, 29.91, 29.83, 29.76, 29.68, 29.6, 29.52, 29.45, 29.37, 29.29, 29.21, 29.14, 29.06, 28.98, 28.9, 28.82, 28.75, 28.67, 28.59, 28.51, 28.44, 28.36, 28.28, 28.2, 28.13, 28.05, 27.97, 27.89, 27.81, 27.74, 27.66, 27.58, 27.5, 27.43, 27.35, 27.27, 27.19, 27.12, 27.04, 26.96, 26.88, 26.8, 26.73, 26.65, 26.57, 26.49, 26.42, 26.34, 26.26, 26.18, 26.11, 26.03, 25.95, 25.87, 25.79, 25.72, 25.64, 25.56, 25.48, 25.41, 25.33, 25.25, 25.17, 25.1, 25.02, 24.94, 24.86, 24.78, 24.71, 24.63, 24.55, 24.47, 24.4, 24.32, 24.24, 24.16, 24.09, 24.01, 23.93, 23.85, 23.77, 23.7, 23.62, 23.54, 23.46, 23.39, 23.31, 23.23, 23.15, 23.08, 23.0, 22.92, 22.84, 22.76, 22.69, 22.61, 22.53, 22.45, 22.38, 22.3, 22.22, 22.14, 22.07, 21.99, 21.91, 21.83, 21.75, 21.68, 21.6, 21.52, 21.44, 21.37, 21.29, 21.21, 21.13, 21.06, 20.98, 20.9, 20.82, 20.74, 20.67, 20.59, 20.51, 20.43, 20.36, 20.28, 20.2

In [77]:
# init selflabels randomly
if hc == 1:
    selflabels = np.zeros(N, dtype=np.int32)
    for qq in range(N):
        selflabels[qq] = qq % ncl
    selflabels = np.random.permutation(selflabels)
    selflabels = torch.LongTensor(selflabels).to(device)
else:
    selflabels = np.zeros((hc, N), dtype=np.int32)
    for nh in range(hc):
        for _i in range(N):
            selflabels[nh, _i] = _i % numc[nh]
        selflabels[nh] = np.random.permutation(selflabels[nh])
    selflabels = torch.LongTensor(selflabels).to(device)

In [78]:
name = "ResNet1D"
writer = SummaryWriter(f'./runs/ERing/{name}')

## Training! 
Takes a couple of minutes per epoch

In [81]:
import time

start = time.time()
for epoch in range(start_epoch, start_epoch + epochs):
    selflabels = train(epoch, selflabels)
    feature_return_switch(model, True)
    
    acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim)
    feature_return_switch(model, False)
#     writer.add_scalar("accuracy kNN", acc, epoch)
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'opt': optimizer.state_dict(),
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/best_ckpt.t7' % (exp))
        best_acc = acc
    if epoch % 100 == 0:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'opt': optimizer.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/ep%s.t7' % (exp, epoch))
    if epoch % 50 == 0:
        feature_return_switch(model, True)
        acc = my_kNN(model, K=[50, 10], sigma=[0.1, 0.5], dim=knn_dim, use_pca=True)
        i = 0
#         for num_nn in [50, 10]:
#             for sig in [0.1, 0.5]:
#                 writer.add_scalar('knn%s-%s' % (num_nn, sig), acc[i], epoch)
#                 i += 1
        feature_return_switch(model, False)
    print('best accuracy: {:.2f}'.format(best_acc * 100))
end = time.time()

checkpoint = torch.load('%s'%exp+'/best_ckpt.t7' )
model.load_state_dict(checkpoint['net'])
feature_return_switch(model, True)
acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim, use_pca=True)


Epoch: 0
ResNet1D
error:  0.00014390381574547195 step  41
cost:  1.422989671386523
opt took 0.00min,   41iters
Epoch: [0][0/2459]Time: 0.183 (0.183) Data: 0.131 (0.131) Loss: 2.9777 (2.9777)
error:  0.00011761677591848319 step  41
cost:  1.3514934322383634
opt took 0.00min,   41iters
Epoch: [0][1/2459]Time: 0.197 (0.190) Data: 0.142 (0.136) Loss: 2.9556 (2.9666)
error:  0.0003353056875193827 step  41
cost:  1.3417622999567298
opt took 0.00min,   41iters
Epoch: [0][2/2459]Time: 0.182 (0.187) Data: 0.133 (0.135) Loss: 2.9266 (2.9533)
error:  0.00038117495865697126 step  41
cost:  1.3811691434431654
opt took 0.00min,   41iters
Epoch: [0][3/2459]Time: 0.173 (0.184) Data: 0.122 (0.132) Loss: 2.9263 (2.9465)
error:  0.0006994712035833794 step  41
cost:  1.3581219633147834
opt took 0.00min,   41iters
Epoch: [0][4/2459]Time: 0.208 (0.189) Data: 0.158 (0.137) Loss: 2.9372 (2.9447)
error:  9.944607707990105e-05 step  51
cost:  1.291638493763578
opt took 0.00min,   51iters
Epoch: [0][5/2459]Time

error:  0.0005123365933552559 step  41
cost:  2.233628327652542
opt took 0.00min,   41iters
Epoch: [5][4/2459]Time: 0.179 (0.184) Data: 0.135 (0.140) Loss: 2.1247 (2.1413)
error:  5.97201586142182e-05 step  51
cost:  2.21247800165279
opt took 0.00min,   51iters
Epoch: [5][5/2459]Time: 0.188 (0.185) Data: 0.144 (0.141) Loss: 2.1043 (2.1351)
error:  0.00020567449665798154 step  51
cost:  2.192598815326588
opt took 0.00min,   51iters
Epoch: [5][6/2459]Time: 0.194 (0.186) Data: 0.150 (0.142) Loss: 2.1380 (2.1355)
error:  0.000494425656726305 step  51
cost:  2.178580076285469
opt took 0.00min,   51iters
Epoch: [5][7/2459]Time: 0.204 (0.188) Data: 0.159 (0.144) Loss: 2.0922 (2.1301)
10-NN,s=0.1: TOP1:  39.666666666666664
best accuracy: 40.21

Epoch: 6
ResNet1D
error:  0.0001878654220642817 step  41
cost:  2.3404520958499417
opt took 0.00min,   41iters
Epoch: [6][0/2459]Time: 0.158 (0.158) Data: 0.113 (0.113) Loss: 1.9950 (1.9950)
error:  0.0002657567095702973 step  41
cost:  2.33384030355142

error:  0.0009311308801263074 step  231
cost:  0.7627133489455441
opt took 0.00min,  231iters
Epoch: [11][1/2459]Time: 0.223 (0.204) Data: 0.176 (0.158) Loss: 1.2681 (1.2696)
error:  0.0006483602289745205 step  241
cost:  0.7676014003269278
opt took 0.00min,  241iters
Epoch: [11][2/2459]Time: 0.205 (0.205) Data: 0.162 (0.160) Loss: 1.2848 (1.2747)
error:  0.0007546185892404189 step  251
cost:  0.7452092767356919
opt took 0.00min,  251iters
Epoch: [11][3/2459]Time: 0.226 (0.210) Data: 0.183 (0.165) Loss: 1.2383 (1.2656)
error:  0.0006277058043913097 step  231
cost:  0.733904594438344
opt took 0.00min,  231iters
Epoch: [11][4/2459]Time: 0.223 (0.213) Data: 0.180 (0.168) Loss: 1.2377 (1.2600)
error:  0.000846186787812675 step  201
cost:  0.7200015216146677
opt took 0.00min,  201iters
Epoch: [11][5/2459]Time: 0.221 (0.214) Data: 0.178 (0.170) Loss: 1.2291 (1.2549)
error:  0.0006605912591582941 step  231
cost:  0.7301379046815237
opt took 0.00min,  231iters
Epoch: [11][6/2459]Time: 0.278 (0

error:  0.0007040960317433731 step  281
cost:  0.8577186728646817
opt took 0.00min,  281iters
Epoch: [16][6/2459]Time: 0.237 (0.238) Data: 0.193 (0.194) Loss: 0.7766 (0.7798)
error:  0.0009844540596781748 step  281
cost:  0.857054348531436
opt took 0.00min,  281iters
Epoch: [16][7/2459]Time: 0.248 (0.239) Data: 0.203 (0.195) Loss: 0.7752 (0.7792)
10-NN,s=0.1: TOP1:  38.166666666666664
best accuracy: 40.21

Epoch: 17
ResNet1D
error:  0.0008200405316300952 step  341
cost:  0.8976795284115304
opt took 0.00min,  341iters
Epoch: [17][0/2459]Time: 0.267 (0.267) Data: 0.227 (0.227) Loss: 0.7024 (0.7024)
error:  0.000787579960255802 step  311
cost:  0.8878365890278975
opt took 0.00min,  311iters
Epoch: [17][1/2459]Time: 0.290 (0.278) Data: 0.243 (0.235) Loss: 0.7142 (0.7083)
error:  0.0008342397954753666 step  351
cost:  0.879803171008417
opt took 0.00min,  351iters
Epoch: [17][2/2459]Time: 0.250 (0.269) Data: 0.203 (0.224) Loss: 0.7115 (0.7093)
error:  0.0009532188837984013 step  331
cost:  0

error:  0.0009432952996876187 step  1291
cost:  0.3297274015495966
opt took 0.00min, 1291iters
Epoch: [22][2/2459]Time: 0.315 (0.285) Data: 0.270 (0.240) Loss: 0.4946 (0.4691)
error:  0.0009143243586814398 step  791
cost:  0.3287684813839734
opt took 0.00min,  791iters
Epoch: [22][3/2459]Time: 0.292 (0.287) Data: 0.233 (0.238) Loss: 0.4744 (0.4704)
error:  0.0009897131431763073 step  891
cost:  0.3235507613076406
opt took 0.00min,  891iters
Epoch: [22][4/2459]Time: 0.489 (0.327) Data: 0.402 (0.271) Loss: 0.4372 (0.4638)
error:  0.0009444829850302483 step  501
cost:  0.34233145844306945
opt took 0.00min,  501iters
Epoch: [22][5/2459]Time: 0.220 (0.310) Data: 0.174 (0.255) Loss: 0.4793 (0.4664)
error:  0.0009965403441386522 step  961
cost:  0.3373932796390956
opt took 0.00min,  961iters
Epoch: [22][6/2459]Time: 0.279 (0.305) Data: 0.232 (0.252) Loss: 0.4828 (0.4687)
error:  0.0009780145439853838 step  1841
cost:  0.3239928526334219
opt took 0.00min, 1841iters
Epoch: [22][7/2459]Time: 0.4

Epoch: [27][6/2459]Time: 0.417 (0.533) Data: 0.372 (0.484) Loss: 0.3687 (0.3587)
error:  0.0009804724320519043 step  1121
cost:  0.4078992303169037
opt took 0.00min, 1121iters
Epoch: [27][7/2459]Time: 0.377 (0.514) Data: 0.332 (0.465) Loss: 0.3537 (0.3581)
10-NN,s=0.1: TOP1:  39.458333333333336
best accuracy: 40.21

Epoch: 28
ResNet1D
error:  0.000986561409401232 step  1661
cost:  0.4119992899007616
opt took 0.01min, 1661iters
Epoch: [28][0/2459]Time: 0.459 (0.459) Data: 0.413 (0.413) Loss: 0.3417 (0.3417)
error:  0.0009935410564766434 step  2011
cost:  0.3917406983229844
opt took 0.01min, 2011iters
Epoch: [28][1/2459]Time: 0.556 (0.508) Data: 0.511 (0.462) Loss: 0.3308 (0.3362)
error:  0.0009980791417155688 step  7821
cost:  0.39767236438981035
opt took 0.03min, 7821iters
Epoch: [28][2/2459]Time: 1.692 (0.902) Data: 1.646 (0.857) Loss: 0.3260 (0.3328)
error:  0.0009998004015262518 step  7811
cost:  0.40033042358937554
opt took 0.02min, 7811iters
Epoch: [28][3/2459]Time: 1.330 (1.009) 

In [84]:
!ls './resnet1d_exp'

best_ckpt.t7  ep0.t7


In [97]:
checkpoint = torch.load('%s'%exp+'/best_ckpt.t7' )
model = resnet18(num_classes=numc, in_channel=dims_num)
model.load_state_dict(checkpoint['net'])
for param in model.features.parameters():
    param.requires_grad = False
model.top_layer = nn.Linear(magic_dim, 256),
                  nn.Linear(100,num_classes)
model.headcount = 1
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=5e-4)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

start = time.time()
for epoch in range(start_epoch, start_epoch + epochs):
    print('\nEpoch: %d' % epoch)
    print(name)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    model.top_layer.train()

    end = time.time()
    best_acc = 0
    for batch_idx, (inputs, targets, indexes) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        inputs = inputs.float().to(device)
        niter = epoch * N + batch_idx
        if len(optimize_times) > 0 and niter * batch_size >= optimize_times[-1]:
            with torch.no_grad():
                _ = optimize_times.pop()
        data_time.update(time.time() - end)
        inputs, targets = inputs.to(device), targets.to(device).long()#, indexes.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if True:
#         if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}]'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                epoch, batch_idx, N, batch_time=batch_time, data_time=data_time, train_loss=train_loss))
    acc = []
    for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_test, y_test, batch_size, shuffle=False)):
        with torch.no_grad():
            targets = targets
            inputs = inputs.to(device)
            pred = model(inputs).argmax(dim=1)
            acc.append((pred == targets.to(device)).float().mean())
    acc = torch.Tensor(acc)
    if torch.mean(acc)>best_acc:
        best_acc = torch.mean(acc)
        state = {
            'net': model.state_dict(),
            'acc': best_acc,
            'epoch': epoch,
            'opt': optimizer.state_dict(),
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/my_best_ckpt.t7' % (exp))
print("best accuracy:", best_acc)


Epoch: 0
ResNet1D
Epoch: [0][0/2459]Time: 0.009 (0.009) Data: 0.001 (0.001) Loss: 2.7465 (2.7465)
Epoch: [0][1/2459]Time: 0.009 (0.009) Data: 0.001 (0.001) Loss: 6.9886 (4.8675)
Epoch: [0][2/2459]Time: 0.009 (0.009) Data: 0.001 (0.001) Loss: 9.7415 (6.4922)
Epoch: [0][3/2459]Time: 0.009 (0.009) Data: 0.002 (0.001) Loss: 10.1891 (7.4164)
Epoch: [0][4/2459]Time: 0.009 (0.009) Data: 0.001 (0.001) Loss: 12.8986 (8.5129)
Epoch: [0][5/2459]Time: 0.009 (0.009) Data: 0.002 (0.001) Loss: 14.5807 (9.5242)
Epoch: [0][6/2459]Time: 0.009 (0.009) Data: 0.002 (0.002) Loss: 19.0564 (10.8859)
Epoch: [0][7/2459]Time: 0.009 (0.009) Data: 0.002 (0.002) Loss: 25.1008 (12.6628)

Epoch: 1
ResNet1D
Epoch: [1][0/2459]Time: 0.009 (0.009) Data: 0.002 (0.002) Loss: 30.0927 (30.0927)
Epoch: [1][1/2459]Time: 0.009 (0.009) Data: 0.001 (0.001) Loss: 34.5391 (32.3159)
Epoch: [1][2/2459]Time: 0.009 (0.009) Data: 0.001 (0.001) Loss: 43.6711 (36.1010)
Epoch: [1][3/2459]Time: 0.009 (0.009) Data: 0.001 (0.001) Loss: 38.97


Epoch: 13
ResNet1D
Epoch: [13][0/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 33.0037 (33.0037)
Epoch: [13][1/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 37.4506 (35.2272)
Epoch: [13][2/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 41.5180 (37.3241)
Epoch: [13][3/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 50.7870 (40.6898)
Epoch: [13][4/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 48.5341 (42.2587)
Epoch: [13][5/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 40.8700 (42.0272)
Epoch: [13][6/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 52.3112 (43.4964)
Epoch: [13][7/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 54.2547 (44.8412)

Epoch: 14
ResNet1D
Epoch: [14][0/2459]Time: 0.012 (0.012) Data: 0.002 (0.002) Loss: 35.1577 (35.1577)
Epoch: [14][1/2459]Time: 0.011 (0.012) Data: 0.001 (0.002) Loss: 42.9316 (39.0446)
Epoch: [14][2/2459]Time: 0.011 (0.012) Data: 0.001 (0.002) Loss: 41.6714 (39.9202)
Epoch: [14][3/2459]Time: 0.010 (0.011) Data: 0.


Epoch: 26
ResNet1D
Epoch: [26][0/2459]Time: 0.008 (0.008) Data: 0.002 (0.002) Loss: 35.2589 (35.2589)
Epoch: [26][1/2459]Time: 0.007 (0.008) Data: 0.001 (0.001) Loss: 36.3146 (35.7867)
Epoch: [26][2/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 26.9745 (32.8493)
Epoch: [26][3/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 38.1892 (34.1843)
Epoch: [26][4/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 52.3237 (37.8122)
Epoch: [26][5/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 44.5439 (38.9341)
Epoch: [26][6/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 27.9803 (37.3693)
Epoch: [26][7/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 27.9673 (36.1941)

Epoch: 27
ResNet1D
Epoch: [27][0/2459]Time: 0.008 (0.008) Data: 0.001 (0.001) Loss: 33.5173 (33.5173)
Epoch: [27][1/2459]Time: 0.007 (0.008) Data: 0.001 (0.001) Loss: 48.2758 (40.8966)
Epoch: [27][2/2459]Time: 0.007 (0.007) Data: 0.001 (0.001) Loss: 45.7723 (42.5218)
Epoch: [27][3/2459]Time: 0.007 (0.007) Data: 0.

In [91]:
y_train.min()

tensor(5, dtype=torch.int32)