## Imports

In [1]:
import sys
import os
import argparse
import time
import random
import math
import numpy as np
from scipy.special import logsumexp

import torch
import torchvision
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as tfs

from tensorboardX import SummaryWriter


from utils import kNN, AverageMeter, py_softmax

## Training parameters

In [2]:
#data
datadir = "/root/data/Multivariate_ts"

# optimization
lamb = 0.1      # SK lambda-parameter
nopts = 400    # number of SK-optimizations
epochs = 400   # numbers of epochs
momentum = 0.9 # sgd momentum
exp = './resnet1d_exp' # experiments results dir


# other
devc='0'  # cuda device
batch_size = 100
lr=0.03     #learning rate
alr=0.03    #starting learning rate

knn_dim = 20
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

In [3]:
device = torch.device('cuda:' + devc) if torch.cuda.is_available() else torch.device('cpu')
print(f"GPU device: {torch.cuda.current_device()}")

GPU device: 0


## Model parameters (AlexNet in that case)

In [4]:
hc=10       # number of heads
ncl=128       # number of clusters

numc = [ncl] * hc
# # (number of filters, kernel size, stride, pad) for AlexNet, two vesions
# CFG = {
#     'big': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M'],
#     'small': [(64, 11, 4, 2), 'M', (192, 5, 1, 2), 'M', (384, 3, 1, 1), (256, 3, 1, 1), (256, 3, 1, 1), 'M']
# }

## Data Preparation

In [5]:
import pandas as pd
import numpy as np
from scipy.io import arff
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from sktime.utils.load_data import load_from_tsfile_to_dataframe

from tqdm import tqdm
import os
from datetime import datetime

In [6]:
def preproc(X, dims_num):
    X = X.applymap(np.array)
    cols_lst = X.columns
    
    for col_name in cols_lst:
        dimension_values = np.dstack(list(X[col_name].values))[0].T
        time_steps_num = dimension_values.shape[1]
        
        for i in range(time_steps_num):
            one_step_values = dimension_values[:, i]
            X[col_name + f'_{i}'] = one_step_values
    
    X = X.drop(columns=cols_lst)
    
    step = int(X.shape[1] / dims_num)
    X_3d = []
    init = 0
    for dim_num in range(dims_num):
        X_3d.append(X.iloc[:, init:init + step])
        init += step
    X_3d = np.dstack(X_3d)
    return X_3d

def load_file(filepath, dims_num, is_arff=False):
    if is_arff:
        data = arff.loadarff(filepath) 
        data = pd.DataFrame(data[0])
        X = data.iloc[:, :-1] # [30, 65] x 4 times
        y = data.iloc[:, -1]
        return X.values, y.values
    else:
        X, y = load_from_tsfile_to_dataframe(filepath)
        X = preproc(X, dims_num)
        return X, y

def load_group(prefix, filenames, dims_num, is_arff=False):
    loaded = []
    if is_arff:
        for name in filenames: 
            X, y = load_file(prefix + "/" + name, dims_num, is_arff=is_arff) # [30, 65]
            loaded.append(X)
            # stack group so that features are the 3rd dimension 
        loaded = np.dstack(loaded) # [30, 65, 4]
    else:
        loaded, y = load_file(prefix + "/" + filenames[0], dims_num, is_arff=is_arff) # [30, 65, 4]
    return loaded, y

def load_dataset_group(folder_path, ds_path, dims_num, is_train=True, label_enc=False, is_arff=False): 
    filenames = []
    extension = ".arff" if is_arff else ".ts"
    postfix = "_TRAIN" if is_train else "_TEST"
    
    if is_arff:
        for dim_num in range(1, dims_num + 1):
            filenames.append(ds_path + str(dim_num) + postfix + extension)
    else:
        filenames.append(ds_path + postfix + extension)

    X, y = load_group(folder_path, filenames, dims_num, is_arff=is_arff) # [30, 65, 4]
    X = torch.from_numpy(np.array(X, dtype=np.float64))
    if label_enc:
        le = preprocessing.LabelEncoder()
        y = le.fit_transform(y)
        y = torch.from_numpy(np.array(y, dtype=np.int32))
    else:
        y = torch.from_numpy(np.array(y, dtype=np.int32)) - 1
    X = X.transpose(1, 2)
    return X, y

def load_dataset(folder_path, ds_path, dims_num, label_enc=False, is_arff=False): 
    X_train, y_train = load_dataset_group(folder_path, ds_path, dims_num, 
                                          is_train=True, label_enc=label_enc, is_arff=is_arff) 
    X_test, y_test = load_dataset_group(folder_path, ds_path, dims_num, 
                                        is_train=False, label_enc=label_enc, is_arff=is_arff) 
    return X_train, y_train, X_test, y_test

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt], excerpt

In [7]:
#ds_path = "ERing/ERingDimension"
#ds_path = "SpokenArabicDigits/SpokenArabicDigitsDimension"
# dims_num = 13
# num_classes = 10
# magic_dim = 4500
label_enc = True
ds_path = "LSST/LSST"
dims_num = 6
num_classes = 14
magic_dim = 2304

# ds_path = "HandMovementDirection/HandMovementDirection"
# label_enc = True
# dims_num = 10
# num_classes = 4
# magic_dim = 25600
# ds_path = "PenDigits/PenDigits"
# label_enc = True
# dims_num = 2
# num_classes = 4
# magic_dim = 512

X_train, y_train, X_test, y_test = load_dataset(datadir, ds_path, dims_num, label_enc=False, is_arff=False)
print("X_train.shape:", X_train.shape, "\ny_train.shape:", y_train.shape)
print("X_test.shape:", X_test.shape, "\ny_test.shape:", y_test.shape)

X_train.shape: torch.Size([2459, 6, 36]) 
y_train.shape: torch.Size([2459])
X_test.shape: torch.Size([2466, 6, 36]) 
y_test.shape: torch.Size([2466])


In [8]:
N = X_train.shape[0]
N

2459

In [9]:
X_train.shape

torch.Size([2459, 6, 36])

## Model, ResNet

In [10]:
import torch.nn as nn
import math

__all__ = ['resnetv1','resnetv1_18']

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv1d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

class Normalize(nn.Module):
    def __init__(self, power=2):
        super(Normalize, self).__init__()
        self.power = power

    def forward(self, x):
        norm = x.pow(self.power).sum(1, keepdim=True).pow(1. / self.power)
        out = x.div(norm)
        return out

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm1d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm1d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv1d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(planes)
        self.conv3 = nn.Conv1d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm1d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, in_channel=3, width=1, num_classes=[1000]):
        self.inplanes = 16
        super(ResNet, self).__init__()
        self.headcount = len(num_classes)
        self.base = int(16 * width)
        self.features = nn.Sequential(*[                                                     # [100, 8, 18]
                            nn.Conv1d(in_channel, 16, kernel_size=3, padding=1, bias=False), # [100, 16, 36]
                            nn.BatchNorm1d(16),
                            nn.ReLU(inplace=True),
                            self._make_layer(block, self.base, layers[0]),                   # [100, 16, 36]
                            self._make_layer(block, self.base * 2, layers[1]),               # [100, 32, 36]
                            self._make_layer(block, self.base * 4, layers[2]),               # [100, 64, 36]
                            self._make_layer(block, self.base * 8, layers[3]),               # [100, 128, 36]
                            nn.AvgPool1d(2),                                                 # [100, 128, 18]
        ])
    
        if len(num_classes) == 1:
            self.top_layer = nn.Sequential(nn.Linear(magic_dim, num_classes[0]))
        else:
            for a, i in enumerate(num_classes):
                setattr(self, "top_layer%d" % a, nn.Linear(magic_dim, i))
            self.top_layer = None
        for m in self.features.modules():
            if isinstance(m, nn.Conv1d):
                n = m.kernel_size[0] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv1d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.features(x.float())
        out = out.view(out.size(0), -1)
        if self.headcount == 1:
            if self.top_layer:
                out = self.top_layer(out)
            return out
        else:
            outp = []
            for i in range(self.headcount):
                outp.append(getattr(self, "top_layer%d" % i)(out))
            return outp

def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def resnetv1_18(num_classes=[1000]):
    """Encoder for instance discrimination and MoCo"""
    return resnet18(num_classes=num_classes)

## Sinkhorn-Knopp optimization

In [11]:
def optimize_L_sk(PS):
    N, K = PS.shape
    tt = time.time()
    PS = PS.T  # now it is K x N
    r = np.ones((K, 1)) / K
    c = np.ones((N, 1)) / N
    PS **= lamb  # K x N
    inv_K = 1. / K
    inv_N = 1. / N
    err = 1e3
    _counter = 0
    while err > 1e-2:
        r = inv_K / (PS @ c)  # (KxN)@(N,1) = K x 1
        c_new = inv_N / (r.T @ PS).T  # ((1,K)@(KxN)).t() = N x 1
        if _counter % 10 == 0:
            err = np.nansum(np.abs(c / c_new - 1))
        c = c_new
        _counter += 1
        
    print("error: ", err, 'step ', _counter, flush=True)  # " nonneg: ", sum(I), flush=True)
    # inplace calculations.
    PS *= np.squeeze(c)
    PS = PS.T
    PS *= np.squeeze(r)
    PS = PS.T
    argmaxes = np.nanargmax(PS, 0)  # size N
    newL = torch.LongTensor(argmaxes)
    selflabels = newL.to(device)
    PS = PS.T
    PS /= np.squeeze(r)
    PS = PS.T
    PS /= np.squeeze(c)
    sol = PS[argmaxes, np.arange(N)]
    np.log(sol, sol)
    cost = -(1. / lamb) * np.nansum(sol) / N
    print('cost: ', cost, flush=True)
    print('opt took {0:.2f}min, {1:4d}iters'.format(((time.time() - tt) / 60.), _counter), flush=True)
    return cost, selflabels

def opt_sk(model, selflabels_in, epoch):
    if hc == 1:
        PS = np.zeros((N, ncl))
    else:
        PS_pre = np.zeros((N, magic_dim)) # knn_dim
    
    for batch_idx, (data, _, _selected) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        data = data.to(device)#cuda()
        if hc == 1:
            p = nn.functional.softmax(model(data), 1)
            PS[_selected, :] = p.detach().cpu().numpy()
        else:
            p = model(data.float())
            PS_pre[_selected, :] = p.detach().cpu().numpy()
    if hc == 1:
        cost, selflabels = optimize_L_sk(PS)
        _costs = [cost]
    else:
        _nmis = np.zeros(hc)
        _costs = np.zeros(hc)
        nh = epoch % hc  # np.random.randint(args.hc)
        print("computing head %s " % nh, end="\r", flush=True)
        tl = getattr(model, "top_layer%d" % nh)
        # do the forward pass:
        PS = (PS_pre @ tl.weight.cpu().numpy().T
                   + tl.bias.cpu().numpy())
        PS = py_softmax(PS, 1)
        c, selflabels_ = optimize_L_sk(PS)
        _costs[nh] = c
        selflabels_in[nh] = selflabels_
        selflabels = selflabels_in
    return selflabels

## Training utils

In [12]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = alr
    if epochs == 200:
        if epoch >= 80:
            lr = alr * (0.1 ** ((epoch - 80) // 40))  # i.e. 120, 160
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 400:
        if epoch >= 160:
            lr = alr * (0.1 ** ((epoch - 160) // 80))  # i.e. 240,320
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 800:
        if epoch >= 320:
            lr = alr * (0.1 ** ((epoch - 320) // 160))  # i.e. 480, 640
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    elif epochs == 1600:
        if epoch >= 640:
            lr = alr * (0.1 ** ((epoch - 640) // 320))
            print(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

In [13]:
def feature_return_switch(model, bool=True):
    """
    switch between network output or conv5features
        if True: changes switch s.t. forward pass returns post-conv5 features
        if False: changes switch s.t. forward will give full network output
    """
    if bool:
        model.headcount = 1
    else:
        model.headcount = hc
    model.return_feature = bool

In [14]:
def train(epoch, selflabels):
    print('\nEpoch: %d' % epoch)
    print(name)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    
    for batch_idx, (inputs, targets, indexes) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=True)):
        inputs = inputs.float().to(device)
        niter = epoch * N//batch_size + batch_idx
        if len(optimize_times) > 0 and niter * batch_size >= optimize_times[-1]:
            with torch.no_grad():
                _ = optimize_times.pop()
                if hc >1:
                    feature_return_switch(model, True)
                selflabels = opt_sk(model, selflabels, epoch)
                if hc >1:
                    feature_return_switch(model, False)
        data_time.update(time.time() - end)
        inputs, targets = inputs.to(device), targets.to(device)#, indexes.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        if hc == 1:
            loss = criterion(outputs, selflabels[indexes])
        else:
            loss = torch.mean(torch.stack([criterion(outputs[h],
                                                     selflabels[h, indexes]) for h in range(hc)]))

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if True:
#         if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}]'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                epoch, batch_idx, N//batch_size, batch_time=batch_time, data_time=data_time, train_loss=train_loss))
#             writer.add_scalar("loss", loss.item(), batch_idx*512 +epoch*len(trainloader.dataset))
    return selflabels

## Model initialization

In [15]:
model = resnet18(num_classes=numc, in_channel=dims_num)

In [16]:
optimize_times = ((epochs + 1.0001)*N*(np.linspace(0, 1, nopts))[::-1]).tolist()
optimize_times = [(epochs +10)*N] + optimize_times
print('We will optimize L at epochs:', [np.round(1.0*t/N, 2) for t in optimize_times], flush=True)

We will optimize L at epochs: [410.0, 401.0, 400.0, 398.99, 397.99, 396.98, 395.98, 394.97, 393.97, 392.96, 391.95, 390.95, 389.94, 388.94, 387.93, 386.93, 385.92, 384.92, 383.91, 382.91, 381.9, 380.9, 379.89, 378.89, 377.88, 376.88, 375.87, 374.87, 373.86, 372.86, 371.85, 370.85, 369.84, 368.84, 367.83, 366.83, 365.82, 364.82, 363.81, 362.81, 361.8, 360.8, 359.79, 358.79, 357.78, 356.78, 355.77, 354.77, 353.76, 352.76, 351.75, 350.75, 349.74, 348.74, 347.73, 346.73, 345.72, 344.72, 343.71, 342.71, 341.7, 340.7, 339.69, 338.69, 337.68, 336.68, 335.67, 334.67, 333.66, 332.66, 331.65, 330.65, 329.64, 328.64, 327.63, 326.63, 325.62, 324.62, 323.61, 322.61, 321.6, 320.6, 319.59, 318.59, 317.58, 316.58, 315.57, 314.57, 313.56, 312.56, 311.55, 310.55, 309.54, 308.54, 307.53, 306.53, 305.52, 304.52, 303.51, 302.51, 301.5, 300.5, 299.49, 298.49, 297.48, 296.48, 295.47, 294.47, 293.46, 292.46, 291.45, 290.45, 289.44, 288.44, 287.43, 286.43, 285.42, 284.42, 283.41, 282.41, 281.4, 280.4, 279.39, 

In [17]:
optimize_times

[1008190,
 986059.2459,
 983587.9194691728,
 981116.5930383457,
 978645.2666075188,
 976173.9401766917,
 973702.6137458646,
 971231.2873150375,
 968759.9608842104,
 966288.6344533834,
 963817.3080225564,
 961345.9815917292,
 958874.6551609022,
 956403.3287300752,
 953932.002299248,
 951460.675868421,
 948989.349437594,
 946518.0230067669,
 944046.6965759398,
 941575.3701451126,
 939104.0437142856,
 936632.7172834587,
 934161.3908526315,
 931690.0644218044,
 929218.7379909774,
 926747.4115601503,
 924276.0851293232,
 921804.7586984962,
 919333.4322676691,
 916862.1058368421,
 914390.779406015,
 911919.4529751878,
 909448.1265443609,
 906976.8001135339,
 904505.4736827067,
 902034.1472518796,
 899562.8208210525,
 897091.4943902255,
 894620.1679593985,
 892148.8415285713,
 889677.5150977443,
 887206.1886669173,
 884734.8622360901,
 882263.5358052631,
 879792.2093744361,
 877320.882943609,
 874849.5565127819,
 872378.2300819547,
 869906.9036511277,
 867435.5772203008,
 864964.2507894736,
 

In [18]:
# init selflabels randomly
if hc == 1:
    selflabels = np.zeros(N, dtype=np.int32)
    for qq in range(N):
        selflabels[qq] = qq % ncl
    selflabels = np.random.permutation(selflabels)
    selflabels = torch.LongTensor(selflabels).to(device)
else:
    selflabels = np.zeros((hc, N), dtype=np.int32)
    for nh in range(hc):
        for _i in range(N):
            selflabels[nh, _i] = _i % numc[nh]
        selflabels[nh] = np.random.permutation(selflabels[nh])
    selflabels = torch.LongTensor(selflabels).to(device)

In [19]:
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=5e-4)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

In [20]:
name = "ResNet1D"
writer = SummaryWriter(f'./runs/ERing/{name}')

## Training! 
Takes a couple of minutes per epoch

In [21]:
def my_kNN(net, K, sigma=0.1, dim=128, use_pca=False):
    net.eval()
    # this part is ugly but made to be backwards-compatible. there was a change in cifar dataset's structure.
    trainLabels = y_train
    LEN = N
    C = trainLabels.max() + 1

    trainFeatures = torch.zeros((magic_dim, LEN))  # , device='cuda:0') # dim
    normalize = Normalize()
    for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_train, y_train, batch_size, shuffle=False)):
        batchSize = batch_size
        inputs = inputs.cuda()
        features = net(inputs.float())
        if not use_pca:
            features = normalize(features)
        tmp = trainFeatures[:, batch_idx * batchSize:batch_idx * batchSize + batchSize]
        trainFeatures[:, batch_idx * batchSize:batch_idx * batchSize + batchSize] = features.data.t().cpu()
        
    if use_pca:
        comps = 128
        print('doing PCA with %s components'%comps, end=' ')
        from sklearn.decomposition import PCA
        pca = PCA(n_components=comps, whiten=False)
        trainFeatures = pca.fit_transform(trainFeatures.numpy().T)
        trainFeatures = torch.Tensor(trainFeatures)
        trainFeatures = normalize(trainFeatures).t()
        print('..done')
    def eval_k_s(K_,sigma_):
        total = 0
        top1 = 0.
        top5 = 0.

        with torch.no_grad():
            retrieval_one_hot = torch.zeros(K_, C)# .cuda()
            for batch_idx, (inputs, targets, _) in enumerate(iterate_minibatches(X_test, y_test, batch_size, shuffle=False)):
                targets = targets # .cuda(async=True) # or without async for py3.7
                inputs = inputs.cuda()
                batchSize = batch_size
                features = net(inputs)
                if use_pca:
                    features = pca.transform(features.cpu().numpy())
                    features = torch.Tensor(features).cuda()
                features = normalize(features).cpu()

                dist = torch.mm(features, trainFeatures)

                yd, yi = dist.topk(K_, dim=1, largest=True, sorted=True)
                candidates = trainLabels.view(1, -1).expand(batchSize, -1)
                retrieval = torch.gather(candidates, 1, yi).long()

                retrieval_one_hot.resize_(batchSize * K_, C).zero_()
                retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1.)
                
                yd_transform = yd.clone().div_(sigma_).exp_()
                probs = torch.sum(torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                                            yd_transform.view(batchSize, -1, 1)),
                                  1)
                _, predictions = probs.sort(1, True)

                # Find which predictions match the target
                correct = predictions.eq(targets.data.view(-1, 1))

                top1 = top1 + correct.narrow(1, 0, 1).sum().item()
                top5 = top5 + correct.narrow(1, 0, 5).sum().item()

                total += targets.size(0)

        print(f"{K_}-NN,s={sigma_}: TOP1: ", top1 * 100. / total)
        return top1 / total

    if isinstance(K, list):
        res = []
        for K_ in K:
            for sigma_ in sigma:
                res.append(eval_k_s(K_, sigma_))
        return res
    else:
        res = eval_k_s(K, sigma)
        return res

In [22]:
import time

start = time.time()
for epoch in range(start_epoch, start_epoch + epochs):
    print('Size pf grid: ', len(optimize_times))
    selflabels = train(epoch, selflabels)
    feature_return_switch(model, True)
    
    acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim)
    feature_return_switch(model, False)
#     writer.add_scalar("accuracy kNN", acc, epoch)
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'opt': optimizer.state_dict(),
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/best_ckpt.t7' % (exp))
        best_acc = acc
    if epoch % 100 == 0:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'opt': optimizer.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'L': selflabels,
        }
        if not os.path.isdir(exp):
            os.mkdir(exp)
        torch.save(state, '%s/ep%s.t7' % (exp, epoch))
    if epoch % 50 == 0:
        print("hello")
        feature_return_switch(model, True)
        acc = my_kNN(model, K=[50, 10], sigma=[0.1, 0.5], dim=knn_dim, use_pca=True)
        i = 0
#         for num_nn in [50, 10]:
#             for sig in [0.1, 0.5]:
#                 writer.add_scalar('knn%s-%s' % (num_nn, sig), acc[i], epoch)
#                 i += 1
        feature_return_switch(model, False)
    print('best accuracy: {:.2f}'.format(best_acc * 100))
end = time.time()

checkpoint = torch.load('%s'%exp+'/best_ckpt.t7' )
model.load_state_dict(checkpoint['net'])
feature_return_switch(model, True)
acc = my_kNN(model, K=10, sigma=0.1, dim=knn_dim, use_pca=True)

Size pf grid:  401

Epoch: 0
ResNet1D
error:  2.382538610845586e-13 step  11
cost:  4.579266110928804
opt took 0.00min,   11iters
Epoch: [0][0/24]Time: 0.301 (0.301) Data: 0.260 (0.260) Loss: 5.0348 (5.0348)
Epoch: [0][1/24]Time: 0.029 (0.165) Data: 0.001 (0.131) Loss: 4.9830 (5.0089)
Epoch: [0][2/24]Time: 0.028 (0.119) Data: 0.001 (0.088) Loss: 5.0335 (5.0171)
Epoch: [0][3/24]Time: 0.028 (0.097) Data: 0.001 (0.066) Loss: 4.9675 (5.0047)
Epoch: [0][4/24]Time: 0.025 (0.082) Data: 0.001 (0.053) Loss: 4.9766 (4.9991)
Epoch: [0][5/24]Time: 0.025 (0.073) Data: 0.001 (0.044) Loss: 4.9950 (4.9984)
Epoch: [0][6/24]Time: 0.024 (0.066) Data: 0.001 (0.038) Loss: 4.9466 (4.9910)
Epoch: [0][7/24]Time: 0.024 (0.061) Data: 0.001 (0.033) Loss: 4.9021 (4.9799)
Epoch: [0][8/24]Time: 0.024 (0.057) Data: 0.001 (0.030) Loss: 4.9059 (4.9717)
Epoch: [0][9/24]Time: 0.024 (0.053) Data: 0.001 (0.027) Loss: 4.8501 (4.9595)
Epoch: [0][10/24]Time: 0.023 (0.051) Data: 0.001 (0.025) Loss: 4.8659 (4.9510)
Epoch: [0][

10-NN,s=0.1: TOP1:  44.791666666666664
best accuracy: 46.42
Size pf grid:  397

Epoch: 4
ResNet1D
Epoch: [4][0/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 3.8785 (3.8785)
error:  1.694200335577989e-13 step  11
cost:  4.21391828745979
opt took 0.00min,   11iters
Epoch: [4][1/24]Time: 0.254 (0.139) Data: 0.226 (0.114) Loss: 3.9255 (3.9020)
Epoch: [4][2/24]Time: 0.034 (0.104) Data: 0.001 (0.076) Loss: 3.7410 (3.8484)
Epoch: [4][3/24]Time: 0.049 (0.090) Data: 0.009 (0.059) Loss: 3.8194 (3.8411)
Epoch: [4][4/24]Time: 0.027 (0.078) Data: 0.005 (0.048) Loss: 3.7375 (3.8204)
Epoch: [4][5/24]Time: 0.024 (0.069) Data: 0.001 (0.040) Loss: 3.7057 (3.8013)
Epoch: [4][6/24]Time: 0.031 (0.063) Data: 0.001 (0.035) Loss: 3.6077 (3.7736)
Epoch: [4][7/24]Time: 0.035 (0.060) Data: 0.001 (0.031) Loss: 3.6430 (3.7573)
Epoch: [4][8/24]Time: 0.032 (0.057) Data: 0.001 (0.027) Loss: 3.6858 (3.7494)
Epoch: [4][9/24]Time: 0.030 (0.054) Data: 0.001 (0.025) Loss: 3.5045 (3.7249)
Epoch: [4][10/24]Time: 0.028 (0

10-NN,s=0.1: TOP1:  47.875
Saving..
best accuracy: 47.88
Size pf grid:  393

Epoch: 8
ResNet1D
Epoch: [8][0/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 2.2580 (2.2580)
Epoch: [8][1/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 2.3121 (2.2850)
error:  8.459899447643693e-14 step  11
cost:  3.3880880794283157
opt took 0.00min,   11iters
Epoch: [8][2/24]Time: 0.235 (0.093) Data: 0.210 (0.070) Loss: 2.3247 (2.2983)
Epoch: [8][3/24]Time: 0.028 (0.076) Data: 0.001 (0.053) Loss: 2.0861 (2.2452)
Epoch: [8][4/24]Time: 0.026 (0.066) Data: 0.001 (0.043) Loss: 2.2957 (2.2553)
Epoch: [8][5/24]Time: 0.026 (0.060) Data: 0.001 (0.036) Loss: 2.3754 (2.2753)
Epoch: [8][6/24]Time: 0.024 (0.055) Data: 0.001 (0.031) Loss: 2.2043 (2.2652)
Epoch: [8][7/24]Time: 0.021 (0.050) Data: 0.001 (0.027) Loss: 2.2647 (2.2651)
Epoch: [8][8/24]Time: 0.021 (0.047) Data: 0.001 (0.024) Loss: 2.4034 (2.2805)
Epoch: [8][9/24]Time: 0.021 (0.045) Data: 0.001 (0.022) Loss: 2.1089 (2.2633)
Epoch: [8][10/24]Time: 0.021 (0.

10-NN,s=0.1: TOP1:  48.791666666666664
best accuracy: 49.42
Size pf grid:  389

Epoch: 12
ResNet1D
Epoch: [12][0/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 1.2621 (1.2621)
Epoch: [12][1/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 1.2395 (1.2508)
error:  1.3788969965844444e-13 step  11
cost:  1.9401732355744015
opt took 0.00min,   11iters
Epoch: [12][2/24]Time: 0.349 (0.131) Data: 0.313 (0.105) Loss: 1.5926 (1.3647)
Epoch: [12][3/24]Time: 0.033 (0.106) Data: 0.001 (0.079) Loss: 1.1606 (1.3137)
Epoch: [12][4/24]Time: 0.031 (0.091) Data: 0.001 (0.063) Loss: 1.2141 (1.2938)
Epoch: [12][5/24]Time: 0.027 (0.081) Data: 0.001 (0.053) Loss: 1.4505 (1.3199)
Epoch: [12][6/24]Time: 0.024 (0.072) Data: 0.001 (0.046) Loss: 1.1665 (1.2980)
Epoch: [12][7/24]Time: 0.023 (0.066) Data: 0.001 (0.040) Loss: 1.3194 (1.3007)
Epoch: [12][8/24]Time: 0.023 (0.062) Data: 0.001 (0.036) Loss: 1.2158 (1.2912)
Epoch: [12][9/24]Time: 0.024 (0.058) Data: 0.001 (0.032) Loss: 1.2296 (1.2851)
Epoch: [12][10/24

Epoch: [15][22/24]Time: 0.022 (0.033) Data: 0.001 (0.011) Loss: 1.0695 (0.9769)
Epoch: [15][23/24]Time: 0.022 (0.032) Data: 0.001 (0.010) Loss: 0.7966 (0.9694)
10-NN,s=0.1: TOP1:  52.166666666666664
Saving..
best accuracy: 52.17
Size pf grid:  385

Epoch: 16
ResNet1D
Epoch: [16][0/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 1.0427 (1.0427)
Epoch: [16][1/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.8022 (0.9224)
Epoch: [16][2/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.6536 (0.8328)
error:  4.9182879990894435e-14 step  11
cost:  1.2445062237870486
opt took 0.00min,   11iters
Epoch: [16][3/24]Time: 0.246 (0.079) Data: 0.219 (0.055) Loss: 0.9035 (0.8505)
Epoch: [16][4/24]Time: 0.027 (0.068) Data: 0.001 (0.045) Loss: 1.0572 (0.8918)
Epoch: [16][5/24]Time: 0.027 (0.061) Data: 0.001 (0.037) Loss: 0.8023 (0.8769)
Epoch: [16][6/24]Time: 0.027 (0.057) Data: 0.001 (0.032) Loss: 0.7594 (0.8601)
Epoch: [16][7/24]Time: 0.024 (0.052) Data: 0.001 (0.028) Loss: 0.8151 (0.8545)
Epoch:

10-NN,s=0.1: TOP1:  51.666666666666664
best accuracy: 52.17
Size pf grid:  381

Epoch: 20
ResNet1D
Epoch: [20][0/24]Time: 0.024 (0.024) Data: 0.002 (0.002) Loss: 0.4724 (0.4724)
Epoch: [20][1/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.5416 (0.5070)
Epoch: [20][2/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.4768 (0.4970)
Epoch: [20][3/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.4723 (0.4908)
error:  7.582823258189819e-14 step  11
cost:  0.5151502027924167
opt took 0.00min,   11iters
Epoch: [20][4/24]Time: 0.245 (0.068) Data: 0.216 (0.044) Loss: 0.7716 (0.5470)
Epoch: [20][5/24]Time: 0.028 (0.061) Data: 0.001 (0.037) Loss: 0.5525 (0.5479)
Epoch: [20][6/24]Time: 0.026 (0.056) Data: 0.001 (0.032) Loss: 0.5255 (0.5447)
Epoch: [20][7/24]Time: 0.026 (0.053) Data: 0.001 (0.028) Loss: 0.5091 (0.5402)
Epoch: [20][8/24]Time: 0.023 (0.049) Data: 0.001 (0.025) Loss: 0.6033 (0.5472)
Epoch: [20][9/24]Time: 0.023 (0.047) Data: 0.001 (0.023) Loss: 0.5964 (0.5522)
Epoch: [20][10/24]

Epoch: [23][22/24]Time: 0.023 (0.034) Data: 0.001 (0.011) Loss: 0.4727 (0.5208)
Epoch: [23][23/24]Time: 0.023 (0.033) Data: 0.001 (0.010) Loss: 0.4809 (0.5191)
10-NN,s=0.1: TOP1:  48.416666666666664
best accuracy: 52.17
Size pf grid:  377

Epoch: 24
ResNet1D
Epoch: [24][0/24]Time: 0.023 (0.023) Data: 0.002 (0.002) Loss: 0.3765 (0.3765)
Epoch: [24][1/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.4380 (0.4073)
Epoch: [24][2/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3843 (0.3996)
Epoch: [24][3/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3516 (0.3876)
error:  1.5165646516379638e-13 step  11
cost:  0.49172888756449035
opt took 0.00min,   11iters
Epoch: [24][4/24]Time: 0.311 (0.080) Data: 0.275 (0.056) Loss: 0.4243 (0.3950)
Epoch: [24][5/24]Time: 0.042 (0.073) Data: 0.002 (0.047) Loss: 0.4536 (0.4047)
Epoch: [24][6/24]Time: 0.036 (0.068) Data: 0.001 (0.040) Loss: 0.3496 (0.3968)
Epoch: [24][7/24]Time: 0.021 (0.062) Data: 0.001 (0.035) Loss: 0.4700 (0.4060)
Epoch: [24][8/

Epoch: [27][20/24]Time: 0.023 (0.037) Data: 0.001 (0.012) Loss: 0.3559 (0.3893)
Epoch: [27][21/24]Time: 0.023 (0.036) Data: 0.001 (0.012) Loss: 0.3556 (0.3878)
Epoch: [27][22/24]Time: 0.023 (0.035) Data: 0.001 (0.011) Loss: 0.3567 (0.3864)
Epoch: [27][23/24]Time: 0.023 (0.035) Data: 0.001 (0.011) Loss: 0.3001 (0.3828)
10-NN,s=0.1: TOP1:  52.375
best accuracy: 52.62
Size pf grid:  373

Epoch: 28
ResNet1D
Epoch: [28][0/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2443 (0.2443)
Epoch: [28][1/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3533 (0.2988)
Epoch: [28][2/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3911 (0.3295)
Epoch: [28][3/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2909 (0.3199)
error:  3.4083846855992306e-13 step  11
cost:  0.36490328607867023
opt took 0.00min,   11iters
Epoch: [28][4/24]Time: 0.339 (0.085) Data: 0.300 (0.061) Loss: 0.3986 (0.3356)
Epoch: [28][5/24]Time: 0.044 (0.078) Data: 0.001 (0.051) Loss: 0.3429 (0.3368)
Epoch: [28][6/24]Time: 0

Epoch: [31][20/24]Time: 0.026 (0.036) Data: 0.001 (0.011) Loss: 0.2972 (0.3141)
Epoch: [31][21/24]Time: 0.028 (0.035) Data: 0.001 (0.011) Loss: 0.4412 (0.3199)
Epoch: [31][22/24]Time: 0.033 (0.035) Data: 0.002 (0.010) Loss: 0.3090 (0.3194)
Epoch: [31][23/24]Time: 0.030 (0.035) Data: 0.001 (0.010) Loss: 0.3257 (0.3197)
10-NN,s=0.1: TOP1:  53.166666666666664
best accuracy: 53.25
Size pf grid:  369

Epoch: 32
ResNet1D
Epoch: [32][0/24]Time: 0.025 (0.025) Data: 0.001 (0.001) Loss: 0.2713 (0.2713)
Epoch: [32][1/24]Time: 0.031 (0.028) Data: 0.001 (0.001) Loss: 0.2226 (0.2469)
Epoch: [32][2/24]Time: 0.029 (0.028) Data: 0.001 (0.001) Loss: 0.4376 (0.3105)
Epoch: [32][3/24]Time: 0.030 (0.029) Data: 0.001 (0.001) Loss: 0.2379 (0.2924)
Epoch: [32][4/24]Time: 0.031 (0.029) Data: 0.001 (0.001) Loss: 0.2118 (0.2763)
error:  1.5520917884259688e-13 step  11
cost:  0.33805741859694494
opt took 0.00min,   11iters
Epoch: [32][5/24]Time: 0.317 (0.077) Data: 0.287 (0.049) Loss: 0.2820 (0.2772)
Epoch: [32][

Epoch: [35][18/24]Time: 0.031 (0.047) Data: 0.001 (0.016) Loss: 0.2907 (0.2811)
Epoch: [35][19/24]Time: 0.036 (0.046) Data: 0.003 (0.015) Loss: 0.3284 (0.2835)
Epoch: [35][20/24]Time: 0.034 (0.046) Data: 0.002 (0.014) Loss: 0.3084 (0.2847)
Epoch: [35][21/24]Time: 0.034 (0.045) Data: 0.001 (0.014) Loss: 0.2512 (0.2832)
Epoch: [35][22/24]Time: 0.031 (0.045) Data: 0.001 (0.013) Loss: 0.2892 (0.2834)
Epoch: [35][23/24]Time: 0.030 (0.044) Data: 0.001 (0.013) Loss: 0.2040 (0.2801)
10-NN,s=0.1: TOP1:  51.5
best accuracy: 53.25
Size pf grid:  365

Epoch: 36
ResNet1D
Epoch: [36][0/24]Time: 0.031 (0.031) Data: 0.002 (0.002) Loss: 0.1755 (0.1755)
Epoch: [36][1/24]Time: 0.036 (0.033) Data: 0.001 (0.001) Loss: 0.1815 (0.1785)
Epoch: [36][2/24]Time: 0.040 (0.036) Data: 0.002 (0.001) Loss: 0.1792 (0.1787)
Epoch: [36][3/24]Time: 0.034 (0.035) Data: 0.001 (0.001) Loss: 0.2883 (0.2061)
Epoch: [36][4/24]Time: 0.038 (0.036) Data: 0.001 (0.001) Loss: 0.2049 (0.2059)
error:  6.350475700855895e-14 step  11
c

Epoch: [39][17/24]Time: 0.035 (0.052) Data: 0.001 (0.022) Loss: 0.1981 (0.2139)
Epoch: [39][18/24]Time: 0.034 (0.051) Data: 0.001 (0.021) Loss: 0.1828 (0.2123)
Epoch: [39][19/24]Time: 0.034 (0.050) Data: 0.001 (0.020) Loss: 0.2462 (0.2140)
Epoch: [39][20/24]Time: 0.032 (0.050) Data: 0.001 (0.019) Loss: 0.2696 (0.2166)
Epoch: [39][21/24]Time: 0.031 (0.049) Data: 0.001 (0.018) Loss: 0.2151 (0.2166)
Epoch: [39][22/24]Time: 0.033 (0.048) Data: 0.001 (0.017) Loss: 0.2483 (0.2180)
Epoch: [39][23/24]Time: 0.031 (0.047) Data: 0.001 (0.017) Loss: 0.1754 (0.2162)
10-NN,s=0.1: TOP1:  51.916666666666664
best accuracy: 53.25
Size pf grid:  361

Epoch: 40
ResNet1D
Epoch: [40][0/24]Time: 0.028 (0.028) Data: 0.001 (0.001) Loss: 0.2326 (0.2326)
Epoch: [40][1/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.1409 (0.1868)
Epoch: [40][2/24]Time: 0.022 (0.024) Data: 0.001 (0.001) Loss: 0.1552 (0.1762)
Epoch: [40][3/24]Time: 0.022 (0.024) Data: 0.001 (0.001) Loss: 0.1632 (0.1730)
Epoch: [40][4/24]Time: 0.

Epoch: [43][17/24]Time: 0.029 (0.058) Data: 0.001 (0.026) Loss: 0.1939 (0.2142)
Epoch: [43][18/24]Time: 0.029 (0.056) Data: 0.001 (0.025) Loss: 0.1716 (0.2120)
Epoch: [43][19/24]Time: 0.028 (0.055) Data: 0.002 (0.023) Loss: 0.2155 (0.2122)
Epoch: [43][20/24]Time: 0.035 (0.054) Data: 0.001 (0.022) Loss: 0.2092 (0.2120)
Epoch: [43][21/24]Time: 0.028 (0.053) Data: 0.001 (0.021) Loss: 0.3285 (0.2173)
Epoch: [43][22/24]Time: 0.028 (0.052) Data: 0.001 (0.021) Loss: 0.1929 (0.2163)
Epoch: [43][23/24]Time: 0.028 (0.051) Data: 0.001 (0.020) Loss: 0.1409 (0.2131)
10-NN,s=0.1: TOP1:  52.25
best accuracy: 53.25
Size pf grid:  357

Epoch: 44
ResNet1D
Epoch: [44][0/24]Time: 0.028 (0.028) Data: 0.002 (0.002) Loss: 0.1688 (0.1688)
Epoch: [44][1/24]Time: 0.043 (0.036) Data: 0.016 (0.009) Loss: 0.1738 (0.1713)
Epoch: [44][2/24]Time: 0.032 (0.034) Data: 0.005 (0.007) Loss: 0.1478 (0.1635)
Epoch: [44][3/24]Time: 0.029 (0.033) Data: 0.001 (0.006) Loss: 0.1709 (0.1653)
Epoch: [44][4/24]Time: 0.026 (0.032) D

Epoch: [47][15/24]Time: 0.029 (0.061) Data: 0.001 (0.031) Loss: 0.2085 (0.1808)
Epoch: [47][16/24]Time: 0.028 (0.059) Data: 0.001 (0.029) Loss: 0.2266 (0.1835)
Epoch: [47][17/24]Time: 0.028 (0.057) Data: 0.001 (0.027) Loss: 0.2094 (0.1850)
Epoch: [47][18/24]Time: 0.029 (0.055) Data: 0.001 (0.026) Loss: 0.2217 (0.1869)
Epoch: [47][19/24]Time: 0.045 (0.055) Data: 0.001 (0.025) Loss: 0.1505 (0.1851)
Epoch: [47][20/24]Time: 0.028 (0.054) Data: 0.001 (0.024) Loss: 0.1712 (0.1844)
Epoch: [47][21/24]Time: 0.029 (0.052) Data: 0.001 (0.023) Loss: 0.1797 (0.1842)
Epoch: [47][22/24]Time: 0.030 (0.052) Data: 0.001 (0.022) Loss: 0.1822 (0.1841)
Epoch: [47][23/24]Time: 0.027 (0.050) Data: 0.001 (0.021) Loss: 0.2176 (0.1855)
10-NN,s=0.1: TOP1:  52.458333333333336
best accuracy: 53.25
Size pf grid:  353

Epoch: 48
ResNet1D
Epoch: [48][0/24]Time: 0.027 (0.027) Data: 0.001 (0.001) Loss: 0.1594 (0.1594)
Epoch: [48][1/24]Time: 0.026 (0.027) Data: 0.001 (0.001) Loss: 0.1897 (0.1745)
Epoch: [48][2/24]Time: 

Epoch: [51][11/24]Time: 0.050 (0.057) Data: 0.001 (0.026) Loss: 0.1625 (0.1581)
Epoch: [51][12/24]Time: 0.029 (0.055) Data: 0.001 (0.024) Loss: 0.2704 (0.1667)
Epoch: [51][13/24]Time: 0.032 (0.053) Data: 0.001 (0.023) Loss: 0.1971 (0.1689)
Epoch: [51][14/24]Time: 0.028 (0.051) Data: 0.001 (0.021) Loss: 0.2119 (0.1718)
Epoch: [51][15/24]Time: 0.030 (0.050) Data: 0.001 (0.020) Loss: 0.1803 (0.1723)
Epoch: [51][16/24]Time: 0.031 (0.049) Data: 0.001 (0.019) Loss: 0.1614 (0.1716)
Epoch: [51][17/24]Time: 0.031 (0.048) Data: 0.001 (0.018) Loss: 0.2014 (0.1733)
Epoch: [51][18/24]Time: 0.032 (0.047) Data: 0.001 (0.017) Loss: 0.1583 (0.1725)
Epoch: [51][19/24]Time: 0.034 (0.047) Data: 0.001 (0.016) Loss: 0.1711 (0.1724)
Epoch: [51][20/24]Time: 0.042 (0.046) Data: 0.001 (0.016) Loss: 0.1900 (0.1733)
Epoch: [51][21/24]Time: 0.058 (0.047) Data: 0.002 (0.015) Loss: 0.1773 (0.1735)
Epoch: [51][22/24]Time: 0.028 (0.046) Data: 0.001 (0.014) Loss: 0.2360 (0.1762)
Epoch: [51][23/24]Time: 0.036 (0.046) Da

Epoch: [55][9/24]Time: 0.064 (0.071) Data: 0.005 (0.034) Loss: 0.2281 (0.1561)
Epoch: [55][10/24]Time: 0.033 (0.067) Data: 0.006 (0.032) Loss: 0.2109 (0.1611)
Epoch: [55][11/24]Time: 0.029 (0.064) Data: 0.001 (0.029) Loss: 0.1734 (0.1621)
Epoch: [55][12/24]Time: 0.033 (0.062) Data: 0.001 (0.027) Loss: 0.2852 (0.1716)
Epoch: [55][13/24]Time: 0.038 (0.060) Data: 0.001 (0.025) Loss: 0.1975 (0.1735)
Epoch: [55][14/24]Time: 0.037 (0.058) Data: 0.001 (0.024) Loss: 0.4962 (0.1950)
Epoch: [55][15/24]Time: 0.036 (0.057) Data: 0.001 (0.022) Loss: 0.1575 (0.1926)
Epoch: [55][16/24]Time: 0.030 (0.055) Data: 0.001 (0.021) Loss: 0.1860 (0.1922)
Epoch: [55][17/24]Time: 0.031 (0.054) Data: 0.001 (0.020) Loss: 0.2039 (0.1929)
Epoch: [55][18/24]Time: 0.029 (0.053) Data: 0.001 (0.019) Loss: 0.1952 (0.1930)
Epoch: [55][19/24]Time: 0.027 (0.051) Data: 0.001 (0.018) Loss: 0.1420 (0.1905)
Epoch: [55][20/24]Time: 0.028 (0.050) Data: 0.001 (0.017) Loss: 0.1805 (0.1900)
Epoch: [55][21/24]Time: 0.031 (0.049) Dat

error:  1.8596235662471372e-13 step  11
cost:  0.25499979499187087
opt took 0.00min,   11iters
Epoch: [59][9/24]Time: 0.297 (0.055) Data: 0.266 (0.028) Loss: 0.1277 (0.1466)
Epoch: [59][10/24]Time: 0.068 (0.056) Data: 0.001 (0.025) Loss: 0.1184 (0.1440)
Epoch: [59][11/24]Time: 0.026 (0.054) Data: 0.002 (0.023) Loss: 0.2277 (0.1510)
Epoch: [59][12/24]Time: 0.026 (0.052) Data: 0.001 (0.022) Loss: 0.1624 (0.1519)
Epoch: [59][13/24]Time: 0.023 (0.050) Data: 0.001 (0.020) Loss: 0.1611 (0.1525)
Epoch: [59][14/24]Time: 0.027 (0.048) Data: 0.001 (0.019) Loss: 0.2444 (0.1587)
Epoch: [59][15/24]Time: 0.027 (0.047) Data: 0.001 (0.018) Loss: 0.1859 (0.1604)
Epoch: [59][16/24]Time: 0.028 (0.046) Data: 0.001 (0.017) Loss: 0.1840 (0.1618)
Epoch: [59][17/24]Time: 0.026 (0.045) Data: 0.001 (0.016) Loss: 0.1752 (0.1625)
Epoch: [59][18/24]Time: 0.028 (0.044) Data: 0.001 (0.015) Loss: 0.1075 (0.1596)
Epoch: [59][19/24]Time: 0.032 (0.043) Data: 0.001 (0.014) Loss: 0.1718 (0.1602)
Epoch: [59][20/24]Time: 0.

error:  7.316369732279782e-14 step  11
cost:  0.24690581421203883
opt took 0.00min,   11iters
Epoch: [63][8/24]Time: 0.519 (0.084) Data: 0.463 (0.053) Loss: 0.1716 (0.1354)
Epoch: [63][9/24]Time: 0.060 (0.081) Data: 0.007 (0.048) Loss: 0.1563 (0.1375)
Epoch: [63][10/24]Time: 0.022 (0.076) Data: 0.001 (0.044) Loss: 0.1538 (0.1390)
Epoch: [63][11/24]Time: 0.022 (0.071) Data: 0.001 (0.040) Loss: 0.1464 (0.1396)
Epoch: [63][12/24]Time: 0.022 (0.068) Data: 0.001 (0.037) Loss: 0.1937 (0.1438)
Epoch: [63][13/24]Time: 0.028 (0.065) Data: 0.001 (0.035) Loss: 0.2479 (0.1512)
Epoch: [63][14/24]Time: 0.032 (0.063) Data: 0.001 (0.032) Loss: 0.2584 (0.1584)
Epoch: [63][15/24]Time: 0.030 (0.061) Data: 0.001 (0.030) Loss: 0.1949 (0.1606)
Epoch: [63][16/24]Time: 0.030 (0.059) Data: 0.001 (0.029) Loss: 0.1576 (0.1605)
Epoch: [63][17/24]Time: 0.031 (0.057) Data: 0.001 (0.027) Loss: 0.1135 (0.1579)
Epoch: [63][18/24]Time: 0.031 (0.056) Data: 0.001 (0.026) Loss: 0.1493 (0.1574)
Epoch: [63][19/24]Time: 0.04

Epoch: [67][6/24]Time: 0.039 (0.031) Data: 0.001 (0.001) Loss: 0.1035 (0.1232)
Epoch: [67][7/24]Time: 0.036 (0.031) Data: 0.001 (0.001) Loss: 0.1543 (0.1271)
Epoch: [67][8/24]Time: 0.041 (0.032) Data: 0.003 (0.001) Loss: 0.1489 (0.1295)
error:  6.628031457012185e-14 step  11
cost:  0.21997018173362867
opt took 0.00min,   11iters
Epoch: [67][9/24]Time: 0.306 (0.060) Data: 0.279 (0.029) Loss: 0.1450 (0.1311)
Epoch: [67][10/24]Time: 0.028 (0.057) Data: 0.001 (0.027) Loss: 0.1223 (0.1303)
Epoch: [67][11/24]Time: 0.026 (0.054) Data: 0.001 (0.025) Loss: 0.1410 (0.1312)
Epoch: [67][12/24]Time: 0.031 (0.053) Data: 0.001 (0.023) Loss: 0.2128 (0.1374)
Epoch: [67][13/24]Time: 0.031 (0.051) Data: 0.003 (0.021) Loss: 0.2223 (0.1435)
Epoch: [67][14/24]Time: 0.030 (0.050) Data: 0.001 (0.020) Loss: 0.1559 (0.1443)
Epoch: [67][15/24]Time: 0.031 (0.048) Data: 0.001 (0.019) Loss: 0.1734 (0.1461)
Epoch: [67][16/24]Time: 0.031 (0.047) Data: 0.001 (0.018) Loss: 0.1997 (0.1493)
Epoch: [67][17/24]Time: 0.029 

Epoch: [71][6/24]Time: 0.033 (0.031) Data: 0.001 (0.001) Loss: 0.1020 (0.1062)
Epoch: [71][7/24]Time: 0.027 (0.030) Data: 0.001 (0.001) Loss: 0.1048 (0.1060)
Epoch: [71][8/24]Time: 0.027 (0.030) Data: 0.001 (0.001) Loss: 0.1307 (0.1088)
Epoch: [71][9/24]Time: 0.028 (0.030) Data: 0.001 (0.001) Loss: 0.1317 (0.1111)
error:  1.4355183708403274e-13 step  11
cost:  0.22951242451876439
opt took 0.00min,   11iters
Epoch: [71][10/24]Time: 0.491 (0.072) Data: 0.427 (0.040) Loss: 0.1688 (0.1163)
Epoch: [71][11/24]Time: 0.051 (0.070) Data: 0.002 (0.037) Loss: 0.1396 (0.1182)
Epoch: [71][12/24]Time: 0.027 (0.067) Data: 0.001 (0.034) Loss: 0.1889 (0.1237)
Epoch: [71][13/24]Time: 0.027 (0.064) Data: 0.001 (0.032) Loss: 0.1686 (0.1269)
Epoch: [71][14/24]Time: 0.032 (0.062) Data: 0.001 (0.030) Loss: 0.1612 (0.1292)
Epoch: [71][15/24]Time: 0.032 (0.060) Data: 0.001 (0.028) Loss: 0.1193 (0.1286)
Epoch: [71][16/24]Time: 0.036 (0.058) Data: 0.001 (0.026) Loss: 0.1629 (0.1306)
Epoch: [71][17/24]Time: 0.030

Epoch: [75][7/24]Time: 0.027 (0.027) Data: 0.001 (0.001) Loss: 0.1295 (0.1331)
Epoch: [75][8/24]Time: 0.029 (0.027) Data: 0.001 (0.001) Loss: 0.1380 (0.1336)
Epoch: [75][9/24]Time: 0.027 (0.027) Data: 0.001 (0.001) Loss: 0.0983 (0.1301)
error:  2.1005419625907962e-13 step  11
cost:  0.23933244067756068
opt took 0.00min,   11iters
Epoch: [75][10/24]Time: 0.604 (0.080) Data: 0.536 (0.050) Loss: 0.1444 (0.1314)
Epoch: [75][11/24]Time: 0.056 (0.078) Data: 0.003 (0.046) Loss: 0.1492 (0.1329)
Epoch: [75][12/24]Time: 0.044 (0.075) Data: 0.004 (0.043) Loss: 0.1316 (0.1328)
Epoch: [75][13/24]Time: 0.031 (0.072) Data: 0.001 (0.040) Loss: 0.1151 (0.1315)
Epoch: [75][14/24]Time: 0.029 (0.069) Data: 0.004 (0.037) Loss: 0.2012 (0.1362)
Epoch: [75][15/24]Time: 0.026 (0.066) Data: 0.001 (0.035) Loss: 0.1890 (0.1395)
Epoch: [75][16/24]Time: 0.030 (0.064) Data: 0.001 (0.033) Loss: 0.1907 (0.1425)
Epoch: [75][17/24]Time: 0.028 (0.062) Data: 0.001 (0.031) Loss: 0.1749 (0.1443)
Epoch: [75][18/24]Time: 0.03

Epoch: [79][5/24]Time: 0.030 (0.034) Data: 0.001 (0.001) Loss: 0.1047 (0.1147)
Epoch: [79][6/24]Time: 0.032 (0.034) Data: 0.001 (0.001) Loss: 0.0859 (0.1106)
Epoch: [79][7/24]Time: 0.030 (0.033) Data: 0.001 (0.001) Loss: 0.1094 (0.1105)
Epoch: [79][8/24]Time: 0.029 (0.033) Data: 0.001 (0.001) Loss: 0.1230 (0.1119)
Epoch: [79][9/24]Time: 0.030 (0.032) Data: 0.001 (0.001) Loss: 0.1352 (0.1142)
Epoch: [79][10/24]Time: 0.028 (0.032) Data: 0.001 (0.001) Loss: 0.1023 (0.1131)
error:  6.7390537594747e-14 step  11
cost:  0.2126411409586409
opt took 0.00min,   11iters
Epoch: [79][11/24]Time: 0.371 (0.060) Data: 0.349 (0.030) Loss: 0.2259 (0.1225)
Epoch: [79][12/24]Time: 0.026 (0.058) Data: 0.001 (0.028) Loss: 0.1415 (0.1240)
Epoch: [79][13/24]Time: 0.029 (0.056) Data: 0.001 (0.026) Loss: 0.1302 (0.1244)
Epoch: [79][14/24]Time: 0.029 (0.054) Data: 0.001 (0.024) Loss: 0.1458 (0.1258)
Epoch: [79][15/24]Time: 0.029 (0.052) Data: 0.001 (0.023) Loss: 0.1451 (0.1270)
Epoch: [79][16/24]Time: 0.030 (0.0

Epoch: [83][4/24]Time: 0.040 (0.042) Data: 0.003 (0.006) Loss: 0.0988 (0.1098)
Epoch: [83][5/24]Time: 0.040 (0.041) Data: 0.003 (0.006) Loss: 0.1293 (0.1131)
Epoch: [83][6/24]Time: 0.040 (0.041) Data: 0.004 (0.006) Loss: 0.1007 (0.1113)
Epoch: [83][7/24]Time: 0.040 (0.041) Data: 0.004 (0.005) Loss: 0.1183 (0.1122)
Epoch: [83][8/24]Time: 0.041 (0.041) Data: 0.004 (0.005) Loss: 0.0849 (0.1091)
Epoch: [83][9/24]Time: 0.038 (0.041) Data: 0.001 (0.005) Loss: 0.0871 (0.1069)
Epoch: [83][10/24]Time: 0.039 (0.041) Data: 0.001 (0.005) Loss: 0.1326 (0.1093)
Epoch: [83][11/24]Time: 0.031 (0.040) Data: 0.001 (0.004) Loss: 0.1278 (0.1108)
error:  5.828670879282072e-14 step  11
cost:  0.20808768939459166
opt took 0.00min,   11iters
Epoch: [83][12/24]Time: 0.473 (0.073) Data: 0.382 (0.033) Loss: 0.1974 (0.1175)
Epoch: [83][13/24]Time: 0.027 (0.070) Data: 0.001 (0.031) Loss: 0.1992 (0.1233)
Epoch: [83][14/24]Time: 0.027 (0.067) Data: 0.001 (0.029) Loss: 0.1493 (0.1250)
Epoch: [83][15/24]Time: 0.028 (0

Epoch: [87][6/24]Time: 0.033 (0.025) Data: 0.001 (0.001) Loss: 0.0898 (0.1038)
Epoch: [87][7/24]Time: 0.040 (0.027) Data: 0.001 (0.001) Loss: 0.1404 (0.1084)
Epoch: [87][8/24]Time: 0.037 (0.028) Data: 0.001 (0.001) Loss: 0.1137 (0.1090)
Epoch: [87][9/24]Time: 0.037 (0.029) Data: 0.002 (0.001) Loss: 0.1381 (0.1119)
Epoch: [87][10/24]Time: 0.040 (0.030) Data: 0.004 (0.001) Loss: 0.1138 (0.1120)
Epoch: [87][11/24]Time: 0.040 (0.031) Data: 0.003 (0.001) Loss: 0.1029 (0.1113)
error:  1.1435297153639112e-13 step  11
cost:  0.2379122935828163
opt took 0.00min,   11iters
Epoch: [87][12/24]Time: 0.309 (0.052) Data: 0.282 (0.023) Loss: 0.1987 (0.1180)
Epoch: [87][13/24]Time: 0.028 (0.050) Data: 0.001 (0.021) Loss: 0.1827 (0.1226)
Epoch: [87][14/24]Time: 0.029 (0.049) Data: 0.001 (0.020) Loss: 0.1230 (0.1227)
Epoch: [87][15/24]Time: 0.031 (0.048) Data: 0.001 (0.019) Loss: 0.1542 (0.1246)
Epoch: [87][16/24]Time: 0.045 (0.048) Data: 0.001 (0.018) Loss: 0.1301 (0.1250)
Epoch: [87][17/24]Time: 0.035 

Epoch: [91][6/24]Time: 0.034 (0.030) Data: 0.001 (0.001) Loss: 0.1097 (0.1059)
Epoch: [91][7/24]Time: 0.043 (0.032) Data: 0.001 (0.001) Loss: 0.0898 (0.1039)
Epoch: [91][8/24]Time: 0.051 (0.034) Data: 0.003 (0.001) Loss: 0.1484 (0.1088)
Epoch: [91][9/24]Time: 0.032 (0.034) Data: 0.001 (0.001) Loss: 0.0980 (0.1078)
Epoch: [91][10/24]Time: 0.026 (0.033) Data: 0.001 (0.001) Loss: 0.1444 (0.1111)
Epoch: [91][11/24]Time: 0.051 (0.035) Data: 0.002 (0.001) Loss: 0.1297 (0.1126)
error:  1.5520917884259688e-13 step  11
cost:  0.21402657796558133
opt took 0.00min,   11iters
Epoch: [91][12/24]Time: 0.823 (0.095) Data: 0.729 (0.057) Loss: 0.2082 (0.1200)
Epoch: [91][13/24]Time: 0.027 (0.090) Data: 0.001 (0.053) Loss: 0.1213 (0.1201)
Epoch: [91][14/24]Time: 0.040 (0.087) Data: 0.001 (0.050) Loss: 0.1434 (0.1216)
Epoch: [91][15/24]Time: 0.048 (0.085) Data: 0.008 (0.047) Loss: 0.1375 (0.1226)
Epoch: [91][16/24]Time: 0.039 (0.082) Data: 0.003 (0.045) Loss: 0.1232 (0.1227)
Epoch: [91][17/24]Time: 0.043

Epoch: [95][7/24]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.1163 (0.1014)
Epoch: [95][8/24]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.1050 (0.1018)
Epoch: [95][9/24]Time: 0.026 (0.028) Data: 0.001 (0.001) Loss: 0.0870 (0.1003)
Epoch: [95][10/24]Time: 0.040 (0.029) Data: 0.001 (0.001) Loss: 0.0992 (0.1002)
Epoch: [95][11/24]Time: 0.033 (0.030) Data: 0.001 (0.001) Loss: 0.1236 (0.1022)
error:  7.283063041541027e-14 step  11
cost:  0.21258639637512103
opt took 0.00min,   11iters
Epoch: [95][12/24]Time: 0.648 (0.077) Data: 0.569 (0.045) Loss: 0.1825 (0.1083)
Epoch: [95][13/24]Time: 0.038 (0.074) Data: 0.003 (0.042) Loss: 0.1859 (0.1139)
Epoch: [95][14/24]Time: 0.028 (0.071) Data: 0.001 (0.039) Loss: 0.1765 (0.1181)
Epoch: [95][15/24]Time: 0.029 (0.069) Data: 0.001 (0.037) Loss: 0.1022 (0.1171)
Epoch: [95][16/24]Time: 0.028 (0.066) Data: 0.001 (0.035) Loss: 0.1416 (0.1185)
Epoch: [95][17/24]Time: 0.030 (0.064) Data: 0.001 (0.033) Loss: 0.1450 (0.1200)
Epoch: [95][18/24]Time: 0.031

Epoch: [99][6/24]Time: 0.028 (0.029) Data: 0.001 (0.001) Loss: 0.1231 (0.1042)
Epoch: [99][7/24]Time: 0.054 (0.032) Data: 0.001 (0.001) Loss: 0.0956 (0.1031)
Epoch: [99][8/24]Time: 0.040 (0.033) Data: 0.001 (0.001) Loss: 0.1258 (0.1056)
Epoch: [99][9/24]Time: 0.051 (0.035) Data: 0.008 (0.002) Loss: 0.1012 (0.1052)
Epoch: [99][10/24]Time: 0.036 (0.035) Data: 0.001 (0.002) Loss: 0.0968 (0.1044)
Epoch: [99][11/24]Time: 0.038 (0.035) Data: 0.002 (0.002) Loss: 0.0762 (0.1021)
Epoch: [99][12/24]Time: 0.038 (0.035) Data: 0.002 (0.002) Loss: 0.1333 (0.1045)
error:  1.3300471835009375e-13 step  11
cost:  0.19937029212417057
opt took 0.00min,   11iters
Epoch: [99][13/24]Time: 0.470 (0.066) Data: 0.421 (0.032) Loss: 0.2009 (0.1114)
Epoch: [99][14/24]Time: 0.059 (0.066) Data: 0.015 (0.031) Loss: 0.1320 (0.1127)
Epoch: [99][15/24]Time: 0.033 (0.064) Data: 0.001 (0.029) Loss: 0.1162 (0.1130)
Epoch: [99][16/24]Time: 0.029 (0.062) Data: 0.001 (0.027) Loss: 0.1226 (0.1135)
Epoch: [99][17/24]Time: 0.031

Epoch: [103][7/24]Time: 0.029 (0.028) Data: 0.001 (0.001) Loss: 0.0969 (0.1089)
Epoch: [103][8/24]Time: 0.032 (0.028) Data: 0.001 (0.001) Loss: 0.0809 (0.1058)
Epoch: [103][9/24]Time: 0.030 (0.028) Data: 0.001 (0.001) Loss: 0.1057 (0.1058)
Epoch: [103][10/24]Time: 0.031 (0.029) Data: 0.001 (0.001) Loss: 0.1560 (0.1103)
Epoch: [103][11/24]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.1068 (0.1100)
Epoch: [103][12/24]Time: 0.031 (0.029) Data: 0.001 (0.001) Loss: 0.0768 (0.1075)
Epoch: [103][13/24]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.1096 (0.1076)
error:  6.94999613415348e-14 step  11
cost:  0.19446165552638225
opt took 0.00min,   11iters
Epoch: [103][14/24]Time: 0.477 (0.059) Data: 0.435 (0.030) Loss: 0.1332 (0.1093)
Epoch: [103][15/24]Time: 0.048 (0.058) Data: 0.008 (0.029) Loss: 0.0768 (0.1073)
Epoch: [103][16/24]Time: 0.049 (0.057) Data: 0.007 (0.027) Loss: 0.1687 (0.1109)
Epoch: [103][17/24]Time: 0.027 (0.056) Data: 0.001 (0.026) Loss: 0.1667 (0.1140)
Epoch: [103][18/24]

Epoch: [107][6/24]Time: 0.031 (0.029) Data: 0.001 (0.001) Loss: 0.0972 (0.1037)
Epoch: [107][7/24]Time: 0.027 (0.028) Data: 0.001 (0.001) Loss: 0.1595 (0.1107)
Epoch: [107][8/24]Time: 0.030 (0.029) Data: 0.001 (0.001) Loss: 0.1319 (0.1130)
Epoch: [107][9/24]Time: 0.032 (0.029) Data: 0.001 (0.001) Loss: 0.1083 (0.1126)
Epoch: [107][10/24]Time: 0.028 (0.029) Data: 0.001 (0.001) Loss: 0.1070 (0.1120)
Epoch: [107][11/24]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.1395 (0.1143)
Epoch: [107][12/24]Time: 0.028 (0.029) Data: 0.001 (0.001) Loss: 0.0828 (0.1119)
Epoch: [107][13/24]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.1009 (0.1111)
error:  1.4976908602193362e-13 step  11
cost:  0.19986898430797304
opt took 0.00min,   11iters
Epoch: [107][14/24]Time: 0.588 (0.066) Data: 0.498 (0.034) Loss: 0.1497 (0.1137)
Epoch: [107][15/24]Time: 0.028 (0.064) Data: 0.001 (0.032) Loss: 0.1169 (0.1139)
Epoch: [107][16/24]Time: 0.028 (0.062) Data: 0.001 (0.030) Loss: 0.1844 (0.1180)
Epoch: [107][17/24

Epoch: [111][7/24]Time: 0.029 (0.028) Data: 0.001 (0.001) Loss: 0.1015 (0.0970)
Epoch: [111][8/24]Time: 0.029 (0.028) Data: 0.001 (0.001) Loss: 0.0711 (0.0941)
Epoch: [111][9/24]Time: 0.028 (0.028) Data: 0.001 (0.001) Loss: 0.1173 (0.0965)
Epoch: [111][10/24]Time: 0.030 (0.028) Data: 0.001 (0.001) Loss: 0.0910 (0.0960)
Epoch: [111][11/24]Time: 0.029 (0.028) Data: 0.001 (0.001) Loss: 0.1179 (0.0978)
Epoch: [111][12/24]Time: 0.032 (0.028) Data: 0.001 (0.001) Loss: 0.0751 (0.0960)
Epoch: [111][13/24]Time: 0.027 (0.028) Data: 0.001 (0.001) Loss: 0.0926 (0.0958)
Epoch: [111][14/24]Time: 0.029 (0.028) Data: 0.001 (0.001) Loss: 0.1192 (0.0974)
error:  5.240252676230739e-14 step  11
cost:  0.21534285113561147
opt took 0.00min,   11iters
Epoch: [111][15/24]Time: 0.296 (0.045) Data: 0.253 (0.017) Loss: 0.2622 (0.1077)
Epoch: [111][16/24]Time: 0.048 (0.045) Data: 0.006 (0.016) Loss: 0.1520 (0.1103)
Epoch: [111][17/24]Time: 0.030 (0.044) Data: 0.001 (0.015) Loss: 0.1411 (0.1120)
Epoch: [111][18/24

Epoch: [115][5/24]Time: 0.028 (0.037) Data: 0.001 (0.004) Loss: 0.0832 (0.0829)
Epoch: [115][6/24]Time: 0.032 (0.037) Data: 0.001 (0.004) Loss: 0.1042 (0.0860)
Epoch: [115][7/24]Time: 0.027 (0.035) Data: 0.001 (0.004) Loss: 0.1788 (0.0976)
Epoch: [115][8/24]Time: 0.039 (0.036) Data: 0.007 (0.004) Loss: 0.1074 (0.0987)
Epoch: [115][9/24]Time: 0.026 (0.035) Data: 0.001 (0.004) Loss: 0.1179 (0.1006)
Epoch: [115][10/24]Time: 0.028 (0.034) Data: 0.001 (0.003) Loss: 0.0927 (0.0999)
Epoch: [115][11/24]Time: 0.027 (0.034) Data: 0.001 (0.003) Loss: 0.0641 (0.0969)
Epoch: [115][12/24]Time: 0.045 (0.035) Data: 0.007 (0.003) Loss: 0.0999 (0.0971)
Epoch: [115][13/24]Time: 0.032 (0.034) Data: 0.001 (0.003) Loss: 0.0894 (0.0966)
Epoch: [115][14/24]Time: 0.028 (0.034) Data: 0.001 (0.003) Loss: 0.1131 (0.0977)
Epoch: [115][15/24]Time: 0.027 (0.033) Data: 0.001 (0.003) Loss: 0.1373 (0.1002)
error:  7.793765632868599e-14 step  11
cost:  0.22207402689331968
opt took 0.00min,   11iters
Epoch: [115][16/24]T

Epoch: [119][7/24]Time: 0.031 (0.029) Data: 0.001 (0.001) Loss: 0.1282 (0.1024)
Epoch: [119][8/24]Time: 0.030 (0.029) Data: 0.001 (0.001) Loss: 0.0924 (0.1013)
Epoch: [119][9/24]Time: 0.030 (0.029) Data: 0.001 (0.001) Loss: 0.1069 (0.1018)
Epoch: [119][10/24]Time: 0.029 (0.029) Data: 0.001 (0.001) Loss: 0.0971 (0.1014)
Epoch: [119][11/24]Time: 0.025 (0.029) Data: 0.001 (0.001) Loss: 0.1000 (0.1013)
Epoch: [119][12/24]Time: 0.027 (0.028) Data: 0.001 (0.001) Loss: 0.0994 (0.1011)
Epoch: [119][13/24]Time: 0.027 (0.028) Data: 0.001 (0.001) Loss: 0.0965 (0.1008)
Epoch: [119][14/24]Time: 0.035 (0.029) Data: 0.001 (0.001) Loss: 0.0772 (0.0992)
error:  1.3633538742396922e-13 step  11
cost:  0.19763226784594598
opt took 0.00min,   11iters
Epoch: [119][15/24]Time: 0.707 (0.071) Data: 0.654 (0.042) Loss: 0.1475 (0.1023)
Epoch: [119][16/24]Time: 0.059 (0.071) Data: 0.005 (0.040) Loss: 0.1385 (0.1044)
Epoch: [119][17/24]Time: 0.032 (0.068) Data: 0.001 (0.038) Loss: 0.1089 (0.1046)
Epoch: [119][18/2

Epoch: [123][8/24]Time: 0.029 (0.023) Data: 0.001 (0.001) Loss: 0.1181 (0.1002)
Epoch: [123][9/24]Time: 0.029 (0.023) Data: 0.001 (0.001) Loss: 0.0958 (0.0998)
Epoch: [123][10/24]Time: 0.027 (0.024) Data: 0.001 (0.001) Loss: 0.1168 (0.1014)
Epoch: [123][11/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.1226 (0.1031)
Epoch: [123][12/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.0696 (0.1005)
Epoch: [123][13/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.0720 (0.0985)
Epoch: [123][14/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.0912 (0.0980)
Epoch: [123][15/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.1078 (0.0986)
error:  1.127986593019159e-13 step  11
cost:  0.1961134046983521
opt took 0.00min,   11iters
Epoch: [123][16/24]Time: 0.250 (0.036) Data: 0.224 (0.014) Loss: 0.1525 (0.1018)
Epoch: [123][17/24]Time: 0.027 (0.036) Data: 0.001 (0.013) Loss: 0.1005 (0.1017)
Epoch: [123][18/24]Time: 0.027 (0.035) Data: 0.001 (0.013) Loss: 0.0920 (0.1012)
Epoch: [123][19/24

Epoch: [127][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0950 (0.0976)
Epoch: [127][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0967 (0.0975)
Epoch: [127][11/24]Time: 0.023 (0.022) Data: 0.001 (0.001) Loss: 0.0835 (0.0963)
Epoch: [127][12/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0963 (0.0963)
Epoch: [127][13/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1193 (0.0980)
Epoch: [127][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0753 (0.0965)
Epoch: [127][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1083 (0.0972)
Epoch: [127][16/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0698 (0.0956)
error:  1.4710455076283324e-13 step  11
cost:  0.1971251657899252
opt took 0.00min,   11iters
Epoch: [127][17/24]Time: 0.274 (0.036) Data: 0.246 (0.015) Loss: 0.1026 (0.0960)
Epoch: [127][18/24]Time: 0.037 (0.036) Data: 0.001 (0.014) Loss: 0.1224 (0.0974)
Epoch: [127][19/24]Time: 0.039 (0.036) Data: 0.001 (0.013) Loss: 0.1529 (0.1001)
Epoch: [127][20/

Epoch: [131][8/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.1005 (0.1056)
Epoch: [131][9/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.1035 (0.1054)
Epoch: [131][10/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1025 (0.1052)
Epoch: [131][11/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1022 (0.1049)
Epoch: [131][12/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0938 (0.1041)
Epoch: [131][13/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0844 (0.1027)
Epoch: [131][14/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.0963 (0.1022)
Epoch: [131][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1139 (0.1030)
Epoch: [131][16/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1038 (0.1030)
error:  4.973799150320701e-14 step  11
cost:  0.21196883990847293
opt took 0.00min,   11iters
Epoch: [131][17/24]Time: 0.251 (0.035) Data: 0.225 (0.013) Loss: 0.1093 (0.1034)
Epoch: [131][18/24]Time: 0.027 (0.035) Data: 0.001 (0.013) Loss: 0.1835 (0.1076)
Epoch: [131][19/2

Epoch: [135][9/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.0940 (0.0959)
Epoch: [135][10/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.1121 (0.0974)
Epoch: [135][11/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.0853 (0.0964)
Epoch: [135][12/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.0808 (0.0952)
Epoch: [135][13/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.0913 (0.0949)
Epoch: [135][14/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.0921 (0.0947)
Epoch: [135][15/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.0778 (0.0936)
Epoch: [135][16/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.1161 (0.0950)
Epoch: [135][17/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.1067 (0.0956)
error:  1.191269305422793e-13 step  11
cost:  0.20544052845284377
opt took 0.00min,   11iters
Epoch: [135][18/24]Time: 0.229 (0.032) Data: 0.204 (0.011) Loss: 0.1450 (0.0982)
Epoch: [135][19/24]Time: 0.026 (0.032) Data: 0.001 (0.011) Loss: 0.1246 (0.0995)
Epoch: [135][20/

Epoch: [139][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1419 (0.0971)
Epoch: [139][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1189 (0.0991)
Epoch: [139][11/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0689 (0.0966)
Epoch: [139][12/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1103 (0.0977)
Epoch: [139][13/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1122 (0.0987)
Epoch: [139][14/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0969 (0.0986)
Epoch: [139][15/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1046 (0.0989)
Epoch: [139][16/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0958 (0.0988)
Epoch: [139][17/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0725 (0.0973)
error:  3.601563491884008e-13 step  11
cost:  0.21461721407766732
opt took 0.00min,   11iters
Epoch: [139][18/24]Time: 0.263 (0.035) Data: 0.235 (0.013) Loss: 0.1521 (0.1002)
Epoch: [139][19/24]Time: 0.047 (0.035) Data: 0.002 (0.013) Loss: 0.1657 (0.1035)
Epoch: [139][20/

Epoch: [143][8/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.1703 (0.1155)
Epoch: [143][9/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1112 (0.1151)
Epoch: [143][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0972 (0.1135)
Epoch: [143][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0945 (0.1119)
Epoch: [143][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0684 (0.1085)
Epoch: [143][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1123 (0.1088)
Epoch: [143][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1093 (0.1088)
Epoch: [143][15/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1175 (0.1094)
Epoch: [143][16/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1429 (0.1113)
Epoch: [143][17/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0795 (0.1096)
error:  2.495781359357352e-13 step  11
cost:  0.17861774612000506
opt took 0.00min,   11iters
Epoch: [143][18/24]Time: 0.246 (0.034) Data: 0.220 (0.012) Loss: 0.1453 (0.1115)
Epoch: [143][19/2

Epoch: [147][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0901 (0.1011)
Epoch: [147][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0879 (0.0999)
Epoch: [147][11/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0812 (0.0983)
Epoch: [147][12/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1056 (0.0989)
Epoch: [147][13/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0921 (0.0984)
Epoch: [147][14/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0793 (0.0971)
Epoch: [147][15/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1541 (0.1007)
Epoch: [147][16/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0863 (0.0998)
Epoch: [147][17/24]Time: 0.038 (0.023) Data: 0.001 (0.001) Loss: 0.1203 (0.1010)
Epoch: [147][18/24]Time: 0.034 (0.024) Data: 0.001 (0.001) Loss: 0.0567 (0.0987)
error:  2.7755575615628914e-14 step  11
cost:  0.1953629308303608
opt took 0.00min,   11iters
Epoch: [147][19/24]Time: 0.320 (0.038) Data: 0.291 (0.015) Loss: 0.1182 (0.0996)
Epoch: [147][20/

Epoch: [151][8/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 0.0934 (0.0961)
Epoch: [151][9/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 0.0898 (0.0954)
Epoch: [151][10/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.1112 (0.0969)
Epoch: [151][11/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.0765 (0.0952)
Epoch: [151][12/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.0648 (0.0928)
Epoch: [151][13/24]Time: 0.022 (0.024) Data: 0.001 (0.001) Loss: 0.0833 (0.0922)
Epoch: [151][14/24]Time: 0.022 (0.024) Data: 0.001 (0.001) Loss: 0.1081 (0.0932)
Epoch: [151][15/24]Time: 0.022 (0.024) Data: 0.001 (0.001) Loss: 0.0789 (0.0923)
Epoch: [151][16/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.0950 (0.0925)
Epoch: [151][17/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.0694 (0.0912)
Epoch: [151][18/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0998 (0.0917)
error:  3.83026943495679e-14 step  11
cost:  0.19913475144359696
opt took 0.00min,   11iters
Epoch: [151][19/24

Epoch: [155][8/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 0.0854 (0.0913)
Epoch: [155][9/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.1053 (0.0927)
Epoch: [155][10/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.1246 (0.0956)
Epoch: [155][11/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.0715 (0.0936)
Epoch: [155][12/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0877 (0.0932)
Epoch: [155][13/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1176 (0.0949)
Epoch: [155][14/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1058 (0.0956)
Epoch: [155][15/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.0898 (0.0953)
Epoch: [155][16/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.1318 (0.0974)
Epoch: [155][17/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.0780 (0.0963)
Epoch: [155][18/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0860 (0.0958)
Epoch: [155][19/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0816 (0.0951)
error:  1.6209256159527285e-13

Epoch: [159][8/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 0.1137 (0.0904)
Epoch: [159][9/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 0.0824 (0.0896)
Epoch: [159][10/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.0703 (0.0878)
Epoch: [159][11/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.0786 (0.0871)
Epoch: [159][12/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0995 (0.0880)
Epoch: [159][13/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1545 (0.0928)
Epoch: [159][14/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0783 (0.0918)
Epoch: [159][15/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1072 (0.0928)
Epoch: [159][16/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1103 (0.0938)
Epoch: [159][17/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0784 (0.0929)
Epoch: [159][18/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1049 (0.0936)
Epoch: [159][19/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0832 (0.0930)
Epoch: [159][20/24]Time: 0.022

Epoch: [163][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0930 (0.0973)
Epoch: [163][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0928 (0.0969)
Epoch: [163][11/24]Time: 0.026 (0.022) Data: 0.001 (0.001) Loss: 0.1087 (0.0978)
Epoch: [163][12/24]Time: 0.031 (0.023) Data: 0.001 (0.001) Loss: 0.1031 (0.0982)
Epoch: [163][13/24]Time: 0.028 (0.023) Data: 0.001 (0.001) Loss: 0.0982 (0.0982)
Epoch: [163][14/24]Time: 0.026 (0.023) Data: 0.001 (0.001) Loss: 0.1087 (0.0989)
Epoch: [163][15/24]Time: 0.025 (0.023) Data: 0.001 (0.001) Loss: 0.0717 (0.0972)
Epoch: [163][16/24]Time: 0.024 (0.023) Data: 0.001 (0.001) Loss: 0.1095 (0.0980)
Epoch: [163][17/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.0851 (0.0972)
Epoch: [163][18/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0800 (0.0963)
Epoch: [163][19/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0889 (0.0960)
Epoch: [163][20/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1049 (0.0964)
error:  2.1782575743145571e-1

Epoch: [167][9/24]Time: 0.023 (0.022) Data: 0.001 (0.001) Loss: 0.1296 (0.1286)
Epoch: [167][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1281 (0.1285)
Epoch: [167][11/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0827 (0.1247)
Epoch: [167][12/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1037 (0.1231)
Epoch: [167][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0827 (0.1202)
Epoch: [167][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1176 (0.1201)
Epoch: [167][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0965 (0.1186)
Epoch: [167][16/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1140 (0.1183)
Epoch: [167][17/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1030 (0.1175)
Epoch: [167][18/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1343 (0.1183)
Epoch: [167][19/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1144 (0.1182)
Epoch: [167][20/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0876 (0.1167)
Epoch: [167][21/24]Time: 0.02

Epoch: [171][7/24]Time: 0.028 (0.028) Data: 0.001 (0.001) Loss: 0.1214 (0.1092)
Epoch: [171][8/24]Time: 0.022 (0.027) Data: 0.001 (0.001) Loss: 0.0757 (0.1055)
Epoch: [171][9/24]Time: 0.021 (0.027) Data: 0.001 (0.001) Loss: 0.0904 (0.1040)
Epoch: [171][10/24]Time: 0.021 (0.026) Data: 0.001 (0.001) Loss: 0.0729 (0.1012)
Epoch: [171][11/24]Time: 0.021 (0.026) Data: 0.001 (0.001) Loss: 0.1051 (0.1015)
Epoch: [171][12/24]Time: 0.022 (0.026) Data: 0.001 (0.001) Loss: 0.0889 (0.1005)
Epoch: [171][13/24]Time: 0.021 (0.025) Data: 0.001 (0.001) Loss: 0.0834 (0.0993)
Epoch: [171][14/24]Time: 0.021 (0.025) Data: 0.001 (0.001) Loss: 0.0855 (0.0984)
Epoch: [171][15/24]Time: 0.021 (0.025) Data: 0.001 (0.001) Loss: 0.0880 (0.0977)
Epoch: [171][16/24]Time: 0.021 (0.025) Data: 0.001 (0.001) Loss: 0.1072 (0.0983)
Epoch: [171][17/24]Time: 0.022 (0.024) Data: 0.001 (0.001) Loss: 0.0642 (0.0964)
Epoch: [171][18/24]Time: 0.022 (0.024) Data: 0.001 (0.001) Loss: 0.1129 (0.0973)
Epoch: [171][19/24]Time: 0.022 

Epoch: [175][7/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0969 (0.1028)
Epoch: [175][8/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0709 (0.0992)
Epoch: [175][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0991 (0.0992)
Epoch: [175][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0737 (0.0969)
Epoch: [175][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0747 (0.0950)
Epoch: [175][12/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1367 (0.0983)
Epoch: [175][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0796 (0.0969)
Epoch: [175][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0957 (0.0968)
Epoch: [175][15/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0815 (0.0959)
Epoch: [175][16/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1427 (0.0986)
Epoch: [175][17/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0913 (0.0982)
Epoch: [175][18/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0868 (0.0976)
Epoch: [175][19/24]Time: 0.024 

Epoch: [179][7/24]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.0716 (0.1036)
Epoch: [179][8/24]Time: 0.025 (0.026) Data: 0.001 (0.001) Loss: 0.1134 (0.1047)
Epoch: [179][9/24]Time: 0.026 (0.026) Data: 0.001 (0.001) Loss: 0.0839 (0.1026)
Epoch: [179][10/24]Time: 0.022 (0.025) Data: 0.001 (0.001) Loss: 0.0864 (0.1011)
Epoch: [179][11/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0742 (0.0989)
Epoch: [179][12/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0899 (0.0982)
Epoch: [179][13/24]Time: 0.025 (0.025) Data: 0.001 (0.001) Loss: 0.0732 (0.0964)
Epoch: [179][14/24]Time: 0.026 (0.025) Data: 0.001 (0.001) Loss: 0.0925 (0.0961)
Epoch: [179][15/24]Time: 0.022 (0.025) Data: 0.001 (0.001) Loss: 0.0876 (0.0956)
Epoch: [179][16/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0778 (0.0946)
Epoch: [179][17/24]Time: 0.022 (0.025) Data: 0.001 (0.001) Loss: 0.0723 (0.0933)
Epoch: [179][18/24]Time: 0.024 (0.024) Data: 0.001 (0.001) Loss: 0.0935 (0.0933)
Epoch: [179][19/24]Time: 0.023 

Epoch: [183][7/24]Time: 0.024 (0.027) Data: 0.001 (0.001) Loss: 0.0777 (0.0797)
Epoch: [183][8/24]Time: 0.023 (0.027) Data: 0.001 (0.001) Loss: 0.0848 (0.0803)
Epoch: [183][9/24]Time: 0.023 (0.026) Data: 0.001 (0.001) Loss: 0.0883 (0.0811)
Epoch: [183][10/24]Time: 0.023 (0.026) Data: 0.001 (0.001) Loss: 0.0961 (0.0824)
Epoch: [183][11/24]Time: 0.023 (0.026) Data: 0.001 (0.001) Loss: 0.0519 (0.0799)
Epoch: [183][12/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0893 (0.0806)
Epoch: [183][13/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0763 (0.0803)
Epoch: [183][14/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.1093 (0.0822)
Epoch: [183][15/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0595 (0.0808)
Epoch: [183][16/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0732 (0.0804)
Epoch: [183][17/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0671 (0.0796)
Epoch: [183][18/24]Time: 0.023 (0.025) Data: 0.001 (0.001) Loss: 0.0802 (0.0797)
Epoch: [183][19/24]Time: 0.023 

Epoch: [187][7/24]Time: 0.021 (0.056) Data: 0.001 (0.033) Loss: 0.1676 (0.1043)
Epoch: [187][8/24]Time: 0.021 (0.052) Data: 0.001 (0.029) Loss: 0.0932 (0.1031)
Epoch: [187][9/24]Time: 0.021 (0.049) Data: 0.001 (0.027) Loss: 0.0529 (0.0981)
Epoch: [187][10/24]Time: 0.022 (0.047) Data: 0.001 (0.024) Loss: 0.0847 (0.0969)
Epoch: [187][11/24]Time: 0.022 (0.045) Data: 0.001 (0.022) Loss: 0.0771 (0.0952)
Epoch: [187][12/24]Time: 0.022 (0.043) Data: 0.001 (0.021) Loss: 0.0712 (0.0934)
Epoch: [187][13/24]Time: 0.022 (0.042) Data: 0.001 (0.019) Loss: 0.0661 (0.0914)
Epoch: [187][14/24]Time: 0.022 (0.040) Data: 0.001 (0.018) Loss: 0.1652 (0.0963)
Epoch: [187][15/24]Time: 0.024 (0.039) Data: 0.001 (0.017) Loss: 0.0721 (0.0948)
Epoch: [187][16/24]Time: 0.023 (0.038) Data: 0.001 (0.016) Loss: 0.0793 (0.0939)
Epoch: [187][17/24]Time: 0.026 (0.038) Data: 0.001 (0.015) Loss: 0.0689 (0.0925)
Epoch: [187][18/24]Time: 0.023 (0.037) Data: 0.001 (0.014) Loss: 0.0911 (0.0924)
Epoch: [187][19/24]Time: 0.022 

Epoch: [191][7/24]Time: 0.023 (0.053) Data: 0.001 (0.029) Loss: 0.1677 (0.1154)
Epoch: [191][8/24]Time: 0.023 (0.050) Data: 0.001 (0.026) Loss: 0.0822 (0.1117)
Epoch: [191][9/24]Time: 0.029 (0.048) Data: 0.001 (0.023) Loss: 0.0621 (0.1067)
Epoch: [191][10/24]Time: 0.026 (0.046) Data: 0.001 (0.021) Loss: 0.1292 (0.1088)
Epoch: [191][11/24]Time: 0.025 (0.044) Data: 0.001 (0.020) Loss: 0.0994 (0.1080)
Epoch: [191][12/24]Time: 0.024 (0.042) Data: 0.001 (0.018) Loss: 0.0827 (0.1060)
Epoch: [191][13/24]Time: 0.024 (0.041) Data: 0.001 (0.017) Loss: 0.1036 (0.1059)
Epoch: [191][14/24]Time: 0.023 (0.040) Data: 0.001 (0.016) Loss: 0.0662 (0.1032)
Epoch: [191][15/24]Time: 0.023 (0.039) Data: 0.001 (0.015) Loss: 0.0718 (0.1013)
Epoch: [191][16/24]Time: 0.024 (0.038) Data: 0.001 (0.014) Loss: 0.0854 (0.1003)
Epoch: [191][17/24]Time: 0.023 (0.037) Data: 0.001 (0.013) Loss: 0.0701 (0.0986)
Epoch: [191][18/24]Time: 0.024 (0.036) Data: 0.001 (0.013) Loss: 0.1127 (0.0994)
Epoch: [191][19/24]Time: 0.023 

Epoch: [195][7/24]Time: 0.023 (0.056) Data: 0.001 (0.032) Loss: 0.1285 (0.1008)
Epoch: [195][8/24]Time: 0.023 (0.052) Data: 0.001 (0.028) Loss: 0.0987 (0.1006)
Epoch: [195][9/24]Time: 0.023 (0.050) Data: 0.001 (0.026) Loss: 0.1051 (0.1010)
Epoch: [195][10/24]Time: 0.023 (0.047) Data: 0.001 (0.023) Loss: 0.1214 (0.1029)
Epoch: [195][11/24]Time: 0.023 (0.045) Data: 0.001 (0.021) Loss: 0.1119 (0.1037)
Epoch: [195][12/24]Time: 0.023 (0.043) Data: 0.001 (0.020) Loss: 0.0831 (0.1021)
Epoch: [195][13/24]Time: 0.022 (0.042) Data: 0.001 (0.018) Loss: 0.1175 (0.1032)
Epoch: [195][14/24]Time: 0.022 (0.041) Data: 0.001 (0.017) Loss: 0.0854 (0.1020)
Epoch: [195][15/24]Time: 0.022 (0.039) Data: 0.001 (0.016) Loss: 0.0934 (0.1014)
Epoch: [195][16/24]Time: 0.022 (0.038) Data: 0.001 (0.015) Loss: 0.0890 (0.1007)
Epoch: [195][17/24]Time: 0.022 (0.038) Data: 0.001 (0.015) Loss: 0.0990 (0.1006)
Epoch: [195][18/24]Time: 0.023 (0.037) Data: 0.001 (0.014) Loss: 0.1150 (0.1014)
Epoch: [195][19/24]Time: 0.023 

Epoch: [199][7/24]Time: 0.022 (0.054) Data: 0.001 (0.029) Loss: 0.0952 (0.1036)
Epoch: [199][8/24]Time: 0.022 (0.050) Data: 0.001 (0.025) Loss: 0.1237 (0.1059)
Epoch: [199][9/24]Time: 0.021 (0.047) Data: 0.001 (0.023) Loss: 0.1055 (0.1058)
Epoch: [199][10/24]Time: 0.021 (0.045) Data: 0.001 (0.021) Loss: 0.0983 (0.1051)
Epoch: [199][11/24]Time: 0.021 (0.043) Data: 0.001 (0.019) Loss: 0.0633 (0.1017)
Epoch: [199][12/24]Time: 0.022 (0.041) Data: 0.001 (0.018) Loss: 0.0795 (0.1000)
Epoch: [199][13/24]Time: 0.022 (0.040) Data: 0.001 (0.017) Loss: 0.0753 (0.0982)
Epoch: [199][14/24]Time: 0.021 (0.039) Data: 0.001 (0.016) Loss: 0.0641 (0.0959)
Epoch: [199][15/24]Time: 0.022 (0.038) Data: 0.001 (0.015) Loss: 0.0774 (0.0948)
Epoch: [199][16/24]Time: 0.021 (0.037) Data: 0.001 (0.014) Loss: 0.1054 (0.0954)
Epoch: [199][17/24]Time: 0.022 (0.036) Data: 0.001 (0.013) Loss: 0.0882 (0.0950)
Epoch: [199][18/24]Time: 0.022 (0.035) Data: 0.001 (0.012) Loss: 0.0896 (0.0947)
Epoch: [199][19/24]Time: 0.023 

error:  1.6253665080512292e-13 step  11
cost:  0.18434420508142374
opt took 0.00min,   11iters
Epoch: [203][2/24]Time: 0.241 (0.094) Data: 0.213 (0.072) Loss: 0.0873 (0.0687)
Epoch: [203][3/24]Time: 0.026 (0.077) Data: 0.001 (0.054) Loss: 0.0729 (0.0698)
Epoch: [203][4/24]Time: 0.037 (0.069) Data: 0.001 (0.043) Loss: 0.1297 (0.0818)
Epoch: [203][5/24]Time: 0.026 (0.062) Data: 0.001 (0.036) Loss: 0.1325 (0.0902)
Epoch: [203][6/24]Time: 0.021 (0.056) Data: 0.001 (0.031) Loss: 0.0832 (0.0892)
Epoch: [203][7/24]Time: 0.022 (0.052) Data: 0.001 (0.027) Loss: 0.0944 (0.0899)
Epoch: [203][8/24]Time: 0.022 (0.049) Data: 0.001 (0.024) Loss: 0.0863 (0.0895)
Epoch: [203][9/24]Time: 0.022 (0.046) Data: 0.001 (0.022) Loss: 0.0821 (0.0887)
Epoch: [203][10/24]Time: 0.022 (0.044) Data: 0.001 (0.020) Loss: 0.1363 (0.0931)
Epoch: [203][11/24]Time: 0.023 (0.042) Data: 0.001 (0.019) Loss: 0.1069 (0.0942)
Epoch: [203][12/24]Time: 0.022 (0.041) Data: 0.001 (0.017) Loss: 0.0510 (0.0909)
Epoch: [203][13/24]Tim

10-NN,s=0.1: TOP1:  51.833333333333336
best accuracy: 54.54
Size pf grid:  195

Epoch: 207
ResNet1D
0.03
Epoch: [207][0/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.0672 (0.0672)
error:  7.671641100159832e-14 step  11
cost:  0.1962979644962425
opt took 0.00min,   11iters
Epoch: [207][1/24]Time: 0.276 (0.149) Data: 0.249 (0.125) Loss: 0.1679 (0.1176)
Epoch: [207][2/24]Time: 0.026 (0.108) Data: 0.001 (0.084) Loss: 0.1125 (0.1159)
Epoch: [207][3/24]Time: 0.026 (0.088) Data: 0.001 (0.063) Loss: 0.2050 (0.1382)
Epoch: [207][4/24]Time: 0.026 (0.075) Data: 0.001 (0.051) Loss: 0.0936 (0.1292)
Epoch: [207][5/24]Time: 0.023 (0.067) Data: 0.001 (0.042) Loss: 0.0723 (0.1198)
Epoch: [207][6/24]Time: 0.023 (0.060) Data: 0.001 (0.036) Loss: 0.1160 (0.1192)
Epoch: [207][7/24]Time: 0.022 (0.056) Data: 0.001 (0.032) Loss: 0.1286 (0.1204)
Epoch: [207][8/24]Time: 0.022 (0.052) Data: 0.001 (0.028) Loss: 0.1039 (0.1186)
Epoch: [207][9/24]Time: 0.023 (0.049) Data: 0.001 (0.026) Loss: 0.0728 (0.1140)
Ep

Epoch: [210][20/24]Time: 0.022 (0.033) Data: 0.001 (0.011) Loss: 0.1132 (0.1077)
Epoch: [210][21/24]Time: 0.023 (0.033) Data: 0.001 (0.010) Loss: 0.1107 (0.1079)
Epoch: [210][22/24]Time: 0.023 (0.032) Data: 0.001 (0.010) Loss: 0.1219 (0.1085)
Epoch: [210][23/24]Time: 0.022 (0.032) Data: 0.001 (0.009) Loss: 0.0883 (0.1077)
10-NN,s=0.1: TOP1:  52.916666666666664
best accuracy: 54.54
Size pf grid:  191

Epoch: 211
ResNet1D
0.03
Epoch: [211][0/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.0842 (0.0842)
Epoch: [211][1/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.0670 (0.0756)
error:  5.88418203051333e-14 step  11
cost:  0.1991277698671846
opt took 0.00min,   11iters
Epoch: [211][2/24]Time: 0.268 (0.104) Data: 0.225 (0.076) Loss: 0.1457 (0.0989)
Epoch: [211][3/24]Time: 0.043 (0.088) Data: 0.001 (0.057) Loss: 0.1798 (0.1192)
Epoch: [211][4/24]Time: 0.037 (0.078) Data: 0.002 (0.046) Loss: 0.0928 (0.1139)
Epoch: [211][5/24]Time: 0.023 (0.069) Data: 0.001 (0.039) Loss: 0.1227 (0.1154)

Epoch: [214][18/24]Time: 0.022 (0.035) Data: 0.001 (0.012) Loss: 0.0880 (0.0964)
Epoch: [214][19/24]Time: 0.022 (0.035) Data: 0.001 (0.011) Loss: 0.0934 (0.0962)
Epoch: [214][20/24]Time: 0.022 (0.034) Data: 0.001 (0.011) Loss: 0.0723 (0.0951)
Epoch: [214][21/24]Time: 0.022 (0.033) Data: 0.001 (0.010) Loss: 0.0723 (0.0941)
Epoch: [214][22/24]Time: 0.022 (0.033) Data: 0.001 (0.010) Loss: 0.0809 (0.0935)
Epoch: [214][23/24]Time: 0.022 (0.032) Data: 0.001 (0.010) Loss: 0.1227 (0.0947)
10-NN,s=0.1: TOP1:  52.708333333333336
best accuracy: 54.71
Size pf grid:  187

Epoch: 215
ResNet1D
0.03
Epoch: [215][0/24]Time: 0.023 (0.023) Data: 0.002 (0.002) Loss: 0.0817 (0.0817)
Epoch: [215][1/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0603 (0.0710)
Epoch: [215][2/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.0509 (0.0643)
error:  1.9817480989559044e-13 step  11
cost:  0.20794364777363208
opt took 0.00min,   11iters
Epoch: [215][3/24]Time: 0.264 (0.082) Data: 0.237 (0.060) Loss: 0.1763 (0.

Epoch: [218][16/24]Time: 0.023 (0.038) Data: 0.001 (0.015) Loss: 0.0920 (0.0929)
Epoch: [218][17/24]Time: 0.023 (0.037) Data: 0.001 (0.014) Loss: 0.1076 (0.0937)
Epoch: [218][18/24]Time: 0.023 (0.036) Data: 0.001 (0.013) Loss: 0.0828 (0.0931)
Epoch: [218][19/24]Time: 0.023 (0.036) Data: 0.001 (0.012) Loss: 0.0677 (0.0918)
Epoch: [218][20/24]Time: 0.023 (0.035) Data: 0.001 (0.012) Loss: 0.1026 (0.0923)
Epoch: [218][21/24]Time: 0.022 (0.035) Data: 0.001 (0.011) Loss: 0.0895 (0.0922)
Epoch: [218][22/24]Time: 0.023 (0.034) Data: 0.001 (0.011) Loss: 0.0851 (0.0919)
Epoch: [218][23/24]Time: 0.022 (0.034) Data: 0.001 (0.010) Loss: 0.0956 (0.0921)
10-NN,s=0.1: TOP1:  52.625
best accuracy: 54.71
Size pf grid:  183

Epoch: 219
ResNet1D
0.03
Epoch: [219][0/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.0692 (0.0692)
Epoch: [219][1/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.0619 (0.0655)
Epoch: [219][2/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1009 (0.0773)
error:  8.471001677

Epoch: [222][12/24]Time: 0.021 (0.039) Data: 0.001 (0.017) Loss: 0.1231 (0.0887)
Epoch: [222][13/24]Time: 0.021 (0.038) Data: 0.001 (0.016) Loss: 0.0868 (0.0885)
Epoch: [222][14/24]Time: 0.021 (0.037) Data: 0.001 (0.015) Loss: 0.0741 (0.0876)
Epoch: [222][15/24]Time: 0.021 (0.036) Data: 0.001 (0.014) Loss: 0.0917 (0.0878)
Epoch: [222][16/24]Time: 0.021 (0.035) Data: 0.001 (0.014) Loss: 0.1007 (0.0886)
Epoch: [222][17/24]Time: 0.021 (0.034) Data: 0.001 (0.013) Loss: 0.1117 (0.0899)
Epoch: [222][18/24]Time: 0.021 (0.034) Data: 0.001 (0.012) Loss: 0.0889 (0.0898)
Epoch: [222][19/24]Time: 0.021 (0.033) Data: 0.001 (0.012) Loss: 0.0616 (0.0884)
Epoch: [222][20/24]Time: 0.021 (0.032) Data: 0.001 (0.011) Loss: 0.0769 (0.0878)
Epoch: [222][21/24]Time: 0.021 (0.032) Data: 0.001 (0.011) Loss: 0.0781 (0.0874)
Epoch: [222][22/24]Time: 0.021 (0.031) Data: 0.001 (0.010) Loss: 0.1092 (0.0883)
Epoch: [222][23/24]Time: 0.021 (0.031) Data: 0.001 (0.010) Loss: 0.0993 (0.0888)
10-NN,s=0.1: TOP1:  51.25
be

Epoch: [226][11/24]Time: 0.022 (0.042) Data: 0.001 (0.018) Loss: 0.0938 (0.0889)
Epoch: [226][12/24]Time: 0.022 (0.041) Data: 0.001 (0.017) Loss: 0.1319 (0.0922)
Epoch: [226][13/24]Time: 0.022 (0.040) Data: 0.001 (0.016) Loss: 0.0985 (0.0926)
Epoch: [226][14/24]Time: 0.022 (0.038) Data: 0.001 (0.015) Loss: 0.0695 (0.0911)
Epoch: [226][15/24]Time: 0.022 (0.037) Data: 0.001 (0.014) Loss: 0.1326 (0.0937)
Epoch: [226][16/24]Time: 0.023 (0.036) Data: 0.001 (0.013) Loss: 0.0815 (0.0930)
Epoch: [226][17/24]Time: 0.022 (0.036) Data: 0.001 (0.012) Loss: 0.0522 (0.0907)
Epoch: [226][18/24]Time: 0.022 (0.035) Data: 0.001 (0.012) Loss: 0.0843 (0.0904)
Epoch: [226][19/24]Time: 0.022 (0.034) Data: 0.001 (0.011) Loss: 0.0700 (0.0894)
Epoch: [226][20/24]Time: 0.022 (0.034) Data: 0.001 (0.011) Loss: 0.0951 (0.0896)
Epoch: [226][21/24]Time: 0.022 (0.033) Data: 0.001 (0.010) Loss: 0.0767 (0.0890)
Epoch: [226][22/24]Time: 0.022 (0.033) Data: 0.001 (0.010) Loss: 0.0990 (0.0895)
Epoch: [226][23/24]Time: 0.0

Epoch: [230][9/24]Time: 0.026 (0.048) Data: 0.001 (0.025) Loss: 0.1329 (0.0901)
Epoch: [230][10/24]Time: 0.023 (0.045) Data: 0.001 (0.023) Loss: 0.0956 (0.0906)
Epoch: [230][11/24]Time: 0.023 (0.043) Data: 0.001 (0.021) Loss: 0.0806 (0.0898)
Epoch: [230][12/24]Time: 0.023 (0.042) Data: 0.001 (0.019) Loss: 0.1112 (0.0914)
Epoch: [230][13/24]Time: 0.023 (0.040) Data: 0.001 (0.018) Loss: 0.1024 (0.0922)
Epoch: [230][14/24]Time: 0.023 (0.039) Data: 0.001 (0.017) Loss: 0.0780 (0.0912)
Epoch: [230][15/24]Time: 0.023 (0.038) Data: 0.001 (0.016) Loss: 0.0913 (0.0912)
Epoch: [230][16/24]Time: 0.023 (0.037) Data: 0.001 (0.015) Loss: 0.1170 (0.0928)
Epoch: [230][17/24]Time: 0.023 (0.037) Data: 0.001 (0.014) Loss: 0.0801 (0.0921)
Epoch: [230][18/24]Time: 0.023 (0.036) Data: 0.001 (0.013) Loss: 0.0988 (0.0924)
Epoch: [230][19/24]Time: 0.023 (0.035) Data: 0.001 (0.013) Loss: 0.0851 (0.0920)
Epoch: [230][20/24]Time: 0.023 (0.035) Data: 0.001 (0.012) Loss: 0.0888 (0.0919)
Epoch: [230][21/24]Time: 0.02

Epoch: [234][5/24]Time: 0.232 (0.056) Data: 0.203 (0.035) Loss: 0.0599 (0.0615)
Epoch: [234][6/24]Time: 0.034 (0.053) Data: 0.001 (0.030) Loss: 0.1069 (0.0680)
Epoch: [234][7/24]Time: 0.026 (0.050) Data: 0.001 (0.026) Loss: 0.1087 (0.0731)
Epoch: [234][8/24]Time: 0.026 (0.047) Data: 0.001 (0.023) Loss: 0.0696 (0.0727)
Epoch: [234][9/24]Time: 0.021 (0.044) Data: 0.001 (0.021) Loss: 0.0795 (0.0734)
Epoch: [234][10/24]Time: 0.023 (0.042) Data: 0.001 (0.019) Loss: 0.0921 (0.0751)
Epoch: [234][11/24]Time: 0.023 (0.041) Data: 0.001 (0.018) Loss: 0.1497 (0.0813)
Epoch: [234][12/24]Time: 0.023 (0.039) Data: 0.001 (0.016) Loss: 0.1084 (0.0834)
Epoch: [234][13/24]Time: 0.022 (0.038) Data: 0.001 (0.015) Loss: 0.1510 (0.0882)
Epoch: [234][14/24]Time: 0.023 (0.037) Data: 0.001 (0.014) Loss: 0.1178 (0.0902)
Epoch: [234][15/24]Time: 0.022 (0.036) Data: 0.001 (0.013) Loss: 0.1025 (0.0909)
Epoch: [234][16/24]Time: 0.022 (0.035) Data: 0.001 (0.013) Loss: 0.1165 (0.0925)
Epoch: [234][17/24]Time: 0.022 (0

error:  1.13464793116691e-13 step  11
cost:  0.19187087898674204
opt took 0.00min,   11iters
Epoch: [238][6/24]Time: 0.233 (0.051) Data: 0.205 (0.030) Loss: 0.1516 (0.0859)
Epoch: [238][7/24]Time: 0.027 (0.048) Data: 0.001 (0.026) Loss: 0.1136 (0.0894)
Epoch: [238][8/24]Time: 0.026 (0.046) Data: 0.001 (0.023) Loss: 0.1036 (0.0910)
Epoch: [238][9/24]Time: 0.026 (0.044) Data: 0.001 (0.021) Loss: 0.1440 (0.0963)
Epoch: [238][10/24]Time: 0.024 (0.042) Data: 0.001 (0.019) Loss: 0.0831 (0.0951)
Epoch: [238][11/24]Time: 0.023 (0.041) Data: 0.001 (0.018) Loss: 0.1449 (0.0992)
Epoch: [238][12/24]Time: 0.023 (0.039) Data: 0.001 (0.017) Loss: 0.0919 (0.0987)
Epoch: [238][13/24]Time: 0.023 (0.038) Data: 0.001 (0.015) Loss: 0.0871 (0.0978)
Epoch: [238][14/24]Time: 0.023 (0.037) Data: 0.001 (0.014) Loss: 0.0704 (0.0960)
Epoch: [238][15/24]Time: 0.023 (0.036) Data: 0.001 (0.014) Loss: 0.0860 (0.0954)
Epoch: [238][16/24]Time: 0.023 (0.035) Data: 0.001 (0.013) Loss: 0.0805 (0.0945)
Epoch: [238][17/24]T

error:  1.4099832412739488e-14 step  11
cost:  0.1891180709891579
opt took 0.00min,   11iters
Epoch: [242][6/24]Time: 0.242 (0.053) Data: 0.215 (0.031) Loss: 0.2013 (0.1122)
Epoch: [242][7/24]Time: 0.028 (0.049) Data: 0.001 (0.027) Loss: 0.1017 (0.1109)
Epoch: [242][8/24]Time: 0.026 (0.047) Data: 0.001 (0.025) Loss: 0.1982 (0.1206)
Epoch: [242][9/24]Time: 0.026 (0.045) Data: 0.001 (0.022) Loss: 0.1260 (0.1211)
Epoch: [242][10/24]Time: 0.023 (0.043) Data: 0.001 (0.020) Loss: 0.1186 (0.1209)
Epoch: [242][11/24]Time: 0.022 (0.041) Data: 0.001 (0.019) Loss: 0.2207 (0.1292)
Epoch: [242][12/24]Time: 0.022 (0.040) Data: 0.001 (0.017) Loss: 0.1732 (0.1326)
Epoch: [242][13/24]Time: 0.024 (0.038) Data: 0.001 (0.016) Loss: 0.1661 (0.1350)
Epoch: [242][14/24]Time: 0.023 (0.037) Data: 0.001 (0.015) Loss: 0.1461 (0.1357)
Epoch: [242][15/24]Time: 0.023 (0.036) Data: 0.001 (0.014) Loss: 0.0741 (0.1319)
Epoch: [242][16/24]Time: 0.022 (0.036) Data: 0.001 (0.013) Loss: 0.1787 (0.1346)
Epoch: [242][17/24]

error:  1.1324274851176597e-13 step  11
cost:  0.18608468417462468
opt took 0.00min,   11iters
Epoch: [246][6/24]Time: 0.325 (0.065) Data: 0.285 (0.041) Loss: 0.1994 (0.1701)
Epoch: [246][7/24]Time: 0.036 (0.061) Data: 0.001 (0.036) Loss: 0.2135 (0.1756)
Epoch: [246][8/24]Time: 0.038 (0.058) Data: 0.001 (0.032) Loss: 0.1678 (0.1747)
Epoch: [246][9/24]Time: 0.023 (0.055) Data: 0.001 (0.029) Loss: 0.1653 (0.1738)
Epoch: [246][10/24]Time: 0.021 (0.052) Data: 0.001 (0.027) Loss: 0.2094 (0.1770)
Epoch: [246][11/24]Time: 0.021 (0.049) Data: 0.001 (0.025) Loss: 0.1809 (0.1773)
Epoch: [246][12/24]Time: 0.021 (0.047) Data: 0.001 (0.023) Loss: 0.1164 (0.1726)
Epoch: [246][13/24]Time: 0.021 (0.045) Data: 0.001 (0.021) Loss: 0.2271 (0.1765)
Epoch: [246][14/24]Time: 0.022 (0.044) Data: 0.001 (0.020) Loss: 0.1064 (0.1718)
Epoch: [246][15/24]Time: 0.022 (0.042) Data: 0.001 (0.019) Loss: 0.1938 (0.1732)
Epoch: [246][16/24]Time: 0.021 (0.041) Data: 0.001 (0.018) Loss: 0.2359 (0.1769)
Epoch: [246][17/24

error:  1.0902390101819037e-13 step  11
cost:  0.17179509050179023
opt took 0.00min,   11iters
Epoch: [250][7/24]Time: 0.235 (0.049) Data: 0.208 (0.027) Loss: 0.1430 (0.1618)
Epoch: [250][8/24]Time: 0.038 (0.048) Data: 0.001 (0.024) Loss: 0.2376 (0.1703)
Epoch: [250][9/24]Time: 0.036 (0.047) Data: 0.001 (0.022) Loss: 0.2260 (0.1758)
Epoch: [250][10/24]Time: 0.030 (0.045) Data: 0.001 (0.020) Loss: 0.2429 (0.1819)
Epoch: [250][11/24]Time: 0.021 (0.043) Data: 0.001 (0.018) Loss: 0.1621 (0.1803)
Epoch: [250][12/24]Time: 0.021 (0.041) Data: 0.001 (0.017) Loss: 0.1414 (0.1773)
Epoch: [250][13/24]Time: 0.021 (0.040) Data: 0.001 (0.016) Loss: 0.1649 (0.1764)
Epoch: [250][14/24]Time: 0.022 (0.039) Data: 0.001 (0.015) Loss: 0.2632 (0.1822)
Epoch: [250][15/24]Time: 0.022 (0.038) Data: 0.001 (0.014) Loss: 0.1706 (0.1815)
Epoch: [250][16/24]Time: 0.021 (0.037) Data: 0.001 (0.013) Loss: 0.1932 (0.1822)
Epoch: [250][17/24]Time: 0.021 (0.036) Data: 0.001 (0.012) Loss: 0.2269 (0.1846)
Epoch: [250][18/2

error:  2.8055335832277706e-13 step  11
cost:  0.16443029258227795
opt took 0.00min,   11iters
Epoch: [254][8/24]Time: 0.251 (0.049) Data: 0.222 (0.025) Loss: 0.1939 (0.1618)
Epoch: [254][9/24]Time: 0.029 (0.047) Data: 0.001 (0.023) Loss: 0.1758 (0.1632)
Epoch: [254][10/24]Time: 0.027 (0.045) Data: 0.001 (0.021) Loss: 0.2413 (0.1703)
Epoch: [254][11/24]Time: 0.026 (0.043) Data: 0.001 (0.019) Loss: 0.1731 (0.1705)
Epoch: [254][12/24]Time: 0.022 (0.042) Data: 0.001 (0.018) Loss: 0.1901 (0.1720)
Epoch: [254][13/24]Time: 0.023 (0.040) Data: 0.001 (0.017) Loss: 0.1996 (0.1740)
Epoch: [254][14/24]Time: 0.022 (0.039) Data: 0.001 (0.016) Loss: 0.1639 (0.1733)
Epoch: [254][15/24]Time: 0.022 (0.038) Data: 0.001 (0.015) Loss: 0.1745 (0.1734)
Epoch: [254][16/24]Time: 0.022 (0.037) Data: 0.001 (0.014) Loss: 0.1289 (0.1708)
Epoch: [254][17/24]Time: 0.023 (0.036) Data: 0.001 (0.013) Loss: 0.2094 (0.1729)
Epoch: [254][18/24]Time: 0.022 (0.036) Data: 0.001 (0.013) Loss: 0.1758 (0.1731)
Epoch: [254][19/

error:  2.1893598045608087e-13 step  11
cost:  0.1711719762052207
opt took 0.00min,   11iters
Epoch: [258][8/24]Time: 0.249 (0.047) Data: 0.223 (0.026) Loss: 0.1948 (0.1691)
Epoch: [258][9/24]Time: 0.038 (0.046) Data: 0.001 (0.023) Loss: 0.1614 (0.1683)
Epoch: [258][10/24]Time: 0.037 (0.045) Data: 0.001 (0.021) Loss: 0.1523 (0.1669)
Epoch: [258][11/24]Time: 0.029 (0.044) Data: 0.001 (0.019) Loss: 0.1731 (0.1674)
Epoch: [258][12/24]Time: 0.021 (0.042) Data: 0.001 (0.018) Loss: 0.2041 (0.1702)
Epoch: [258][13/24]Time: 0.021 (0.041) Data: 0.001 (0.017) Loss: 0.1239 (0.1669)
Epoch: [258][14/24]Time: 0.021 (0.039) Data: 0.001 (0.016) Loss: 0.1933 (0.1687)
Epoch: [258][15/24]Time: 0.021 (0.038) Data: 0.001 (0.015) Loss: 0.1472 (0.1673)
Epoch: [258][16/24]Time: 0.021 (0.037) Data: 0.001 (0.014) Loss: 0.2256 (0.1707)
Epoch: [258][17/24]Time: 0.021 (0.036) Data: 0.001 (0.013) Loss: 0.1579 (0.1700)
Epoch: [258][18/24]Time: 0.021 (0.035) Data: 0.001 (0.012) Loss: 0.1651 (0.1698)
Epoch: [258][19/2

Epoch: [262][8/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2370 (0.1940)
error:  1.9417800700693988e-13 step  11
cost:  0.16841726355537445
opt took 0.00min,   11iters
Epoch: [262][9/24]Time: 0.283 (0.049) Data: 0.252 (0.026) Loss: 0.1822 (0.1928)
Epoch: [262][10/24]Time: 0.031 (0.047) Data: 0.001 (0.024) Loss: 0.1773 (0.1914)
Epoch: [262][11/24]Time: 0.030 (0.046) Data: 0.001 (0.022) Loss: 0.1578 (0.1886)
Epoch: [262][12/24]Time: 0.027 (0.045) Data: 0.001 (0.020) Loss: 0.1986 (0.1894)
Epoch: [262][13/24]Time: 0.023 (0.043) Data: 0.001 (0.019) Loss: 0.1530 (0.1868)
Epoch: [262][14/24]Time: 0.028 (0.042) Data: 0.001 (0.018) Loss: 0.1906 (0.1870)
Epoch: [262][15/24]Time: 0.028 (0.041) Data: 0.001 (0.017) Loss: 0.1728 (0.1861)
Epoch: [262][16/24]Time: 0.028 (0.040) Data: 0.001 (0.016) Loss: 0.1912 (0.1864)
Epoch: [262][17/24]Time: 0.029 (0.040) Data: 0.001 (0.015) Loss: 0.1586 (0.1849)
Epoch: [262][18/24]Time: 0.028 (0.039) Data: 0.001 (0.014) Loss: 0.1992 (0.1856)
Epoch: [262][19/

Epoch: [266][8/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2296 (0.1714)
Epoch: [266][9/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1617 (0.1705)
error:  1.8973711490843925e-13 step  11
cost:  0.17421350568279226
opt took 0.00min,   11iters
Epoch: [266][10/24]Time: 0.292 (0.047) Data: 0.263 (0.025) Loss: 0.1903 (0.1723)
Epoch: [266][11/24]Time: 0.027 (0.046) Data: 0.001 (0.023) Loss: 0.1751 (0.1725)
Epoch: [266][12/24]Time: 0.025 (0.044) Data: 0.001 (0.021) Loss: 0.2163 (0.1759)
Epoch: [266][13/24]Time: 0.021 (0.042) Data: 0.001 (0.020) Loss: 0.1725 (0.1756)
Epoch: [266][14/24]Time: 0.025 (0.041) Data: 0.001 (0.018) Loss: 0.1651 (0.1749)
Epoch: [266][15/24]Time: 0.021 (0.040) Data: 0.001 (0.017) Loss: 0.1612 (0.1741)
Epoch: [266][16/24]Time: 0.021 (0.039) Data: 0.001 (0.016) Loss: 0.1614 (0.1733)
Epoch: [266][17/24]Time: 0.021 (0.038) Data: 0.001 (0.015) Loss: 0.1727 (0.1733)
Epoch: [266][18/24]Time: 0.022 (0.037) Data: 0.001 (0.015) Loss: 0.1614 (0.1727)
Epoch: [266][19/

Epoch: [270][8/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1629 (0.1614)
error:  1.6009416015094757e-13 step  11
cost:  0.17106777152098998
opt took 0.00min,   11iters
Epoch: [270][9/24]Time: 0.234 (0.044) Data: 0.211 (0.022) Loss: 0.1630 (0.1616)
Epoch: [270][10/24]Time: 0.021 (0.041) Data: 0.001 (0.020) Loss: 0.1618 (0.1616)
Epoch: [270][11/24]Time: 0.021 (0.040) Data: 0.001 (0.018) Loss: 0.1992 (0.1647)
Epoch: [270][12/24]Time: 0.021 (0.038) Data: 0.001 (0.017) Loss: 0.1548 (0.1640)
Epoch: [270][13/24]Time: 0.023 (0.037) Data: 0.001 (0.016) Loss: 0.2252 (0.1683)
Epoch: [270][14/24]Time: 0.023 (0.036) Data: 0.001 (0.015) Loss: 0.1592 (0.1677)
Epoch: [270][15/24]Time: 0.021 (0.035) Data: 0.001 (0.014) Loss: 0.1525 (0.1668)
Epoch: [270][16/24]Time: 0.021 (0.035) Data: 0.001 (0.013) Loss: 0.2078 (0.1692)
Epoch: [270][17/24]Time: 0.021 (0.034) Data: 0.001 (0.012) Loss: 0.2028 (0.1710)
Epoch: [270][18/24]Time: 0.022 (0.033) Data: 0.001 (0.012) Loss: 0.1825 (0.1716)
Epoch: [270][19/

Epoch: [274][9/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.1704 (0.1756)
error:  1.1812772982011666e-13 step  11
cost:  0.1661581888492219
opt took 0.00min,   11iters
Epoch: [274][10/24]Time: 0.234 (0.041) Data: 0.207 (0.019) Loss: 0.1439 (0.1727)
Epoch: [274][11/24]Time: 0.027 (0.039) Data: 0.001 (0.018) Loss: 0.1892 (0.1741)
Epoch: [274][12/24]Time: 0.026 (0.038) Data: 0.001 (0.017) Loss: 0.1813 (0.1746)
Epoch: [274][13/24]Time: 0.027 (0.038) Data: 0.001 (0.015) Loss: 0.1738 (0.1746)
Epoch: [274][14/24]Time: 0.024 (0.037) Data: 0.001 (0.015) Loss: 0.2204 (0.1776)
Epoch: [274][15/24]Time: 0.023 (0.036) Data: 0.001 (0.014) Loss: 0.1984 (0.1789)
Epoch: [274][16/24]Time: 0.023 (0.035) Data: 0.001 (0.013) Loss: 0.1556 (0.1775)
Epoch: [274][17/24]Time: 0.023 (0.034) Data: 0.001 (0.012) Loss: 0.1715 (0.1772)
Epoch: [274][18/24]Time: 0.023 (0.034) Data: 0.001 (0.012) Loss: 0.1484 (0.1757)
Epoch: [274][19/24]Time: 0.023 (0.033) Data: 0.001 (0.011) Loss: 0.1761 (0.1757)
Epoch: [274][20/

Epoch: [278][9/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.1387 (0.1659)
error:  1.9517720772910252e-13 step  11
cost:  0.17072863062559115
opt took 0.00min,   11iters
Epoch: [278][10/24]Time: 0.272 (0.044) Data: 0.245 (0.023) Loss: 0.1340 (0.1630)
Epoch: [278][11/24]Time: 0.026 (0.042) Data: 0.001 (0.021) Loss: 0.1606 (0.1628)
Epoch: [278][12/24]Time: 0.026 (0.041) Data: 0.001 (0.020) Loss: 0.2080 (0.1663)
Epoch: [278][13/24]Time: 0.026 (0.040) Data: 0.001 (0.018) Loss: 0.2510 (0.1724)
Epoch: [278][14/24]Time: 0.024 (0.039) Data: 0.001 (0.017) Loss: 0.1935 (0.1738)
Epoch: [278][15/24]Time: 0.023 (0.038) Data: 0.001 (0.016) Loss: 0.1516 (0.1724)
Epoch: [278][16/24]Time: 0.023 (0.037) Data: 0.001 (0.015) Loss: 0.1899 (0.1734)
Epoch: [278][17/24]Time: 0.023 (0.036) Data: 0.001 (0.014) Loss: 0.1993 (0.1748)
Epoch: [278][18/24]Time: 0.023 (0.036) Data: 0.001 (0.014) Loss: 0.1350 (0.1728)
Epoch: [278][19/24]Time: 0.024 (0.035) Data: 0.001 (0.013) Loss: 0.2029 (0.1743)
Epoch: [278][20

Epoch: [282][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1509 (0.1574)
Epoch: [282][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1108 (0.1532)
error:  2.1316282072803006e-14 step  11
cost:  0.15971032389352102
opt took 0.00min,   11iters
Epoch: [282][11/24]Time: 0.248 (0.041) Data: 0.221 (0.019) Loss: 0.2079 (0.1577)
Epoch: [282][12/24]Time: 0.029 (0.040) Data: 0.001 (0.018) Loss: 0.1904 (0.1602)
Epoch: [282][13/24]Time: 0.027 (0.039) Data: 0.001 (0.017) Loss: 0.1951 (0.1627)
Epoch: [282][14/24]Time: 0.027 (0.038) Data: 0.001 (0.016) Loss: 0.1698 (0.1632)
Epoch: [282][15/24]Time: 0.023 (0.037) Data: 0.001 (0.015) Loss: 0.1956 (0.1652)
Epoch: [282][16/24]Time: 0.023 (0.036) Data: 0.001 (0.014) Loss: 0.2328 (0.1692)
Epoch: [282][17/24]Time: 0.023 (0.036) Data: 0.001 (0.013) Loss: 0.1367 (0.1674)
Epoch: [282][18/24]Time: 0.023 (0.035) Data: 0.001 (0.012) Loss: 0.1883 (0.1685)
Epoch: [282][19/24]Time: 0.023 (0.034) Data: 0.001 (0.012) Loss: 0.2122 (0.1707)
Epoch: [282][20

Epoch: [286][9/24]Time: 0.027 (0.022) Data: 0.001 (0.001) Loss: 0.1347 (0.1613)
Epoch: [286][10/24]Time: 0.026 (0.023) Data: 0.001 (0.001) Loss: 0.1407 (0.1594)
Epoch: [286][11/24]Time: 0.025 (0.023) Data: 0.001 (0.001) Loss: 0.1910 (0.1620)
error:  1.1124434706744069e-13 step  11
cost:  0.17160224415986994
opt took 0.00min,   11iters
Epoch: [286][12/24]Time: 0.301 (0.044) Data: 0.264 (0.021) Loss: 0.1421 (0.1605)
Epoch: [286][13/24]Time: 0.032 (0.043) Data: 0.001 (0.020) Loss: 0.1647 (0.1608)
Epoch: [286][14/24]Time: 0.035 (0.043) Data: 0.001 (0.018) Loss: 0.1710 (0.1615)
Epoch: [286][15/24]Time: 0.026 (0.042) Data: 0.001 (0.017) Loss: 0.1519 (0.1609)
Epoch: [286][16/24]Time: 0.024 (0.041) Data: 0.001 (0.016) Loss: 0.1722 (0.1616)
Epoch: [286][17/24]Time: 0.024 (0.040) Data: 0.001 (0.016) Loss: 0.1743 (0.1623)
Epoch: [286][18/24]Time: 0.025 (0.039) Data: 0.001 (0.015) Loss: 0.2028 (0.1644)
Epoch: [286][19/24]Time: 0.024 (0.038) Data: 0.001 (0.014) Loss: 0.1837 (0.1654)
Epoch: [286][20

Epoch: [290][9/24]Time: 0.023 (0.022) Data: 0.001 (0.001) Loss: 0.1416 (0.1569)
Epoch: [290][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1981 (0.1606)
Epoch: [290][11/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1818 (0.1624)
error:  1.667554982986985e-13 step  11
cost:  0.1697010509155594
opt took 0.00min,   11iters
Epoch: [290][12/24]Time: 0.239 (0.039) Data: 0.214 (0.017) Loss: 0.1271 (0.1597)
Epoch: [290][13/24]Time: 0.031 (0.038) Data: 0.001 (0.016) Loss: 0.1948 (0.1622)
Epoch: [290][14/24]Time: 0.030 (0.038) Data: 0.001 (0.015) Loss: 0.1988 (0.1646)
Epoch: [290][15/24]Time: 0.031 (0.037) Data: 0.001 (0.014) Loss: 0.1795 (0.1656)
Epoch: [290][16/24]Time: 0.026 (0.036) Data: 0.001 (0.014) Loss: 0.1749 (0.1661)
Epoch: [290][17/24]Time: 0.026 (0.036) Data: 0.001 (0.013) Loss: 0.1694 (0.1663)
Epoch: [290][18/24]Time: 0.024 (0.035) Data: 0.001 (0.012) Loss: 0.2168 (0.1690)
Epoch: [290][19/24]Time: 0.025 (0.035) Data: 0.001 (0.012) Loss: 0.1895 (0.1700)
Epoch: [290][20/2

Epoch: [294][8/24]Time: 0.025 (0.023) Data: 0.001 (0.001) Loss: 0.1580 (0.1660)
Epoch: [294][9/24]Time: 0.024 (0.023) Data: 0.001 (0.001) Loss: 0.1487 (0.1642)
Epoch: [294][10/24]Time: 0.024 (0.023) Data: 0.001 (0.001) Loss: 0.0975 (0.1582)
Epoch: [294][11/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1416 (0.1568)
error:  3.2573943542502093e-13 step  11
cost:  0.17421613120425655
opt took 0.00min,   11iters
Epoch: [294][12/24]Time: 0.246 (0.040) Data: 0.218 (0.018) Loss: 0.2529 (0.1642)
Epoch: [294][13/24]Time: 0.027 (0.039) Data: 0.001 (0.016) Loss: 0.1943 (0.1663)
Epoch: [294][14/24]Time: 0.027 (0.039) Data: 0.001 (0.015) Loss: 0.1777 (0.1671)
Epoch: [294][15/24]Time: 0.027 (0.038) Data: 0.001 (0.014) Loss: 0.2223 (0.1705)
Epoch: [294][16/24]Time: 0.024 (0.037) Data: 0.001 (0.014) Loss: 0.1801 (0.1711)
Epoch: [294][17/24]Time: 0.022 (0.036) Data: 0.001 (0.013) Loss: 0.1526 (0.1701)
Epoch: [294][18/24]Time: 0.021 (0.035) Data: 0.001 (0.012) Loss: 0.1845 (0.1708)
Epoch: [294][19/

Epoch: [298][9/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1859 (0.1615)
Epoch: [298][10/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1375 (0.1594)
Epoch: [298][11/24]Time: 0.026 (0.023) Data: 0.001 (0.001) Loss: 0.2006 (0.1628)
Epoch: [298][12/24]Time: 0.024 (0.023) Data: 0.001 (0.001) Loss: 0.1752 (0.1637)
error:  1.8496315590255108e-13 step  11
cost:  0.16747316376694832
opt took 0.00min,   11iters
Epoch: [298][13/24]Time: 0.249 (0.039) Data: 0.222 (0.017) Loss: 0.2102 (0.1671)
Epoch: [298][14/24]Time: 0.028 (0.038) Data: 0.001 (0.016) Loss: 0.1701 (0.1673)
Epoch: [298][15/24]Time: 0.028 (0.038) Data: 0.001 (0.015) Loss: 0.1835 (0.1683)
Epoch: [298][16/24]Time: 0.027 (0.037) Data: 0.001 (0.014) Loss: 0.1717 (0.1685)
Epoch: [298][17/24]Time: 0.023 (0.036) Data: 0.001 (0.013) Loss: 0.1524 (0.1676)
Epoch: [298][18/24]Time: 0.024 (0.036) Data: 0.001 (0.013) Loss: 0.2363 (0.1712)
Epoch: [298][19/24]Time: 0.023 (0.035) Data: 0.001 (0.012) Loss: 0.1917 (0.1722)
Epoch: [298][20

Epoch: [302][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1747 (0.1697)
Epoch: [302][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2542 (0.1773)
Epoch: [302][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1407 (0.1743)
Epoch: [302][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1454 (0.1721)
error:  3.0753177782116836e-14 step  11
cost:  0.1751244483935277
opt took 0.00min,   11iters
Epoch: [302][13/24]Time: 0.237 (0.037) Data: 0.207 (0.016) Loss: 0.2606 (0.1784)
Epoch: [302][14/24]Time: 0.029 (0.037) Data: 0.001 (0.015) Loss: 0.1529 (0.1767)
Epoch: [302][15/24]Time: 0.027 (0.036) Data: 0.001 (0.014) Loss: 0.1798 (0.1769)
Epoch: [302][16/24]Time: 0.035 (0.036) Data: 0.001 (0.013) Loss: 0.1219 (0.1736)
Epoch: [302][17/24]Time: 0.021 (0.035) Data: 0.001 (0.012) Loss: 0.2119 (0.1758)
Epoch: [302][18/24]Time: 0.021 (0.034) Data: 0.001 (0.012) Loss: 0.1710 (0.1755)
Epoch: [302][19/24]Time: 0.021 (0.034) Data: 0.001 (0.011) Loss: 0.1782 (0.1757)
Epoch: [302][20/

Epoch: [306][8/24]Time: 0.024 (0.025) Data: 0.001 (0.001) Loss: 0.1111 (0.1798)
Epoch: [306][9/24]Time: 0.024 (0.025) Data: 0.001 (0.001) Loss: 0.1320 (0.1750)
Epoch: [306][10/24]Time: 0.024 (0.025) Data: 0.001 (0.001) Loss: 0.1625 (0.1739)
Epoch: [306][11/24]Time: 0.035 (0.025) Data: 0.001 (0.001) Loss: 0.1579 (0.1725)
Epoch: [306][12/24]Time: 0.033 (0.026) Data: 0.001 (0.001) Loss: 0.1222 (0.1687)
Epoch: [306][13/24]Time: 0.031 (0.026) Data: 0.001 (0.001) Loss: 0.1520 (0.1675)
error:  1.1812772982011666e-13 step  11
cost:  0.17495578164421619
opt took 0.00min,   11iters
Epoch: [306][14/24]Time: 0.296 (0.044) Data: 0.265 (0.019) Loss: 0.2004 (0.1697)
Epoch: [306][15/24]Time: 0.032 (0.044) Data: 0.001 (0.018) Loss: 0.1794 (0.1703)
Epoch: [306][16/24]Time: 0.039 (0.043) Data: 0.001 (0.017) Loss: 0.1529 (0.1693)
Epoch: [306][17/24]Time: 0.022 (0.042) Data: 0.001 (0.016) Loss: 0.2008 (0.1710)
Epoch: [306][18/24]Time: 0.024 (0.041) Data: 0.001 (0.015) Loss: 0.1685 (0.1709)
Epoch: [306][19/

Epoch: [310][8/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1998 (0.1721)
Epoch: [310][9/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1562 (0.1706)
Epoch: [310][10/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1890 (0.1722)
Epoch: [310][11/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1677 (0.1719)
Epoch: [310][12/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1028 (0.1665)
Epoch: [310][13/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1843 (0.1678)
Epoch: [310][14/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.1427 (0.1661)
error:  2.1405099914773018e-13 step  11
cost:  0.16785718453823756
opt took 0.00min,   11iters
Epoch: [310][15/24]Time: 0.276 (0.038) Data: 0.245 (0.016) Loss: 0.2312 (0.1702)
Epoch: [310][16/24]Time: 0.032 (0.038) Data: 0.001 (0.015) Loss: 0.2048 (0.1722)
Epoch: [310][17/24]Time: 0.032 (0.038) Data: 0.001 (0.014) Loss: 0.1653 (0.1719)
Epoch: [310][18/24]Time: 0.028 (0.037) Data: 0.001 (0.014) Loss: 0.1652 (0.1715)
Epoch: [310][19/

Epoch: [314][9/24]Time: 0.023 (0.022) Data: 0.001 (0.001) Loss: 0.1439 (0.1674)
Epoch: [314][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1583 (0.1666)
Epoch: [314][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1207 (0.1628)
Epoch: [314][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1787 (0.1640)
Epoch: [314][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2264 (0.1685)
Epoch: [314][14/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.1751 (0.1689)
error:  5.517808432387028e-14 step  11
cost:  0.16480635038391278
opt took 0.00min,   11iters
Epoch: [314][15/24]Time: 0.254 (0.036) Data: 0.227 (0.015) Loss: 0.1659 (0.1687)
Epoch: [314][16/24]Time: 0.025 (0.035) Data: 0.001 (0.014) Loss: 0.1866 (0.1698)
Epoch: [314][17/24]Time: 0.026 (0.035) Data: 0.001 (0.013) Loss: 0.2161 (0.1723)
Epoch: [314][18/24]Time: 0.026 (0.034) Data: 0.001 (0.013) Loss: 0.1589 (0.1716)
Epoch: [314][19/24]Time: 0.024 (0.034) Data: 0.001 (0.012) Loss: 0.1424 (0.1702)
Epoch: [314][20/

Epoch: [318][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1867 (0.1696)
Epoch: [318][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.1530 (0.1681)
Epoch: [318][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1340 (0.1652)
Epoch: [318][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1984 (0.1678)
Epoch: [318][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1659 (0.1676)
Epoch: [318][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1647 (0.1675)
Epoch: [318][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1265 (0.1649)
error:  5.551115123125783e-14 step  11
cost:  0.18049879883258596
opt took 0.00min,   11iters
Epoch: [318][16/24]Time: 0.244 (0.035) Data: 0.217 (0.014) Loss: 0.2465 (0.1697)
Epoch: [318][17/24]Time: 0.027 (0.034) Data: 0.001 (0.013) Loss: 0.1807 (0.1703)
Epoch: [318][18/24]Time: 0.026 (0.034) Data: 0.001 (0.012) Loss: 0.1253 (0.1679)
Epoch: [318][19/24]Time: 0.026 (0.034) Data: 0.001 (0.012) Loss: 0.1800 (0.1685)
Epoch: [318][20/

Epoch: [322][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2575 (0.2213)
Epoch: [322][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1762 (0.2172)
Epoch: [322][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2975 (0.2239)
Epoch: [322][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1726 (0.2200)
Epoch: [322][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1603 (0.2157)
Epoch: [322][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1608 (0.2121)
Epoch: [322][15/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2437 (0.2140)
error:  5.4511950509095186e-14 step  11
cost:  0.16997229784239765
opt took 0.00min,   11iters
Epoch: [322][16/24]Time: 0.212 (0.033) Data: 0.190 (0.012) Loss: 0.2499 (0.2161)
Epoch: [322][17/24]Time: 0.026 (0.032) Data: 0.001 (0.011) Loss: 0.2802 (0.2197)
Epoch: [322][18/24]Time: 0.026 (0.032) Data: 0.001 (0.011) Loss: 0.2298 (0.2202)
Epoch: [322][19/24]Time: 0.026 (0.032) Data: 0.001 (0.010) Loss: 0.2335 (0.2209)
Epoch: [322][20

Epoch: [326][9/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2785 (0.2929)
Epoch: [326][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3941 (0.3021)
Epoch: [326][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3133 (0.3030)
Epoch: [326][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3096 (0.3036)
Epoch: [326][13/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.3313 (0.3055)
Epoch: [326][14/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2662 (0.3029)
Epoch: [326][15/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.3294 (0.3046)
error:  2.3070434451710753e-13 step  11
cost:  0.1709291051372952
opt took 0.00min,   11iters
Epoch: [326][16/24]Time: 0.232 (0.034) Data: 0.207 (0.013) Loss: 0.2716 (0.3026)
Epoch: [326][17/24]Time: 0.026 (0.033) Data: 0.001 (0.012) Loss: 0.2541 (0.2999)
Epoch: [326][18/24]Time: 0.026 (0.033) Data: 0.001 (0.012) Loss: 0.2873 (0.2993)
Epoch: [326][19/24]Time: 0.024 (0.033) Data: 0.001 (0.011) Loss: 0.2345 (0.2960)
Epoch: [326][20/

Epoch: [330][8/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2983 (0.3370)
Epoch: [330][9/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2778 (0.3311)
Epoch: [330][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2644 (0.3250)
Epoch: [330][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3126 (0.3240)
Epoch: [330][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2881 (0.3212)
Epoch: [330][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3040 (0.3200)
Epoch: [330][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2919 (0.3181)
Epoch: [330][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2867 (0.3162)
Epoch: [330][16/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1970 (0.3091)
error:  8.426592756904938e-14 step  11
cost:  0.1504410250625573
opt took 0.00min,   11iters
Epoch: [330][17/24]Time: 0.247 (0.034) Data: 0.210 (0.012) Loss: 0.3412 (0.3109)
Epoch: [330][18/24]Time: 0.027 (0.034) Data: 0.001 (0.012) Loss: 0.3154 (0.3112)
Epoch: [330][19/24

Epoch: [334][8/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3467 (0.3313)
Epoch: [334][9/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3288 (0.3310)
Epoch: [334][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2483 (0.3235)
Epoch: [334][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2865 (0.3204)
Epoch: [334][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2432 (0.3145)
Epoch: [334][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2196 (0.3077)
Epoch: [334][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3630 (0.3114)
Epoch: [334][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3437 (0.3134)
Epoch: [334][16/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3616 (0.3162)
error:  1.0902390101819037e-13 step  11
cost:  0.15449638389404355
opt took 0.00min,   11iters
Epoch: [334][17/24]Time: 0.310 (0.038) Data: 0.278 (0.016) Loss: 0.3120 (0.3160)
Epoch: [334][18/24]Time: 0.032 (0.037) Data: 0.001 (0.015) Loss: 0.2576 (0.3129)
Epoch: [334][19/

Epoch: [338][9/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.3393 (0.2889)
Epoch: [338][10/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.3025 (0.2901)
Epoch: [338][11/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2717 (0.2886)
Epoch: [338][12/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.3184 (0.2909)
Epoch: [338][13/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.3035 (0.2918)
Epoch: [338][14/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2985 (0.2922)
Epoch: [338][15/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2789 (0.2914)
Epoch: [338][16/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.3072 (0.2923)
Epoch: [338][17/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.4122 (0.2990)
error:  3.2529534621517087e-14 step  11
cost:  0.15790075909782703
opt took 0.00min,   11iters
Epoch: [338][18/24]Time: 0.270 (0.034) Data: 0.240 (0.013) Loss: 0.2269 (0.2952)
Epoch: [338][19/24]Time: 0.030 (0.034) Data: 0.001 (0.013) Loss: 0.3464 (0.2977)
Epoch: [338][20

Epoch: [342][8/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.3742 (0.3130)
Epoch: [342][9/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3567 (0.3174)
Epoch: [342][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2317 (0.3096)
Epoch: [342][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.1814 (0.2989)
Epoch: [342][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3146 (0.3001)
Epoch: [342][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2981 (0.3000)
Epoch: [342][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2464 (0.2964)
Epoch: [342][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2865 (0.2958)
Epoch: [342][16/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3351 (0.2981)
Epoch: [342][17/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3748 (0.3023)
Epoch: [342][18/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3027 (0.3024)
error:  2.6645352591003757e-13 step  11
cost:  0.15262583482419032
opt took 0.00min,   11iters
Epoch: [342][19/

Epoch: [346][8/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2492 (0.2893)
Epoch: [346][9/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3664 (0.2970)
Epoch: [346][10/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2792 (0.2954)
Epoch: [346][11/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2273 (0.2897)
Epoch: [346][12/24]Time: 0.024 (0.023) Data: 0.001 (0.001) Loss: 0.2294 (0.2851)
Epoch: [346][13/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3822 (0.2920)
Epoch: [346][14/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3220 (0.2940)
Epoch: [346][15/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3344 (0.2966)
Epoch: [346][16/24]Time: 0.025 (0.023) Data: 0.001 (0.001) Loss: 0.3651 (0.3006)
Epoch: [346][17/24]Time: 0.026 (0.023) Data: 0.001 (0.001) Loss: 0.2635 (0.2985)
Epoch: [346][18/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1858 (0.2926)
error:  2.8754776337791554e-14 step  11
cost:  0.15514769703290118
opt took 0.00min,   11iters
Epoch: [346][19/

Epoch: [350][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3231 (0.3011)
Epoch: [350][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2383 (0.2954)
Epoch: [350][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2776 (0.2939)
Epoch: [350][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2857 (0.2933)
Epoch: [350][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2318 (0.2889)
Epoch: [350][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2862 (0.2887)
Epoch: [350][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2851 (0.2885)
Epoch: [350][16/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2492 (0.2862)
Epoch: [350][17/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.4293 (0.2941)
Epoch: [350][18/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3257 (0.2958)
error:  5.972999872483342e-14 step  11
cost:  0.15082021114353683
opt took 0.00min,   11iters
Epoch: [350][19/24]Time: 0.235 (0.032) Data: 0.206 (0.011) Loss: 0.4212 (0.3021)
Epoch: [350][20/

Epoch: [354][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3287 (0.3038)
Epoch: [354][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3603 (0.3090)
Epoch: [354][11/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.4273 (0.3188)
Epoch: [354][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3097 (0.3181)
Epoch: [354][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2941 (0.3164)
Epoch: [354][14/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2108 (0.3094)
Epoch: [354][15/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3106 (0.3094)
Epoch: [354][16/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2796 (0.3077)
Epoch: [354][17/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2794 (0.3061)
Epoch: [354][18/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2923 (0.3054)
Epoch: [354][19/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2928 (0.3048)
error:  9.769962616701378e-14 step  11
cost:  0.15323047430616715
opt took 0.00min,   11iters
Epoch: [354][20/

Epoch: [358][8/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.3687 (0.3339)
Epoch: [358][9/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.3853 (0.3390)
Epoch: [358][10/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.3318 (0.3384)
Epoch: [358][11/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.2474 (0.3308)
Epoch: [358][12/24]Time: 0.023 (0.024) Data: 0.001 (0.001) Loss: 0.3142 (0.3295)
Epoch: [358][13/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3245 (0.3292)
Epoch: [358][14/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3131 (0.3281)
Epoch: [358][15/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2889 (0.3256)
Epoch: [358][16/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2266 (0.3198)
Epoch: [358][17/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1917 (0.3127)
Epoch: [358][18/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2734 (0.3106)
Epoch: [358][19/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2574 (0.3080)
error:  1.1024514634527804e-13

Epoch: [362][8/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2842 (0.2966)
Epoch: [362][9/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2645 (0.2934)
Epoch: [362][10/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.3072 (0.2946)
Epoch: [362][11/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2664 (0.2923)
Epoch: [362][12/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.3327 (0.2954)
Epoch: [362][13/24]Time: 0.027 (0.023) Data: 0.001 (0.001) Loss: 0.2973 (0.2955)
Epoch: [362][14/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.3008 (0.2959)
Epoch: [362][15/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2764 (0.2947)
Epoch: [362][16/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2874 (0.2942)
Epoch: [362][17/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2974 (0.2944)
Epoch: [362][18/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.3682 (0.2983)
Epoch: [362][19/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.3157 (0.2992)
Epoch: [362][20/24]Time: 0.022

Epoch: [366][8/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2849 (0.3035)
Epoch: [366][9/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3099 (0.3041)
Epoch: [366][10/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2342 (0.2977)
Epoch: [366][11/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.4033 (0.3065)
Epoch: [366][12/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.3146 (0.3072)
Epoch: [366][13/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2263 (0.3014)
Epoch: [366][14/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2584 (0.2985)
Epoch: [366][15/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2527 (0.2957)
Epoch: [366][16/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.2561 (0.2933)
Epoch: [366][17/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.2884 (0.2931)
Epoch: [366][18/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2849 (0.2926)
Epoch: [366][19/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.3438 (0.2952)
Epoch: [366][20/24]Time: 0.022

Epoch: [370][8/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3244 (0.2904)
Epoch: [370][9/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2936 (0.2907)
Epoch: [370][10/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2971 (0.2913)
Epoch: [370][11/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.4454 (0.3041)
Epoch: [370][12/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.2536 (0.3002)
Epoch: [370][13/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.3237 (0.3019)
Epoch: [370][14/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.3469 (0.3049)
Epoch: [370][15/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2433 (0.3010)
Epoch: [370][16/24]Time: 0.029 (0.023) Data: 0.001 (0.001) Loss: 0.3377 (0.3032)
Epoch: [370][17/24]Time: 0.028 (0.023) Data: 0.001 (0.001) Loss: 0.2372 (0.2995)
Epoch: [370][18/24]Time: 0.027 (0.024) Data: 0.001 (0.001) Loss: 0.2087 (0.2948)
Epoch: [370][19/24]Time: 0.021 (0.023) Data: 0.001 (0.001) Loss: 0.2977 (0.2949)
Epoch: [370][20/24]Time: 0.021

Epoch: [374][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2476 (0.3059)
Epoch: [374][10/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3076 (0.3060)
Epoch: [374][11/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2417 (0.3007)
Epoch: [374][12/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2926 (0.3001)
Epoch: [374][13/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2923 (0.2995)
Epoch: [374][14/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.2891 (0.2988)
Epoch: [374][15/24]Time: 0.021 (0.022) Data: 0.001 (0.001) Loss: 0.3447 (0.3017)
Epoch: [374][16/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2933 (0.3012)
Epoch: [374][17/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2954 (0.3009)
Epoch: [374][18/24]Time: 0.021 (0.021) Data: 0.001 (0.001) Loss: 0.2580 (0.2986)
Epoch: [374][19/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2913 (0.2982)
Epoch: [374][20/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3010 (0.2984)
Epoch: [374][21/24]Time: 0.02

Epoch: [378][8/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2144 (0.3064)
Epoch: [378][9/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2699 (0.3028)
Epoch: [378][10/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2186 (0.2951)
Epoch: [378][11/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2545 (0.2917)
Epoch: [378][12/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2322 (0.2872)
Epoch: [378][13/24]Time: 0.023 (0.022) Data: 0.001 (0.001) Loss: 0.4361 (0.2978)
Epoch: [378][14/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2308 (0.2933)
Epoch: [378][15/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.2444 (0.2903)
Epoch: [378][16/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3241 (0.2923)
Epoch: [378][17/24]Time: 0.023 (0.022) Data: 0.001 (0.001) Loss: 0.3184 (0.2937)
Epoch: [378][18/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3357 (0.2959)
Epoch: [378][19/24]Time: 0.022 (0.022) Data: 0.001 (0.001) Loss: 0.3513 (0.2987)
Epoch: [378][20/24]Time: 0.022

Epoch: [382][7/24]Time: 0.022 (0.051) Data: 0.001 (0.027) Loss: 0.3175 (0.3168)
Epoch: [382][8/24]Time: 0.023 (0.048) Data: 0.001 (0.024) Loss: 0.3683 (0.3225)
Epoch: [382][9/24]Time: 0.023 (0.045) Data: 0.001 (0.022) Loss: 0.2457 (0.3149)
Epoch: [382][10/24]Time: 0.022 (0.043) Data: 0.001 (0.020) Loss: 0.3050 (0.3140)
Epoch: [382][11/24]Time: 0.022 (0.042) Data: 0.001 (0.019) Loss: 0.3684 (0.3185)
Epoch: [382][12/24]Time: 0.022 (0.040) Data: 0.001 (0.017) Loss: 0.3545 (0.3213)
Epoch: [382][13/24]Time: 0.022 (0.039) Data: 0.001 (0.016) Loss: 0.2654 (0.3173)
Epoch: [382][14/24]Time: 0.023 (0.038) Data: 0.001 (0.015) Loss: 0.2802 (0.3148)
Epoch: [382][15/24]Time: 0.022 (0.037) Data: 0.001 (0.014) Loss: 0.2614 (0.3115)
Epoch: [382][16/24]Time: 0.022 (0.036) Data: 0.001 (0.013) Loss: 0.2626 (0.3086)
Epoch: [382][17/24]Time: 0.022 (0.035) Data: 0.001 (0.013) Loss: 0.2249 (0.3039)
Epoch: [382][18/24]Time: 0.036 (0.035) Data: 0.001 (0.012) Loss: 0.2482 (0.3010)
Epoch: [382][19/24]Time: 0.030 

Epoch: [386][8/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2998 (0.3154)
Epoch: [386][9/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2506 (0.3090)
Epoch: [386][10/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2232 (0.3012)
Epoch: [386][11/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3035 (0.3014)
Epoch: [386][12/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2136 (0.2946)
Epoch: [386][13/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2921 (0.2944)
Epoch: [386][14/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.3072 (0.2953)
Epoch: [386][15/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2939 (0.2952)
Epoch: [386][16/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2940 (0.2951)
Epoch: [386][17/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.2901 (0.2948)
Epoch: [386][18/24]Time: 0.023 (0.023) Data: 0.001 (0.001) Loss: 0.1773 (0.2887)
Epoch: [386][19/24]Time: 0.022 (0.023) Data: 0.001 (0.001) Loss: 0.2273 (0.2856)
Epoch: [386][20/24]Time: 0.022

Epoch: [390][7/24]Time: 0.023 (0.053) Data: 0.001 (0.029) Loss: 0.2724 (0.2985)
Epoch: [390][8/24]Time: 0.023 (0.049) Data: 0.001 (0.026) Loss: 0.2712 (0.2955)
Epoch: [390][9/24]Time: 0.023 (0.047) Data: 0.001 (0.023) Loss: 0.2518 (0.2911)
Epoch: [390][10/24]Time: 0.022 (0.045) Data: 0.001 (0.021) Loss: 0.2989 (0.2918)
Epoch: [390][11/24]Time: 0.022 (0.043) Data: 0.001 (0.020) Loss: 0.3011 (0.2926)
Epoch: [390][12/24]Time: 0.022 (0.041) Data: 0.001 (0.018) Loss: 0.2976 (0.2930)
Epoch: [390][13/24]Time: 0.023 (0.040) Data: 0.001 (0.017) Loss: 0.2764 (0.2918)
Epoch: [390][14/24]Time: 0.023 (0.039) Data: 0.001 (0.016) Loss: 0.2947 (0.2920)
Epoch: [390][15/24]Time: 0.022 (0.038) Data: 0.001 (0.015) Loss: 0.2104 (0.2869)
Epoch: [390][16/24]Time: 0.022 (0.037) Data: 0.001 (0.014) Loss: 0.2877 (0.2869)
Epoch: [390][17/24]Time: 0.022 (0.036) Data: 0.001 (0.013) Loss: 0.2101 (0.2827)
Epoch: [390][18/24]Time: 0.022 (0.035) Data: 0.001 (0.013) Loss: 0.3005 (0.2836)
Epoch: [390][19/24]Time: 0.022 

Epoch: [394][6/24]Time: 0.023 (0.062) Data: 0.001 (0.035) Loss: 0.2697 (0.2883)
Epoch: [394][7/24]Time: 0.023 (0.057) Data: 0.001 (0.030) Loss: 0.2262 (0.2805)
Epoch: [394][8/24]Time: 0.023 (0.053) Data: 0.001 (0.027) Loss: 0.2600 (0.2782)
Epoch: [394][9/24]Time: 0.024 (0.050) Data: 0.001 (0.024) Loss: 0.2756 (0.2780)
Epoch: [394][10/24]Time: 0.023 (0.048) Data: 0.001 (0.022) Loss: 0.2594 (0.2763)
Epoch: [394][11/24]Time: 0.023 (0.046) Data: 0.001 (0.021) Loss: 0.2506 (0.2742)
Epoch: [394][12/24]Time: 0.023 (0.044) Data: 0.001 (0.019) Loss: 0.2678 (0.2737)
Epoch: [394][13/24]Time: 0.023 (0.043) Data: 0.001 (0.018) Loss: 0.2076 (0.2689)
Epoch: [394][14/24]Time: 0.023 (0.041) Data: 0.001 (0.017) Loss: 0.2646 (0.2687)
Epoch: [394][15/24]Time: 0.023 (0.040) Data: 0.001 (0.016) Loss: 0.2668 (0.2685)
Epoch: [394][16/24]Time: 0.023 (0.039) Data: 0.001 (0.015) Loss: 0.4003 (0.2763)
Epoch: [394][17/24]Time: 0.023 (0.038) Data: 0.001 (0.014) Loss: 0.3228 (0.2789)
Epoch: [394][18/24]Time: 0.023 (

Epoch: [398][1/24]Time: 0.298 (0.160) Data: 0.270 (0.136) Loss: 0.2568 (0.2400)
Epoch: [398][2/24]Time: 0.026 (0.115) Data: 0.001 (0.091) Loss: 0.3161 (0.2654)
Epoch: [398][3/24]Time: 0.028 (0.094) Data: 0.001 (0.069) Loss: 0.3068 (0.2758)
Epoch: [398][4/24]Time: 0.047 (0.084) Data: 0.005 (0.056) Loss: 0.2641 (0.2734)
Epoch: [398][5/24]Time: 0.021 (0.074) Data: 0.001 (0.047) Loss: 0.3460 (0.2855)
Epoch: [398][6/24]Time: 0.021 (0.066) Data: 0.001 (0.040) Loss: 0.2210 (0.2763)
Epoch: [398][7/24]Time: 0.021 (0.061) Data: 0.001 (0.035) Loss: 0.3795 (0.2892)
Epoch: [398][8/24]Time: 0.022 (0.056) Data: 0.001 (0.031) Loss: 0.3338 (0.2942)
Epoch: [398][9/24]Time: 0.021 (0.053) Data: 0.001 (0.028) Loss: 0.3093 (0.2957)
Epoch: [398][10/24]Time: 0.021 (0.050) Data: 0.001 (0.026) Loss: 0.2509 (0.2916)
Epoch: [398][11/24]Time: 0.021 (0.048) Data: 0.001 (0.024) Loss: 0.3715 (0.2983)
Epoch: [398][12/24]Time: 0.021 (0.046) Data: 0.001 (0.022) Loss: 0.2357 (0.2935)
Epoch: [398][13/24]Time: 0.021 (0.044

In [23]:
print (end-start)

621.6017904281616
