In [11]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil
from datetime import datetime

torch.set_default_dtype(torch.float64)
torch.manual_seed(0)
np.random.seed(0)

TRAIN_SLOPE = 1
EVAL_SLOPE = 5
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10
SEED = 0

# Utils

In [12]:
def split_data(X, Y, percentage):
    num_val = int(len(X)*percentage)
    return X[num_val:], Y[num_val:], X[:num_val], Y[:num_val]

def shuffle(X, Y):
    torch.manual_seed(0)
    np.random.seed(0)
    data = torch.cat((Y, X), 1)
    data = data[torch.randperm(data.size()[0])]
    X = data[:, 1:]
    Y = data[:, 0]
    return X, Y

def conf_mat(Y1, Y2):
    num_of_samples = len(Y1)
    mat = confusion_matrix(Y1, Y2, labels=[-1, 1])*100/num_of_samples
    acc = np.trace(mat)
    return mat, acc

def calc_accuracy(Y, Ypred):
    num = len(Y)
    temp = Y - Ypred
    acc = len(temp[temp == 0])*1./num
    return acc

# CCP classes

In [13]:
class CCP:
    def __init__(self, x_dim, batch_size, funcs, scale):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        self.batch_size = batch_size
        
        self.x = cp.Variable((batch_size, x_dim))
        self.xt = cp.Parameter((batch_size, x_dim))
        self.r = cp.Parameter((batch_size, x_dim))
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.slope = cp.Parameter(1)

        target = cp.diag(self.x@(self.f_derivative(self.xt, self.w, self.b, self.slope).T))-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        self.prob = cp.Problem(cp.Maximize(cp.sum(target)), constraints)
        
    def ccp(self, r):
        """
        numpy to numpy
        """
        self.xt.value = r
        self.r.value = r
        result = self.prob.solve()
        diff = np.linalg.norm(self.xt.value - self.x.value)
        cnt = 0
        while diff > 0.001 and cnt < 100:
            cnt += 1
            self.xt.value = self.x.value
            result = self.prob.solve()
            diff = np.linalg.norm(self.x.value - self.xt.value)/self.batch_size
        return self.x.value
    
    def optimize_X(self, X, w, b, slope):
        """
        tensor to tensor
        """
        w = w.detach().numpy()
        b = b.detach().numpy()
        slope = np.full(1, slope)
        X = X.numpy()
        
        self.w.value = w
        self.b.value = b
        self.slope.value = slope
        return torch.from_numpy(self.ccp(X))

In [14]:
class DELTA():
    
    def __init__(self, x_dim, funcs, scale):
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.b = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))

        target = self.x@self.f_der-self.g(self.x, self.w, self.b, TRAIN_SLOPE)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        objective = cp.Maximize(target)
        problem = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(problem, parameters=[self.r, self.w, self.b, self.f_der],
                                variables=[self.x])
        
    def optimize_X(self, X, w, b, F_DER):
        return self.layer(X, w, b, F_DER)[0]

# Gain & Cost functions

In [15]:
def score(x, w, b):
    return x@w + b

def f(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) + 1)]), 2)

def g(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) - 1)]), 2)

def c(x, r, x_dim, scale):
    return (scale)*cp.sum_squares(x-r)

def f_derivative(x, w, b, slope):
    return 0.5*cp.multiply(slope*((slope*score(x, w, b) + 1)/cp.sqrt((slope*score(x, w, b) + 1)**2 + 1)), w)
    
def f_batch(x, w, b, slope):
    return 0.5*cp.norm(cp.vstack([np.ones(x.shape[0]), (slope*score(x, w, b) + 1)]), 2, axis=0)

def g_batch(x, w, b, slope):
    return 0.5*cp.norm(cp.vstack([np.ones((1, x.shape[0])), cp.reshape((slope*score(x, w, b) - 1), (1, x.shape[0]))]), 2, axis=0)

def c_batch(x, r, x_dim, scale):
    return (scale)*cp.square(cp.norm(x-r, 2, axis=1))

def f_derivative_batch(x, w, b, slope):
    nablas = 0.5*slope*((slope*score(x, w, b) + 1)/cp.sqrt((slope*score(x, w, b) + 1)**2 + 1))
    return cp.reshape(nablas, (nablas.shape[0], 1))@cp.reshape(w, (1, x.shape[1]))

# Model

In [16]:
class MyStrategicModel(torch.nn.Module):
    def __init__(self, x_dim, batch_size, funcs, funcs_batch, train_slope, eval_slope, scale, strategic=False):
        torch.manual_seed(0)
        np.random.seed(0)

        super(MyStrategicModel, self).__init__()
        self.x_dim = x_dim
        self.batch_size = batch_size
        self.train_slope, self.eval_slope = train_slope, eval_slope
        self.w = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64, requires_grad=True)))
        self.b = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(1, dtype=torch.float64, requires_grad=True)))
        self.strategic = strategic
        self.ccp = CCP(x_dim, batch_size, funcs_batch, scale)
        self.delta = DELTA(x_dim, funcs, scale)
        self.ccp_time = 0
        self.total_time = 0

    def forward(self, X, evaluation=False):
        if self.strategic:
            if evaluation:
                t1 = time.time()
                XT = self.ccp.optimize_X(X, self.w, self.b, self.eval_slope)
                self.ccp_time += time.time()-t1
                X_opt = XT
            else:
                t1 = time.time()
                XT = self.ccp.optimize_X(X, self.w, self.b, self.train_slope)
                self.ccp_time += time.time()-t1
                F_DER = self.get_f_ders(XT, self.train_slope)
                X_opt = self.delta.optimize_X(X, self.w, self.b, F_DER) # Xopt should be equal to XT but we do it again for the gradients
            output = self.score(X_opt)
        else:
            output = self.score(X)        
        return output
    
    def optimize_X(self, X, evaluation=False):
        slope = self.eval_slope if evaluation else self.train_slope
        return self.ccp.optimize_X(X, self.w, self.b, slope)
    
    def normalize_weights(self):
        with torch.no_grad():
            norm = torch.sqrt(torch.sum(self.w**2) + self.b**2)
            self.w /= norm
            self.b /= norm

    def score(self, x):
        return x@self.w + self.b
    
    def get_f_ders(self, XT, slope):
        nablas = 0.5*slope*((slope*self.score(XT) + 1)/torch.sqrt((slope*self.score(XT) + 1)**2 + 1))
        return torch.reshape(nablas, (len(nablas), 1))@torch.reshape(self.w, (1, len(self.w)))

    def calc_accuracy(self, Y, Y_pred):
        Y_pred = torch.sign(Y_pred)
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num        
        return acc
    
    def evaluate(self, X, Y):      
        return self.calc_accuracy(Y, self.forward(X, evaluation=True))
    
    def loss(self, Y, Y_pred):
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))
    
    def save_model(self, train_errors, val_errors, train_losses, val_losses, info, path, comment=None):
        if comment is not None:
            path += "/" + comment
            
        filename = path + "/model.pt"
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
        torch.save(self.state_dict(), filename)
                
        pd.DataFrame(np.array(train_errors)).to_csv(path + '/train_errors.csv')
        pd.DataFrame(np.array(val_errors)).to_csv(path + '/val_errors.csv')
        pd.DataFrame(np.array(train_losses)).to_csv(path + '/train_losses.csv')
        pd.DataFrame(np.array(val_losses)).to_csv(path + '/val_losses.csv')
        
        with open(path + "/info.txt", "w") as f:
            f.write(info)
    
    def load_model(self, filename):
        self.load_state_dict(torch.load(filename))
        self.eval()
    
    def fit(self, path, X, Y, Xval, Yval, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False, callback=None, comment=None):
        train_dset = TensorDataset(X, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        test_dset = TensorDataset(Xval, Yval)
        test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=True)
        
        opt = opt(self.parameters(), **opt_kwargs)

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []
        
        best_val_error = 1
        consecutive_no_improvement = 0

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, Ybatch in train_loader:
#                 try:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch)
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                with torch.no_grad():
                    e = self.calc_accuracy(Ybatch, Ybatch_pred)
                    train_errors[-1].append(1-e)
                if verbose:
                    print("batch %03d / %03d | loss: %3.5f | err: %3.5f" %
                          (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                batch += 1
                if callback is not None:
                    callback()
#                 except:
#                     print("failed")
                
            with torch.no_grad():
                total_loss = 0
                total_error = 0
                batch = 0
                for Xbatch, Ybatch in test_loader:
#                     try:
                    Yval_pred = self.forward(Xbatch, evaluation=True)
                    val_loss = self.loss(Ybatch, Yval_pred).item()
                    total_loss += val_loss
                    val_error = 1-self.calc_accuracy(Ybatch, Yval_pred)
                    total_error += val_error
                    batch += 1
#                     except:
#                         print("failed")
                        
                avg_loss = total_loss/batch
                avg_error = total_error/batch
                val_losses.append(avg_loss)
                val_errors.append(avg_error)
                if avg_error < best_val_error:
                        consecutive_no_improvement = 0
                        best_val_error = avg_error
                        info = "training time in seconds: {}\nepoch: {}\nbatch size: {}\ntrain slope: {}\neval slope: {}\nlearning rate: {}\nvalidation loss: {}\nvalidation error: {}\n".format(
                        time.time()-total_time, epoch, batch_size, self.train_slope, self.eval_slope, opt_kwargs["lr"], avg_loss, avg_error)
                        self.save_model(train_errors, val_errors, train_losses, val_losses, info, path, comment)
                        print("model saved!")

                else:
                    consecutive_no_improvement += 1
                    if consecutive_no_improvement >= 4:
                        break
                    
            t2 = time.time()
            if verbose:
                print("------------- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))
        
        self.total_time = time.time()-total_time
        print("training time: {} seconds".format(self.total_time)) 
        return train_errors, val_errors, train_losses, val_losses

In [17]:
def gen_sklearn_data(x_dim, N, informative_frac=1, shift_range=1, scale_range=1, noise_frac=0.01):
    torch.manual_seed(0)
    np.random.seed(0)
    n_informative = int(informative_frac*x_dim)
    n_redundant = x_dim - n_informative
    shift_arr = shift_range*np.random.randn(x_dim)
    scale_arr = scale_range*np.random.randn(x_dim)
    X, Y = make_classification(n_samples=N, n_features=x_dim, n_informative=n_informative, n_redundant=n_redundant,
                               flip_y=noise_frac, shift=shift_arr, scale=scale_arr, random_state=0)
    Y[Y == 0] = -1
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    return torch.from_numpy(X), torch.from_numpy(Y)

In [18]:
path = "./models/runtime_varying_batch_size"
epochs = 5
x_dim = 5
scale = 1
X, Y = gen_sklearn_data(x_dim, 1024)
X, Y, Xval, Yval = split_data(X, Y, 0.25)
print(Xval.size())
print("percent of positive samples: {}%".format(100 * len(Y[Y == 1]) / len(Y)))

funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}
funcs_batch = {"f": f_batch, "g": g_batch, "f_derivative": f_derivative_batch, "c": c_batch, "score": score}

total = []
ccp = []
for batch_size in (2**np.arange(9)).tolist():
    strategic_model = MyStrategicModel(x_dim, batch_size, funcs, funcs_batch, TRAIN_SLOPE, EVAL_SLOPE, scale=scale, strategic=True)
    strategic_model.fit(path, X, Y, Xval, Yval,
                        opt=torch.optim.Adam, opt_kwargs={"lr": (1e-1)},
                        batch_size=batch_size, epochs=epochs, verbose=True,
                       comment="batched")
    
    total_time = strategic_model.total_time
    ccp_time = strategic_model.ccp_time
    total.append(total_time)
    ccp.append(ccp_time)
    pd.DataFrame(np.array(total)).to_csv(path + '/total_timing_results.csv')
    pd.DataFrame(np.array(ccp)).to_csv(path + '/ccp_timing_results.csv')

torch.Size([256, 5])
percent of positive samples: 50.78125%


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

batch 001 / 768 | loss: 0.31228 | err: 0.00000
batch 002 / 768 | loss: 0.67294 | err: 0.50000
batch 003 / 768 | loss: 0.44863 | err: 0.33333
batch 004 / 768 | loss: 0.81932 | err: 0.50000
batch 005 / 768 | loss: 0.65546 | err: 0.40000
batch 006 / 768 | loss: 0.83054 | err: 0.50000
batch 007 / 768 | loss: 0.71189 | err: 0.42857
batch 008 / 768 | loss: 0.77489 | err: 0.50000
batch 009 / 768 | loss: 0.68879 | err: 0.44444
batch 010 / 768 | loss: 0.61991 | err: 0.40000
batch 011 / 768 | loss: 0.59401 | err: 0.36364
batch 012 / 768 | loss: 0.57693 | err: 0.33333
batch 013 / 768 | loss: 0.53255 | err: 0.30769
batch 014 / 768 | loss: 0.57349 | err: 0.35714
batch 015 / 768 | loss: 0.56817 | err: 0.33333
batch 016 / 768 | loss: 0.53266 | err: 0.31250
batch 017 / 768 | loss: 0.51330 | err: 0.29412
batch 018 / 768 | loss: 0.50176 | err: 0.27778
batch 019 / 768 | loss: 0.56159 | err: 0.31579
batch 020 / 768 | loss: 0.53351 | err: 0.30000
batch 021 / 768 | loss: 0.65893 | err: 0.33333
batch 022 / 7

batch 177 / 768 | loss: 0.53265 | err: 0.23164
batch 178 / 768 | loss: 0.52966 | err: 0.23034
batch 179 / 768 | loss: 0.52670 | err: 0.22905
batch 180 / 768 | loss: 0.53261 | err: 0.23333
batch 181 / 768 | loss: 0.53856 | err: 0.23757
batch 182 / 768 | loss: 0.53560 | err: 0.23626
batch 183 / 768 | loss: 0.53267 | err: 0.23497
batch 184 / 768 | loss: 0.52978 | err: 0.23370
batch 185 / 768 | loss: 0.52691 | err: 0.23243
batch 186 / 768 | loss: 0.52408 | err: 0.23118
batch 187 / 768 | loss: 0.52128 | err: 0.22995
batch 188 / 768 | loss: 0.51851 | err: 0.22872
batch 189 / 768 | loss: 0.51576 | err: 0.22751
batch 190 / 768 | loss: 0.51305 | err: 0.22632
batch 191 / 768 | loss: 0.51036 | err: 0.22513
batch 192 / 768 | loss: 0.50770 | err: 0.22396
batch 193 / 768 | loss: 0.50507 | err: 0.22280
batch 194 / 768 | loss: 0.51253 | err: 0.22680
batch 195 / 768 | loss: 0.51064 | err: 0.22564
batch 196 / 768 | loss: 0.50803 | err: 0.22449
batch 197 / 768 | loss: 0.50545 | err: 0.22335
batch 198 / 7

  "Solution may be inaccurate. Try another solver, "


batch 318 / 768 | loss: 0.49824 | err: 0.22956
batch 319 / 768 | loss: 0.49667 | err: 0.22884
batch 320 / 768 | loss: 0.49512 | err: 0.22812
batch 321 / 768 | loss: 0.49358 | err: 0.22741
batch 322 / 768 | loss: 0.49205 | err: 0.22671
batch 323 / 768 | loss: 0.49821 | err: 0.22910
batch 324 / 768 | loss: 0.50100 | err: 0.23148
batch 325 / 768 | loss: 0.49946 | err: 0.23077
batch 326 / 768 | loss: 0.49792 | err: 0.23006
batch 327 / 768 | loss: 0.49640 | err: 0.22936
batch 328 / 768 | loss: 0.49489 | err: 0.22866
batch 329 / 768 | loss: 0.49338 | err: 0.22796
batch 330 / 768 | loss: 0.49189 | err: 0.22727
batch 331 / 768 | loss: 0.49961 | err: 0.22961
batch 332 / 768 | loss: 0.49810 | err: 0.22892
batch 333 / 768 | loss: 0.49661 | err: 0.22823
batch 334 / 768 | loss: 0.49601 | err: 0.22754
batch 335 / 768 | loss: 0.49453 | err: 0.22687
batch 336 / 768 | loss: 0.50573 | err: 0.22917
batch 337 / 768 | loss: 0.50518 | err: 0.22849
batch 338 / 768 | loss: 0.50373 | err: 0.22781
batch 339 / 7

batch 495 / 768 | loss: 0.48793 | err: 0.23030
batch 496 / 768 | loss: 0.49024 | err: 0.23185
batch 497 / 768 | loss: 0.48926 | err: 0.23139
batch 498 / 768 | loss: 0.48828 | err: 0.23092
batch 499 / 768 | loss: 0.48730 | err: 0.23046
batch 500 / 768 | loss: 0.48632 | err: 0.23000
batch 501 / 768 | loss: 0.48535 | err: 0.22954
batch 502 / 768 | loss: 0.48439 | err: 0.22908
batch 503 / 768 | loss: 0.48342 | err: 0.22863
batch 504 / 768 | loss: 0.48246 | err: 0.22817
batch 505 / 768 | loss: 0.48151 | err: 0.22772
batch 506 / 768 | loss: 0.48079 | err: 0.22727
batch 507 / 768 | loss: 0.47984 | err: 0.22682
batch 508 / 768 | loss: 0.47890 | err: 0.22638
batch 509 / 768 | loss: 0.47796 | err: 0.22593
batch 510 / 768 | loss: 0.48102 | err: 0.22745
batch 511 / 768 | loss: 0.48008 | err: 0.22701
batch 512 / 768 | loss: 0.47914 | err: 0.22656
batch 513 / 768 | loss: 0.47821 | err: 0.22612
batch 514 / 768 | loss: 0.47727 | err: 0.22568
batch 515 / 768 | loss: 0.47635 | err: 0.22524
batch 516 / 7

batch 671 / 768 | loss: 0.47641 | err: 0.22355
batch 672 / 768 | loss: 0.47570 | err: 0.22321
batch 673 / 768 | loss: 0.47747 | err: 0.22437
batch 674 / 768 | loss: 0.47676 | err: 0.22404
batch 675 / 768 | loss: 0.47848 | err: 0.22519
batch 676 / 768 | loss: 0.48018 | err: 0.22633
batch 677 / 768 | loss: 0.47947 | err: 0.22600
batch 678 / 768 | loss: 0.47876 | err: 0.22566
batch 679 / 768 | loss: 0.47806 | err: 0.22533
batch 680 / 768 | loss: 0.47736 | err: 0.22500
batch 681 / 768 | loss: 0.47701 | err: 0.22467
batch 682 / 768 | loss: 0.47820 | err: 0.22581
batch 683 / 768 | loss: 0.47837 | err: 0.22548
batch 684 / 768 | loss: 0.47990 | err: 0.22661
batch 685 / 768 | loss: 0.47920 | err: 0.22628
batch 686 / 768 | loss: 0.48039 | err: 0.22741
batch 687 / 768 | loss: 0.48031 | err: 0.22707
batch 688 / 768 | loss: 0.48235 | err: 0.22820
batch 689 / 768 | loss: 0.48165 | err: 0.22787
batch 690 / 768 | loss: 0.48281 | err: 0.22899
batch 691 / 768 | loss: 0.48292 | err: 0.22865
batch 692 / 7

batch 078 / 768 | loss: 0.42488 | err: 0.19231
batch 079 / 768 | loss: 0.45719 | err: 0.20253
batch 080 / 768 | loss: 0.45147 | err: 0.20000
batch 081 / 768 | loss: 0.44776 | err: 0.19753
batch 082 / 768 | loss: 0.46229 | err: 0.20732
batch 083 / 768 | loss: 0.45672 | err: 0.20482
batch 084 / 768 | loss: 0.45128 | err: 0.20238
batch 085 / 768 | loss: 0.44597 | err: 0.20000
batch 086 / 768 | loss: 0.44162 | err: 0.19767
batch 087 / 768 | loss: 0.43655 | err: 0.19540
batch 088 / 768 | loss: 0.43614 | err: 0.19318
batch 089 / 768 | loss: 0.45997 | err: 0.20225
batch 090 / 768 | loss: 0.45486 | err: 0.20000
batch 091 / 768 | loss: 0.47382 | err: 0.20879
batch 092 / 768 | loss: 0.46867 | err: 0.20652
batch 093 / 768 | loss: 0.46363 | err: 0.20430
batch 094 / 768 | loss: 0.45870 | err: 0.20213
batch 095 / 768 | loss: 0.45550 | err: 0.20000
batch 096 / 768 | loss: 0.45076 | err: 0.19792
batch 097 / 768 | loss: 0.44611 | err: 0.19588
batch 098 / 768 | loss: 0.44156 | err: 0.19388
batch 099 / 7

batch 253 / 768 | loss: 0.39500 | err: 0.18182
batch 254 / 768 | loss: 0.39383 | err: 0.18110
batch 255 / 768 | loss: 0.39229 | err: 0.18039
batch 256 / 768 | loss: 0.39075 | err: 0.17969
batch 257 / 768 | loss: 0.39562 | err: 0.18288
batch 258 / 768 | loss: 0.39409 | err: 0.18217
batch 259 / 768 | loss: 0.39883 | err: 0.18533
batch 260 / 768 | loss: 0.39730 | err: 0.18462
batch 261 / 768 | loss: 0.39578 | err: 0.18391
batch 262 / 768 | loss: 0.39426 | err: 0.18321
batch 263 / 768 | loss: 0.39277 | err: 0.18251
batch 264 / 768 | loss: 0.39128 | err: 0.18182
batch 265 / 768 | loss: 0.38980 | err: 0.18113
batch 266 / 768 | loss: 0.38834 | err: 0.18045
batch 267 / 768 | loss: 0.39246 | err: 0.18352
batch 268 / 768 | loss: 0.39811 | err: 0.18657
batch 269 / 768 | loss: 0.40606 | err: 0.18959
batch 270 / 768 | loss: 0.40456 | err: 0.18889
batch 271 / 768 | loss: 0.40307 | err: 0.18819
batch 272 / 768 | loss: 0.40274 | err: 0.18750
batch 273 / 768 | loss: 0.40127 | err: 0.18681
batch 274 / 7

batch 428 / 768 | loss: 0.46728 | err: 0.20327
batch 429 / 768 | loss: 0.46619 | err: 0.20280
batch 430 / 768 | loss: 0.46914 | err: 0.20465
batch 431 / 768 | loss: 0.46805 | err: 0.20418
batch 432 / 768 | loss: 0.47040 | err: 0.20602
batch 433 / 768 | loss: 0.46931 | err: 0.20554
batch 434 / 768 | loss: 0.46823 | err: 0.20507
batch 435 / 768 | loss: 0.46716 | err: 0.20460
batch 436 / 768 | loss: 0.46608 | err: 0.20413
batch 437 / 768 | loss: 0.46502 | err: 0.20366
batch 438 / 768 | loss: 0.46396 | err: 0.20320
batch 439 / 768 | loss: 0.46290 | err: 0.20273
batch 440 / 768 | loss: 0.46553 | err: 0.20455
batch 441 / 768 | loss: 0.46488 | err: 0.20408
batch 442 / 768 | loss: 0.46382 | err: 0.20362
batch 443 / 768 | loss: 0.46635 | err: 0.20542
batch 444 / 768 | loss: 0.46530 | err: 0.20495
batch 445 / 768 | loss: 0.46425 | err: 0.20449
batch 446 / 768 | loss: 0.46662 | err: 0.20628
batch 447 / 768 | loss: 0.46557 | err: 0.20582
batch 448 / 768 | loss: 0.46824 | err: 0.20759
batch 449 / 7

batch 604 / 768 | loss: 0.46915 | err: 0.21523
batch 605 / 768 | loss: 0.46837 | err: 0.21488
batch 606 / 768 | loss: 0.46760 | err: 0.21452
batch 607 / 768 | loss: 0.46687 | err: 0.21417
batch 608 / 768 | loss: 0.46610 | err: 0.21382
batch 609 / 768 | loss: 0.46534 | err: 0.21346
batch 610 / 768 | loss: 0.46458 | err: 0.21311
batch 611 / 768 | loss: 0.46381 | err: 0.21277
batch 612 / 768 | loss: 0.46306 | err: 0.21242
batch 613 / 768 | loss: 0.46230 | err: 0.21207
batch 614 / 768 | loss: 0.46155 | err: 0.21173
batch 615 / 768 | loss: 0.46080 | err: 0.21138
batch 616 / 768 | loss: 0.46251 | err: 0.21266
batch 617 / 768 | loss: 0.46176 | err: 0.21232
batch 618 / 768 | loss: 0.46102 | err: 0.21197
batch 619 / 768 | loss: 0.46027 | err: 0.21163
batch 620 / 768 | loss: 0.45985 | err: 0.21129
batch 621 / 768 | loss: 0.46157 | err: 0.21256
batch 622 / 768 | loss: 0.46083 | err: 0.21222
batch 623 / 768 | loss: 0.46009 | err: 0.21188
batch 624 / 768 | loss: 0.45935 | err: 0.21154
batch 625 / 7

batch 010 / 768 | loss: 0.50568 | err: 0.20000
batch 011 / 768 | loss: 0.60440 | err: 0.27273
batch 012 / 768 | loss: 0.55403 | err: 0.25000
batch 013 / 768 | loss: 0.51141 | err: 0.23077
batch 014 / 768 | loss: 0.47488 | err: 0.21429
batch 015 / 768 | loss: 0.44323 | err: 0.20000
batch 016 / 768 | loss: 0.44388 | err: 0.18750
batch 017 / 768 | loss: 0.41777 | err: 0.17647
batch 018 / 768 | loss: 0.39456 | err: 0.16667
batch 019 / 768 | loss: 0.37379 | err: 0.15789
batch 020 / 768 | loss: 0.35510 | err: 0.15000
batch 021 / 768 | loss: 0.33819 | err: 0.14286
batch 022 / 768 | loss: 0.42832 | err: 0.18182
batch 023 / 768 | loss: 0.42509 | err: 0.17391
batch 024 / 768 | loss: 0.41498 | err: 0.16667
batch 025 / 768 | loss: 0.39838 | err: 0.16000
batch 026 / 768 | loss: 0.38305 | err: 0.15385
batch 027 / 768 | loss: 0.36887 | err: 0.14815
batch 028 / 768 | loss: 0.35569 | err: 0.14286
batch 029 / 768 | loss: 0.39514 | err: 0.17241
batch 030 / 768 | loss: 0.38930 | err: 0.16667
batch 031 / 7

batch 361 / 768 | loss: 0.37703 | err: 0.18283
batch 362 / 768 | loss: 0.38023 | err: 0.18508
batch 363 / 768 | loss: 0.38439 | err: 0.18733
batch 364 / 768 | loss: 0.38369 | err: 0.18681
batch 365 / 768 | loss: 0.38779 | err: 0.18904
batch 366 / 768 | loss: 0.38673 | err: 0.18852
batch 367 / 768 | loss: 0.38568 | err: 0.18801
batch 368 / 768 | loss: 0.38463 | err: 0.18750
batch 369 / 768 | loss: 0.38359 | err: 0.18699
batch 370 / 768 | loss: 0.38255 | err: 0.18649
batch 371 / 768 | loss: 0.38152 | err: 0.18598
batch 372 / 768 | loss: 0.38049 | err: 0.18548
batch 373 / 768 | loss: 0.37947 | err: 0.18499
batch 374 / 768 | loss: 0.37846 | err: 0.18449
batch 375 / 768 | loss: 0.37767 | err: 0.18400
batch 376 / 768 | loss: 0.37760 | err: 0.18351
batch 377 / 768 | loss: 0.37659 | err: 0.18302
batch 378 / 768 | loss: 0.37560 | err: 0.18254
batch 379 / 768 | loss: 0.37461 | err: 0.18206
batch 380 / 768 | loss: 0.37899 | err: 0.18421
batch 381 / 768 | loss: 0.37799 | err: 0.18373
batch 382 / 7

batch 536 / 768 | loss: 0.46024 | err: 0.21455
batch 537 / 768 | loss: 0.45938 | err: 0.21415
batch 538 / 768 | loss: 0.46277 | err: 0.21561
batch 539 / 768 | loss: 0.46191 | err: 0.21521
batch 540 / 768 | loss: 0.46401 | err: 0.21667
batch 541 / 768 | loss: 0.46315 | err: 0.21627
batch 542 / 768 | loss: 0.46229 | err: 0.21587
batch 543 / 768 | loss: 0.46144 | err: 0.21547
batch 544 / 768 | loss: 0.46074 | err: 0.21507
batch 545 / 768 | loss: 0.45990 | err: 0.21468
batch 546 / 768 | loss: 0.46193 | err: 0.21612
batch 547 / 768 | loss: 0.46109 | err: 0.21572
batch 548 / 768 | loss: 0.46024 | err: 0.21533
batch 549 / 768 | loss: 0.46341 | err: 0.21676
batch 550 / 768 | loss: 0.46257 | err: 0.21636
batch 551 / 768 | loss: 0.46503 | err: 0.21779
batch 552 / 768 | loss: 0.46419 | err: 0.21739
batch 553 / 768 | loss: 0.46666 | err: 0.21881
batch 554 / 768 | loss: 0.46848 | err: 0.22022
batch 555 / 768 | loss: 0.46764 | err: 0.21982
batch 556 / 768 | loss: 0.47237 | err: 0.22122
batch 557 / 7

batch 714 / 768 | loss: 0.48927 | err: 0.22969
batch 715 / 768 | loss: 0.48858 | err: 0.22937
batch 716 / 768 | loss: 0.48790 | err: 0.22905
batch 717 / 768 | loss: 0.48722 | err: 0.22873
batch 718 / 768 | loss: 0.48889 | err: 0.22981
batch 719 / 768 | loss: 0.49048 | err: 0.23088
batch 720 / 768 | loss: 0.48980 | err: 0.23056
batch 721 / 768 | loss: 0.49155 | err: 0.23162
batch 722 / 768 | loss: 0.49087 | err: 0.23130
batch 723 / 768 | loss: 0.49019 | err: 0.23098
batch 724 / 768 | loss: 0.48952 | err: 0.23066
batch 725 / 768 | loss: 0.48921 | err: 0.23034
batch 726 / 768 | loss: 0.48853 | err: 0.23003
batch 727 / 768 | loss: 0.48793 | err: 0.22971
batch 728 / 768 | loss: 0.48726 | err: 0.22940
batch 729 / 768 | loss: 0.48719 | err: 0.22908
batch 730 / 768 | loss: 0.48652 | err: 0.22877
batch 731 / 768 | loss: 0.48586 | err: 0.22845
batch 732 / 768 | loss: 0.48794 | err: 0.22951
batch 733 / 768 | loss: 0.48929 | err: 0.23056
batch 734 / 768 | loss: 0.48863 | err: 0.23025
batch 735 / 7

batch 120 / 768 | loss: 0.49563 | err: 0.20000
batch 121 / 768 | loss: 0.49153 | err: 0.19835
batch 122 / 768 | loss: 0.49194 | err: 0.19672
batch 123 / 768 | loss: 0.48794 | err: 0.19512
batch 124 / 768 | loss: 0.49396 | err: 0.20161
batch 125 / 768 | loss: 0.49001 | err: 0.20000
batch 126 / 768 | loss: 0.48612 | err: 0.19841
batch 127 / 768 | loss: 0.49423 | err: 0.20472
batch 128 / 768 | loss: 0.49037 | err: 0.20312
batch 129 / 768 | loss: 0.49128 | err: 0.20155
batch 130 / 768 | loss: 0.48750 | err: 0.20000
batch 131 / 768 | loss: 0.50196 | err: 0.20611
batch 132 / 768 | loss: 0.49816 | err: 0.20455
batch 133 / 768 | loss: 0.49442 | err: 0.20301
batch 134 / 768 | loss: 0.49519 | err: 0.20149
batch 135 / 768 | loss: 0.49153 | err: 0.20000
batch 136 / 768 | loss: 0.48791 | err: 0.19853
batch 137 / 768 | loss: 0.48533 | err: 0.19708
batch 138 / 768 | loss: 0.48182 | err: 0.19565
batch 139 / 768 | loss: 0.47835 | err: 0.19424
batch 140 / 768 | loss: 0.47493 | err: 0.19286
batch 141 / 7

batch 295 / 768 | loss: 0.47894 | err: 0.20000
batch 296 / 768 | loss: 0.47732 | err: 0.19932
batch 297 / 768 | loss: 0.48377 | err: 0.20202
batch 298 / 768 | loss: 0.48811 | err: 0.20470
batch 299 / 768 | loss: 0.48648 | err: 0.20401
batch 300 / 768 | loss: 0.48486 | err: 0.20333
batch 301 / 768 | loss: 0.48325 | err: 0.20266
batch 302 / 768 | loss: 0.48165 | err: 0.20199
batch 303 / 768 | loss: 0.48006 | err: 0.20132
batch 304 / 768 | loss: 0.47848 | err: 0.20066
batch 305 / 768 | loss: 0.47691 | err: 0.20000
batch 306 / 768 | loss: 0.47667 | err: 0.19935
batch 307 / 768 | loss: 0.48353 | err: 0.20195
batch 308 / 768 | loss: 0.48196 | err: 0.20130
batch 309 / 768 | loss: 0.48615 | err: 0.20388
batch 310 / 768 | loss: 0.48458 | err: 0.20323
batch 311 / 768 | loss: 0.48350 | err: 0.20257
batch 312 / 768 | loss: 0.48195 | err: 0.20192
batch 313 / 768 | loss: 0.48041 | err: 0.20128
batch 314 / 768 | loss: 0.48032 | err: 0.20064
batch 315 / 768 | loss: 0.47879 | err: 0.20000
batch 316 / 7

batch 471 / 768 | loss: 0.48503 | err: 0.21019
batch 472 / 768 | loss: 0.48716 | err: 0.21186
batch 473 / 768 | loss: 0.48613 | err: 0.21142
batch 474 / 768 | loss: 0.48511 | err: 0.21097
batch 475 / 768 | loss: 0.48504 | err: 0.21053
batch 476 / 768 | loss: 0.48402 | err: 0.21008
batch 477 / 768 | loss: 0.48300 | err: 0.20964
batch 478 / 768 | loss: 0.48199 | err: 0.20921
batch 479 / 768 | loss: 0.48212 | err: 0.20877
batch 480 / 768 | loss: 0.48112 | err: 0.20833
batch 481 / 768 | loss: 0.48012 | err: 0.20790
batch 482 / 768 | loss: 0.47912 | err: 0.20747
batch 483 / 768 | loss: 0.47813 | err: 0.20704
batch 484 / 768 | loss: 0.47714 | err: 0.20661
batch 485 / 768 | loss: 0.47650 | err: 0.20619
batch 486 / 768 | loss: 0.47552 | err: 0.20576
batch 487 / 768 | loss: 0.47454 | err: 0.20534
batch 488 / 768 | loss: 0.47357 | err: 0.20492
batch 489 / 768 | loss: 0.47260 | err: 0.20450
batch 490 / 768 | loss: 0.47164 | err: 0.20408
batch 491 / 768 | loss: 0.47396 | err: 0.20570
batch 492 / 7

batch 646 / 768 | loss: 0.44090 | err: 0.19814
batch 647 / 768 | loss: 0.44022 | err: 0.19784
batch 648 / 768 | loss: 0.43954 | err: 0.19753
batch 649 / 768 | loss: 0.43886 | err: 0.19723
batch 650 / 768 | loss: 0.43819 | err: 0.19692
batch 651 / 768 | loss: 0.43751 | err: 0.19662
batch 652 / 768 | loss: 0.43684 | err: 0.19632
batch 653 / 768 | loss: 0.43617 | err: 0.19602
batch 654 / 768 | loss: 0.43628 | err: 0.19572
batch 655 / 768 | loss: 0.43561 | err: 0.19542
batch 656 / 768 | loss: 0.43495 | err: 0.19512
batch 657 / 768 | loss: 0.43429 | err: 0.19482
batch 658 / 768 | loss: 0.43363 | err: 0.19453
batch 659 / 768 | loss: 0.43297 | err: 0.19423
batch 660 / 768 | loss: 0.43466 | err: 0.19545
batch 661 / 768 | loss: 0.43401 | err: 0.19516
batch 662 / 768 | loss: 0.43335 | err: 0.19486
batch 663 / 768 | loss: 0.43270 | err: 0.19457
batch 664 / 768 | loss: 0.43232 | err: 0.19428
batch 665 / 768 | loss: 0.43185 | err: 0.19398
batch 666 / 768 | loss: 0.43120 | err: 0.19369
batch 667 / 7

batch 053 / 768 | loss: 0.37629 | err: 0.18868
batch 054 / 768 | loss: 0.36933 | err: 0.18519
batch 055 / 768 | loss: 0.39725 | err: 0.20000
batch 056 / 768 | loss: 0.39452 | err: 0.19643
batch 057 / 768 | loss: 0.38760 | err: 0.19298
batch 058 / 768 | loss: 0.38091 | err: 0.18966
batch 059 / 768 | loss: 0.37446 | err: 0.18644
batch 060 / 768 | loss: 0.36822 | err: 0.18333
batch 061 / 768 | loss: 0.36218 | err: 0.18033
batch 062 / 768 | loss: 0.35634 | err: 0.17742
batch 063 / 768 | loss: 0.35068 | err: 0.17460
batch 064 / 768 | loss: 0.34646 | err: 0.17188
batch 065 / 768 | loss: 0.36453 | err: 0.18462
batch 066 / 768 | loss: 0.36472 | err: 0.18182
batch 067 / 768 | loss: 0.35928 | err: 0.17910
batch 068 / 768 | loss: 0.35403 | err: 0.17647
batch 069 / 768 | loss: 0.37884 | err: 0.18841
batch 070 / 768 | loss: 0.37544 | err: 0.18571
batch 071 / 768 | loss: 0.37015 | err: 0.18310
batch 072 / 768 | loss: 0.36839 | err: 0.18056
batch 073 / 768 | loss: 0.36334 | err: 0.17808
batch 074 / 7

batch 230 / 768 | loss: 0.44755 | err: 0.22174
batch 231 / 768 | loss: 0.44562 | err: 0.22078
batch 232 / 768 | loss: 0.44423 | err: 0.21983
batch 233 / 768 | loss: 0.44855 | err: 0.22318
batch 234 / 768 | loss: 0.44664 | err: 0.22222
batch 235 / 768 | loss: 0.44474 | err: 0.22128
batch 236 / 768 | loss: 0.44905 | err: 0.22458
batch 237 / 768 | loss: 0.44771 | err: 0.22363
batch 238 / 768 | loss: 0.44583 | err: 0.22269
batch 239 / 768 | loss: 0.44397 | err: 0.22176
batch 240 / 768 | loss: 0.44435 | err: 0.22083
batch 241 / 768 | loss: 0.44365 | err: 0.21992
batch 242 / 768 | loss: 0.44182 | err: 0.21901
batch 243 / 768 | loss: 0.44022 | err: 0.21811
batch 244 / 768 | loss: 0.43841 | err: 0.21721
batch 245 / 768 | loss: 0.43662 | err: 0.21633
batch 246 / 768 | loss: 0.43566 | err: 0.21545
batch 247 / 768 | loss: 0.43528 | err: 0.21457
batch 248 / 768 | loss: 0.44258 | err: 0.21774
batch 249 / 768 | loss: 0.44080 | err: 0.21687
batch 250 / 768 | loss: 0.43904 | err: 0.21600
batch 251 / 7

batch 405 / 768 | loss: 0.51470 | err: 0.24444
batch 406 / 768 | loss: 0.51343 | err: 0.24384
batch 407 / 768 | loss: 0.51217 | err: 0.24324
batch 408 / 768 | loss: 0.51091 | err: 0.24265
batch 409 / 768 | loss: 0.51051 | err: 0.24205
batch 410 / 768 | loss: 0.51025 | err: 0.24146
batch 411 / 768 | loss: 0.50901 | err: 0.24088
batch 412 / 768 | loss: 0.50928 | err: 0.24029
batch 413 / 768 | loss: 0.50805 | err: 0.23971
batch 414 / 768 | loss: 0.50709 | err: 0.23913
batch 415 / 768 | loss: 0.50587 | err: 0.23855
batch 416 / 768 | loss: 0.50465 | err: 0.23798
batch 417 / 768 | loss: 0.50344 | err: 0.23741
batch 418 / 768 | loss: 0.50224 | err: 0.23684
batch 419 / 768 | loss: 0.50571 | err: 0.23866
batch 420 / 768 | loss: 0.50451 | err: 0.23810
batch 421 / 768 | loss: 0.50393 | err: 0.23753
batch 422 / 768 | loss: 0.50801 | err: 0.23934
batch 423 / 768 | loss: 0.50681 | err: 0.23877
batch 424 / 768 | loss: 0.51060 | err: 0.24057
batch 425 / 768 | loss: 0.50958 | err: 0.24000
batch 426 / 7

batch 581 / 768 | loss: 0.51738 | err: 0.25129
batch 582 / 768 | loss: 0.52106 | err: 0.25258
batch 583 / 768 | loss: 0.52017 | err: 0.25214
batch 584 / 768 | loss: 0.51928 | err: 0.25171
batch 585 / 768 | loss: 0.51906 | err: 0.25128
batch 586 / 768 | loss: 0.51817 | err: 0.25085
batch 587 / 768 | loss: 0.51732 | err: 0.25043
batch 588 / 768 | loss: 0.51644 | err: 0.25000
batch 589 / 768 | loss: 0.51606 | err: 0.24958
batch 590 / 768 | loss: 0.51781 | err: 0.25085
batch 591 / 768 | loss: 0.51914 | err: 0.25212
batch 592 / 768 | loss: 0.51827 | err: 0.25169
batch 593 / 768 | loss: 0.51739 | err: 0.25126
batch 594 / 768 | loss: 0.51652 | err: 0.25084
batch 595 / 768 | loss: 0.51565 | err: 0.25042
batch 596 / 768 | loss: 0.51479 | err: 0.25000
batch 597 / 768 | loss: 0.51393 | err: 0.24958
batch 598 / 768 | loss: 0.51307 | err: 0.24916
batch 599 / 768 | loss: 0.51221 | err: 0.24875
batch 600 / 768 | loss: 0.51136 | err: 0.24833
batch 601 / 768 | loss: 0.51157 | err: 0.24792
batch 602 / 7

batch 758 / 768 | loss: 0.48382 | err: 0.23615
batch 759 / 768 | loss: 0.48318 | err: 0.23584
batch 760 / 768 | loss: 0.48304 | err: 0.23553
batch 761 / 768 | loss: 0.48241 | err: 0.23522
batch 762 / 768 | loss: 0.48177 | err: 0.23491
batch 763 / 768 | loss: 0.48114 | err: 0.23460
batch 764 / 768 | loss: 0.48274 | err: 0.23560
batch 765 / 768 | loss: 0.48429 | err: 0.23660
batch 766 / 768 | loss: 0.48391 | err: 0.23629
batch 767 / 768 | loss: 0.48562 | err: 0.23729
batch 768 / 768 | loss: 0.48725 | err: 0.23828
------------- epoch 005 / 005 | time: 078 sec | loss: 0.61384 | err: 0.29297
training time: 416.2872724533081 seconds
batch 001 / 384 | loss: 0.71836 | err: 0.50000
batch 002 / 384 | loss: 0.75222 | err: 0.50000
batch 003 / 384 | loss: 0.71002 | err: 0.50000
batch 004 / 384 | loss: 0.74705 | err: 0.50000
batch 005 / 384 | loss: 0.59764 | err: 0.40000
batch 006 / 384 | loss: 0.59146 | err: 0.33333
batch 007 / 384 | loss: 0.50836 | err: 0.28571
batch 008 / 384 | loss: 0.48843 | er

batch 162 / 384 | loss: 0.47286 | err: 0.22840
batch 163 / 384 | loss: 0.46996 | err: 0.22699
batch 164 / 384 | loss: 0.46709 | err: 0.22561
batch 165 / 384 | loss: 0.46426 | err: 0.22424
batch 166 / 384 | loss: 0.47118 | err: 0.22590
batch 167 / 384 | loss: 0.46908 | err: 0.22455
batch 168 / 384 | loss: 0.47415 | err: 0.22619
batch 169 / 384 | loss: 0.47284 | err: 0.22485
batch 170 / 384 | loss: 0.47449 | err: 0.22647
batch 171 / 384 | loss: 0.47267 | err: 0.22515
batch 172 / 384 | loss: 0.47812 | err: 0.22965
batch 173 / 384 | loss: 0.47589 | err: 0.22832
batch 174 / 384 | loss: 0.47701 | err: 0.22989
batch 175 / 384 | loss: 0.47524 | err: 0.22857
batch 176 / 384 | loss: 0.47254 | err: 0.22727
batch 177 / 384 | loss: 0.46987 | err: 0.22599
batch 178 / 384 | loss: 0.46723 | err: 0.22472
batch 179 / 384 | loss: 0.46462 | err: 0.22346
batch 180 / 384 | loss: 0.46385 | err: 0.22222
batch 181 / 384 | loss: 0.46563 | err: 0.22376
batch 182 / 384 | loss: 0.46307 | err: 0.22253
batch 183 / 3

batch 338 / 384 | loss: 0.43780 | err: 0.21450
batch 339 / 384 | loss: 0.43651 | err: 0.21386
batch 340 / 384 | loss: 0.43522 | err: 0.21324
batch 341 / 384 | loss: 0.43600 | err: 0.21408
batch 342 / 384 | loss: 0.43764 | err: 0.21491
batch 343 / 384 | loss: 0.43813 | err: 0.21574
batch 344 / 384 | loss: 0.44019 | err: 0.21657
batch 345 / 384 | loss: 0.44062 | err: 0.21739
batch 346 / 384 | loss: 0.44128 | err: 0.21676
batch 347 / 384 | loss: 0.44241 | err: 0.21758
batch 348 / 384 | loss: 0.44121 | err: 0.21695
batch 349 / 384 | loss: 0.44462 | err: 0.21920
batch 350 / 384 | loss: 0.44479 | err: 0.22000
batch 351 / 384 | loss: 0.44499 | err: 0.22080
batch 352 / 384 | loss: 0.44647 | err: 0.22159
batch 353 / 384 | loss: 0.44596 | err: 0.22096
batch 354 / 384 | loss: 0.44499 | err: 0.22034
batch 355 / 384 | loss: 0.44373 | err: 0.21972
batch 356 / 384 | loss: 0.44249 | err: 0.21910
batch 357 / 384 | loss: 0.44232 | err: 0.21849
batch 358 / 384 | loss: 0.44109 | err: 0.21788
batch 359 / 3

batch 129 / 384 | loss: 0.36935 | err: 0.17442
batch 130 / 384 | loss: 0.37271 | err: 0.17692
batch 131 / 384 | loss: 0.36986 | err: 0.17557
batch 132 / 384 | loss: 0.36706 | err: 0.17424
batch 133 / 384 | loss: 0.36430 | err: 0.17293
batch 134 / 384 | loss: 0.37821 | err: 0.17910
batch 135 / 384 | loss: 0.37651 | err: 0.17778
batch 136 / 384 | loss: 0.37470 | err: 0.17647
batch 137 / 384 | loss: 0.37780 | err: 0.17883
batch 138 / 384 | loss: 0.37506 | err: 0.17754
batch 139 / 384 | loss: 0.37236 | err: 0.17626
batch 140 / 384 | loss: 0.36992 | err: 0.17500
batch 141 / 384 | loss: 0.36729 | err: 0.17376
batch 142 / 384 | loss: 0.36471 | err: 0.17254
batch 143 / 384 | loss: 0.36393 | err: 0.17133
batch 144 / 384 | loss: 0.36653 | err: 0.17361
batch 145 / 384 | loss: 0.37034 | err: 0.17586
batch 146 / 384 | loss: 0.36781 | err: 0.17466
batch 147 / 384 | loss: 0.36688 | err: 0.17347
batch 148 / 384 | loss: 0.36440 | err: 0.17230
batch 149 / 384 | loss: 0.37307 | err: 0.17785
batch 150 / 3

batch 304 / 384 | loss: 0.44119 | err: 0.21053
batch 305 / 384 | loss: 0.43974 | err: 0.20984
batch 306 / 384 | loss: 0.43830 | err: 0.20915
batch 307 / 384 | loss: 0.43687 | err: 0.20847
batch 308 / 384 | loss: 0.43783 | err: 0.20942
batch 309 / 384 | loss: 0.43641 | err: 0.20874
batch 310 / 384 | loss: 0.43517 | err: 0.20806
batch 311 / 384 | loss: 0.43619 | err: 0.20900
batch 312 / 384 | loss: 0.43479 | err: 0.20833
batch 313 / 384 | loss: 0.43340 | err: 0.20767
batch 314 / 384 | loss: 0.43523 | err: 0.20860
batch 315 / 384 | loss: 0.43615 | err: 0.20952
batch 316 / 384 | loss: 0.44360 | err: 0.21203
batch 317 / 384 | loss: 0.44313 | err: 0.21136
batch 318 / 384 | loss: 0.44526 | err: 0.21226
batch 319 / 384 | loss: 0.44466 | err: 0.21160
batch 320 / 384 | loss: 0.44630 | err: 0.21250
batch 321 / 384 | loss: 0.44706 | err: 0.21340
batch 322 / 384 | loss: 0.44620 | err: 0.21273
batch 323 / 384 | loss: 0.44482 | err: 0.21207
batch 324 / 384 | loss: 0.44345 | err: 0.21142
batch 325 / 3

batch 094 / 384 | loss: 0.34671 | err: 0.17553
batch 095 / 384 | loss: 0.34560 | err: 0.17368
batch 096 / 384 | loss: 0.35100 | err: 0.17708
batch 097 / 384 | loss: 0.34739 | err: 0.17526
batch 098 / 384 | loss: 0.35286 | err: 0.17857
batch 099 / 384 | loss: 0.34929 | err: 0.17677
batch 100 / 384 | loss: 0.35837 | err: 0.18000
batch 101 / 384 | loss: 0.35483 | err: 0.17822
batch 102 / 384 | loss: 0.35279 | err: 0.17647
batch 103 / 384 | loss: 0.35151 | err: 0.17476
batch 104 / 384 | loss: 0.35506 | err: 0.17788
batch 105 / 384 | loss: 0.35168 | err: 0.17619
batch 106 / 384 | loss: 0.34836 | err: 0.17453
batch 107 / 384 | loss: 0.34511 | err: 0.17290
batch 108 / 384 | loss: 0.34904 | err: 0.17593
batch 109 / 384 | loss: 0.34584 | err: 0.17431
batch 110 / 384 | loss: 0.34270 | err: 0.17273
batch 111 / 384 | loss: 0.33961 | err: 0.17117
batch 112 / 384 | loss: 0.34943 | err: 0.17411
batch 113 / 384 | loss: 0.34633 | err: 0.17257
batch 114 / 384 | loss: 0.34331 | err: 0.17105
batch 115 / 3

batch 269 / 384 | loss: 0.46579 | err: 0.23792
batch 270 / 384 | loss: 0.46767 | err: 0.23889
batch 271 / 384 | loss: 0.46595 | err: 0.23801
batch 272 / 384 | loss: 0.46543 | err: 0.23713
batch 273 / 384 | loss: 0.46632 | err: 0.23810
batch 274 / 384 | loss: 0.46462 | err: 0.23723
batch 275 / 384 | loss: 0.46737 | err: 0.23818
batch 276 / 384 | loss: 0.46828 | err: 0.23913
batch 277 / 384 | loss: 0.47329 | err: 0.24188
batch 278 / 384 | loss: 0.47625 | err: 0.24281
batch 279 / 384 | loss: 0.47939 | err: 0.24552
batch 280 / 384 | loss: 0.47767 | err: 0.24464
batch 281 / 384 | loss: 0.48228 | err: 0.24733
batch 282 / 384 | loss: 0.48062 | err: 0.24645
batch 283 / 384 | loss: 0.47892 | err: 0.24558
batch 284 / 384 | loss: 0.48072 | err: 0.24648
batch 285 / 384 | loss: 0.48107 | err: 0.24737
batch 286 / 384 | loss: 0.48123 | err: 0.24650
batch 287 / 384 | loss: 0.48084 | err: 0.24564
batch 288 / 384 | loss: 0.47917 | err: 0.24479
batch 289 / 384 | loss: 0.48140 | err: 0.24567
batch 290 / 3

batch 058 / 384 | loss: 0.47672 | err: 0.22414
batch 059 / 384 | loss: 0.47144 | err: 0.22034
batch 060 / 384 | loss: 0.47336 | err: 0.22500
batch 061 / 384 | loss: 0.46951 | err: 0.22131
batch 062 / 384 | loss: 0.46193 | err: 0.21774
batch 063 / 384 | loss: 0.45460 | err: 0.21429
batch 064 / 384 | loss: 0.46090 | err: 0.21875
batch 065 / 384 | loss: 0.45893 | err: 0.21538
batch 066 / 384 | loss: 0.47071 | err: 0.21970
batch 067 / 384 | loss: 0.46836 | err: 0.21642
batch 068 / 384 | loss: 0.46147 | err: 0.21324
batch 069 / 384 | loss: 0.45673 | err: 0.21014
batch 070 / 384 | loss: 0.45130 | err: 0.20714
batch 071 / 384 | loss: 0.44801 | err: 0.20423
batch 072 / 384 | loss: 0.45159 | err: 0.20833
batch 073 / 384 | loss: 0.44541 | err: 0.20548
batch 074 / 384 | loss: 0.44961 | err: 0.20946
batch 075 / 384 | loss: 0.44394 | err: 0.20667
batch 076 / 384 | loss: 0.44887 | err: 0.21053
batch 077 / 384 | loss: 0.44513 | err: 0.20779
batch 078 / 384 | loss: 0.44894 | err: 0.21154
batch 079 / 3

batch 234 / 384 | loss: 0.50420 | err: 0.23291
batch 235 / 384 | loss: 0.50257 | err: 0.23191
batch 236 / 384 | loss: 0.50443 | err: 0.23305
batch 237 / 384 | loss: 0.50230 | err: 0.23207
batch 238 / 384 | loss: 0.50075 | err: 0.23109
batch 239 / 384 | loss: 0.49866 | err: 0.23013
batch 240 / 384 | loss: 0.49763 | err: 0.22917
batch 241 / 384 | loss: 0.49557 | err: 0.22822
batch 242 / 384 | loss: 0.49380 | err: 0.22727
batch 243 / 384 | loss: 0.49181 | err: 0.22634
batch 244 / 384 | loss: 0.48979 | err: 0.22541
batch 245 / 384 | loss: 0.48779 | err: 0.22449
batch 246 / 384 | loss: 0.49227 | err: 0.22764
batch 247 / 384 | loss: 0.49088 | err: 0.22672
batch 248 / 384 | loss: 0.48958 | err: 0.22581
batch 249 / 384 | loss: 0.48769 | err: 0.22490
batch 250 / 384 | loss: 0.48574 | err: 0.22400
batch 251 / 384 | loss: 0.48468 | err: 0.22311
batch 252 / 384 | loss: 0.48276 | err: 0.22222
batch 253 / 384 | loss: 0.48085 | err: 0.22134
batch 254 / 384 | loss: 0.48233 | err: 0.22244
batch 255 / 3

batch 023 / 384 | loss: 0.40724 | err: 0.21739
batch 024 / 384 | loss: 0.46646 | err: 0.25000
batch 025 / 384 | loss: 0.47713 | err: 0.26000
batch 026 / 384 | loss: 0.46249 | err: 0.25000
batch 027 / 384 | loss: 0.44536 | err: 0.24074
batch 028 / 384 | loss: 0.46676 | err: 0.25000
batch 029 / 384 | loss: 0.45209 | err: 0.24138
batch 030 / 384 | loss: 0.43702 | err: 0.23333
batch 031 / 384 | loss: 0.42293 | err: 0.22581
batch 032 / 384 | loss: 0.41265 | err: 0.21875
batch 033 / 384 | loss: 0.40544 | err: 0.21212
batch 034 / 384 | loss: 0.39351 | err: 0.20588
batch 035 / 384 | loss: 0.41817 | err: 0.21429
batch 036 / 384 | loss: 0.40921 | err: 0.20833
batch 037 / 384 | loss: 0.41807 | err: 0.21622
batch 038 / 384 | loss: 0.40707 | err: 0.21053
batch 039 / 384 | loss: 0.39837 | err: 0.20513
batch 040 / 384 | loss: 0.38841 | err: 0.20000
batch 041 / 384 | loss: 0.37926 | err: 0.19512
batch 042 / 384 | loss: 0.37023 | err: 0.19048
batch 043 / 384 | loss: 0.39556 | err: 0.19767
batch 044 / 3

batch 199 / 384 | loss: 0.45183 | err: 0.22362
batch 200 / 384 | loss: 0.45724 | err: 0.22750
batch 201 / 384 | loss: 0.45900 | err: 0.22886
batch 202 / 384 | loss: 0.46650 | err: 0.23267
batch 203 / 384 | loss: 0.46604 | err: 0.23153
batch 204 / 384 | loss: 0.46376 | err: 0.23039
batch 205 / 384 | loss: 0.46346 | err: 0.22927
batch 206 / 384 | loss: 0.46121 | err: 0.22816
batch 207 / 384 | loss: 0.45898 | err: 0.22705
batch 208 / 384 | loss: 0.45677 | err: 0.22596
batch 209 / 384 | loss: 0.45459 | err: 0.22488
batch 210 / 384 | loss: 0.45683 | err: 0.22619
batch 211 / 384 | loss: 0.46012 | err: 0.22749
batch 212 / 384 | loss: 0.46300 | err: 0.22877
batch 213 / 384 | loss: 0.46298 | err: 0.22770
batch 214 / 384 | loss: 0.46081 | err: 0.22664
batch 215 / 384 | loss: 0.46251 | err: 0.22791
batch 216 / 384 | loss: 0.46358 | err: 0.22917
batch 217 / 384 | loss: 0.46420 | err: 0.23041
batch 218 / 384 | loss: 0.46512 | err: 0.23165
batch 219 / 384 | loss: 0.46833 | err: 0.23288
batch 220 / 3

batch 374 / 384 | loss: 0.44512 | err: 0.22193
batch 375 / 384 | loss: 0.44463 | err: 0.22133
batch 376 / 384 | loss: 0.44692 | err: 0.22207
batch 377 / 384 | loss: 0.44573 | err: 0.22149
batch 378 / 384 | loss: 0.44649 | err: 0.22222
batch 379 / 384 | loss: 0.44564 | err: 0.22164
batch 380 / 384 | loss: 0.44508 | err: 0.22105
batch 381 / 384 | loss: 0.44391 | err: 0.22047
batch 382 / 384 | loss: 0.44275 | err: 0.21990
batch 383 / 384 | loss: 0.44208 | err: 0.21932
batch 384 / 384 | loss: 0.44500 | err: 0.22135
------------- epoch 005 / 005 | time: 079 sec | loss: 0.42133 | err: 0.20703
training time: 378.8414213657379 seconds
batch 001 / 192 | loss: 0.73587 | err: 0.50000
batch 002 / 192 | loss: 0.69600 | err: 0.50000
batch 003 / 192 | loss: 0.55107 | err: 0.33333
batch 004 / 192 | loss: 0.49347 | err: 0.25000
batch 005 / 192 | loss: 0.50405 | err: 0.25000
batch 006 / 192 | loss: 0.62584 | err: 0.29167
batch 007 / 192 | loss: 0.61488 | err: 0.28571
batch 008 / 192 | loss: 0.70709 | er

batch 162 / 192 | loss: 0.46596 | err: 0.23611
batch 163 / 192 | loss: 0.46548 | err: 0.23620
batch 164 / 192 | loss: 0.46788 | err: 0.23780
batch 165 / 192 | loss: 0.46964 | err: 0.23939
batch 166 / 192 | loss: 0.47360 | err: 0.24247
batch 167 / 192 | loss: 0.47144 | err: 0.24102
batch 168 / 192 | loss: 0.46863 | err: 0.23958
batch 169 / 192 | loss: 0.47313 | err: 0.24260
batch 170 / 192 | loss: 0.47035 | err: 0.24118
batch 171 / 192 | loss: 0.47274 | err: 0.24269
batch 172 / 192 | loss: 0.47515 | err: 0.24419
batch 173 / 192 | loss: 0.47724 | err: 0.24566
batch 174 / 192 | loss: 0.47704 | err: 0.24569
batch 175 / 192 | loss: 0.48061 | err: 0.24714
batch 176 / 192 | loss: 0.48391 | err: 0.25000
batch 177 / 192 | loss: 0.48144 | err: 0.24859
batch 178 / 192 | loss: 0.47873 | err: 0.24719
batch 179 / 192 | loss: 0.47717 | err: 0.24581
batch 180 / 192 | loss: 0.47684 | err: 0.24583
batch 181 / 192 | loss: 0.47800 | err: 0.24586
batch 182 / 192 | loss: 0.47889 | err: 0.24588
batch 183 / 1

batch 143 / 192 | loss: 0.43570 | err: 0.21853
batch 144 / 192 | loss: 0.43373 | err: 0.21701
batch 145 / 192 | loss: 0.43418 | err: 0.21724
batch 146 / 192 | loss: 0.43120 | err: 0.21575
batch 147 / 192 | loss: 0.43109 | err: 0.21599
batch 148 / 192 | loss: 0.43086 | err: 0.21622
batch 149 / 192 | loss: 0.42796 | err: 0.21477
batch 150 / 192 | loss: 0.42791 | err: 0.21500
batch 151 / 192 | loss: 0.43059 | err: 0.21523
batch 152 / 192 | loss: 0.42791 | err: 0.21382
batch 153 / 192 | loss: 0.42511 | err: 0.21242
batch 154 / 192 | loss: 0.42486 | err: 0.21266
batch 155 / 192 | loss: 0.42238 | err: 0.21129
batch 156 / 192 | loss: 0.41968 | err: 0.20994
batch 157 / 192 | loss: 0.42103 | err: 0.21019
batch 158 / 192 | loss: 0.42954 | err: 0.21361
batch 159 / 192 | loss: 0.43171 | err: 0.21384
batch 160 / 192 | loss: 0.43272 | err: 0.21406
batch 161 / 192 | loss: 0.43487 | err: 0.21584
batch 162 / 192 | loss: 0.43218 | err: 0.21451
batch 163 / 192 | loss: 0.43767 | err: 0.21779
batch 164 / 1

batch 124 / 192 | loss: 0.43649 | err: 0.22379
batch 125 / 192 | loss: 0.44121 | err: 0.22800
batch 126 / 192 | loss: 0.44528 | err: 0.23016
batch 127 / 192 | loss: 0.44469 | err: 0.22835
batch 128 / 192 | loss: 0.44854 | err: 0.22852
batch 129 / 192 | loss: 0.45735 | err: 0.23256
batch 130 / 192 | loss: 0.46007 | err: 0.23269
batch 131 / 192 | loss: 0.45964 | err: 0.23092
batch 132 / 192 | loss: 0.46247 | err: 0.23485
batch 133 / 192 | loss: 0.46095 | err: 0.23308
batch 134 / 192 | loss: 0.46292 | err: 0.23507
batch 135 / 192 | loss: 0.46530 | err: 0.23704
batch 136 / 192 | loss: 0.46214 | err: 0.23529
batch 137 / 192 | loss: 0.46179 | err: 0.23540
batch 138 / 192 | loss: 0.46418 | err: 0.23732
batch 139 / 192 | loss: 0.47054 | err: 0.24101
batch 140 / 192 | loss: 0.47192 | err: 0.24286
batch 141 / 192 | loss: 0.47444 | err: 0.24468
batch 142 / 192 | loss: 0.47500 | err: 0.24472
batch 143 / 192 | loss: 0.47467 | err: 0.24476
batch 144 / 192 | loss: 0.47161 | err: 0.24306
batch 145 / 1

batch 106 / 192 | loss: 0.44457 | err: 0.20755
batch 107 / 192 | loss: 0.44460 | err: 0.20794
batch 108 / 192 | loss: 0.45039 | err: 0.21065
batch 109 / 192 | loss: 0.44639 | err: 0.20872
batch 110 / 192 | loss: 0.44644 | err: 0.20909
batch 111 / 192 | loss: 0.44758 | err: 0.20946
batch 112 / 192 | loss: 0.44708 | err: 0.20982
batch 113 / 192 | loss: 0.44723 | err: 0.21018
batch 114 / 192 | loss: 0.44714 | err: 0.21053
batch 115 / 192 | loss: 0.45330 | err: 0.21304
batch 116 / 192 | loss: 0.45614 | err: 0.21552
batch 117 / 192 | loss: 0.47423 | err: 0.22222
batch 118 / 192 | loss: 0.47061 | err: 0.22034
batch 119 / 192 | loss: 0.46666 | err: 0.21849
batch 120 / 192 | loss: 0.46358 | err: 0.21667
batch 121 / 192 | loss: 0.45985 | err: 0.21488
batch 122 / 192 | loss: 0.45949 | err: 0.21516
batch 123 / 192 | loss: 0.45888 | err: 0.21545
batch 124 / 192 | loss: 0.45561 | err: 0.21371
batch 125 / 192 | loss: 0.45197 | err: 0.21200
batch 126 / 192 | loss: 0.44861 | err: 0.21032
batch 127 / 1

batch 087 / 192 | loss: 0.48142 | err: 0.25287
batch 088 / 192 | loss: 0.47599 | err: 0.25000
batch 089 / 192 | loss: 0.47064 | err: 0.24719
batch 090 / 192 | loss: 0.47033 | err: 0.24722
batch 091 / 192 | loss: 0.46929 | err: 0.24725
batch 092 / 192 | loss: 0.46735 | err: 0.24728
batch 093 / 192 | loss: 0.46232 | err: 0.24462
batch 094 / 192 | loss: 0.46654 | err: 0.24734
batch 095 / 192 | loss: 0.46690 | err: 0.24737
batch 096 / 192 | loss: 0.46407 | err: 0.24479
batch 097 / 192 | loss: 0.46055 | err: 0.24227
batch 098 / 192 | loss: 0.46070 | err: 0.24235
batch 099 / 192 | loss: 0.46086 | err: 0.24242
batch 100 / 192 | loss: 0.47124 | err: 0.24750
batch 101 / 192 | loss: 0.47874 | err: 0.25248
batch 102 / 192 | loss: 0.47577 | err: 0.25000
batch 103 / 192 | loss: 0.47307 | err: 0.24757
batch 104 / 192 | loss: 0.46852 | err: 0.24519
batch 105 / 192 | loss: 0.46828 | err: 0.24524
batch 106 / 192 | loss: 0.47442 | err: 0.24764
batch 107 / 192 | loss: 0.47195 | err: 0.24533
batch 108 / 1

batch 067 / 096 | loss: 0.45245 | err: 0.22948
batch 068 / 096 | loss: 0.45435 | err: 0.23162
batch 069 / 096 | loss: 0.45111 | err: 0.23007
batch 070 / 096 | loss: 0.44672 | err: 0.22679
batch 071 / 096 | loss: 0.44513 | err: 0.22535
batch 072 / 096 | loss: 0.44376 | err: 0.22569
batch 073 / 096 | loss: 0.44608 | err: 0.22774
batch 074 / 096 | loss: 0.44876 | err: 0.22804
batch 075 / 096 | loss: 0.44608 | err: 0.22667
batch 076 / 096 | loss: 0.44321 | err: 0.22533
batch 077 / 096 | loss: 0.44117 | err: 0.22403
batch 078 / 096 | loss: 0.44063 | err: 0.22276
batch 079 / 096 | loss: 0.43951 | err: 0.22152
batch 080 / 096 | loss: 0.44384 | err: 0.22344
batch 081 / 096 | loss: 0.44371 | err: 0.22377
batch 082 / 096 | loss: 0.44789 | err: 0.22713
batch 083 / 096 | loss: 0.44938 | err: 0.22892
batch 084 / 096 | loss: 0.44475 | err: 0.22619
batch 085 / 096 | loss: 0.44616 | err: 0.22794
batch 086 / 096 | loss: 0.44942 | err: 0.22965
batch 087 / 096 | loss: 0.45020 | err: 0.22989
batch 088 / 0

batch 046 / 096 | loss: 0.40894 | err: 0.22826
batch 047 / 096 | loss: 0.40792 | err: 0.22872
batch 048 / 096 | loss: 0.41466 | err: 0.23438
batch 049 / 096 | loss: 0.42247 | err: 0.23724
batch 050 / 096 | loss: 0.42923 | err: 0.24250
batch 051 / 096 | loss: 0.43368 | err: 0.24510
batch 052 / 096 | loss: 0.43023 | err: 0.24279
batch 053 / 096 | loss: 0.43678 | err: 0.24292
batch 054 / 096 | loss: 0.43599 | err: 0.24074
batch 055 / 096 | loss: 0.43499 | err: 0.24091
batch 056 / 096 | loss: 0.43759 | err: 0.24330
batch 057 / 096 | loss: 0.43815 | err: 0.24123
batch 058 / 096 | loss: 0.44126 | err: 0.24353
batch 059 / 096 | loss: 0.44141 | err: 0.24364
batch 060 / 096 | loss: 0.43793 | err: 0.23958
batch 061 / 096 | loss: 0.43620 | err: 0.23770
batch 062 / 096 | loss: 0.43925 | err: 0.23790
batch 063 / 096 | loss: 0.44773 | err: 0.24405
batch 064 / 096 | loss: 0.44663 | err: 0.24023
batch 065 / 096 | loss: 0.45420 | err: 0.24231
batch 066 / 096 | loss: 0.45349 | err: 0.24053
batch 067 / 0

batch 026 / 096 | loss: 0.41568 | err: 0.22596
batch 027 / 096 | loss: 0.41748 | err: 0.22222
batch 028 / 096 | loss: 0.42661 | err: 0.22768
batch 029 / 096 | loss: 0.43356 | err: 0.23276
batch 030 / 096 | loss: 0.42095 | err: 0.22500
batch 031 / 096 | loss: 0.41774 | err: 0.22177
batch 032 / 096 | loss: 0.43351 | err: 0.23047
batch 033 / 096 | loss: 0.43460 | err: 0.23106
batch 034 / 096 | loss: 0.43357 | err: 0.23162
batch 035 / 096 | loss: 0.42761 | err: 0.22857
batch 036 / 096 | loss: 0.43723 | err: 0.23611
batch 037 / 096 | loss: 0.43927 | err: 0.23649
batch 038 / 096 | loss: 0.45926 | err: 0.24671
batch 039 / 096 | loss: 0.46696 | err: 0.25000
batch 040 / 096 | loss: 0.47077 | err: 0.25000
batch 041 / 096 | loss: 0.46954 | err: 0.25000
batch 042 / 096 | loss: 0.46736 | err: 0.25000
batch 043 / 096 | loss: 0.46808 | err: 0.25000
batch 044 / 096 | loss: 0.46138 | err: 0.24716
batch 045 / 096 | loss: 0.45669 | err: 0.24444
batch 046 / 096 | loss: 0.45084 | err: 0.24185
batch 047 / 0

batch 004 / 048 | loss: 0.40645 | err: 0.21875
batch 005 / 048 | loss: 0.44288 | err: 0.25000
batch 006 / 048 | loss: 0.43559 | err: 0.25000
batch 007 / 048 | loss: 0.41899 | err: 0.24107
batch 008 / 048 | loss: 0.40705 | err: 0.23438
batch 009 / 048 | loss: 0.38270 | err: 0.21528
batch 010 / 048 | loss: 0.39610 | err: 0.22500
batch 011 / 048 | loss: 0.39189 | err: 0.22159
batch 012 / 048 | loss: 0.38281 | err: 0.21354
batch 013 / 048 | loss: 0.37714 | err: 0.20673
batch 014 / 048 | loss: 0.37139 | err: 0.20536
batch 015 / 048 | loss: 0.36262 | err: 0.20000
batch 016 / 048 | loss: 0.38403 | err: 0.20703
batch 017 / 048 | loss: 0.39423 | err: 0.21324
batch 018 / 048 | loss: 0.39874 | err: 0.21528
batch 019 / 048 | loss: 0.40433 | err: 0.22039
batch 020 / 048 | loss: 0.40057 | err: 0.21563
batch 021 / 048 | loss: 0.40570 | err: 0.21726
batch 022 / 048 | loss: 0.40793 | err: 0.22159
batch 023 / 048 | loss: 0.40171 | err: 0.21739
batch 024 / 048 | loss: 0.39507 | err: 0.21354
batch 025 / 0

batch 005 / 024 | loss: 0.40092 | err: 0.20000
batch 006 / 024 | loss: 0.36822 | err: 0.18229
batch 007 / 024 | loss: 0.34810 | err: 0.17411
batch 008 / 024 | loss: 0.33274 | err: 0.16797
batch 009 / 024 | loss: 0.33082 | err: 0.16667
batch 010 / 024 | loss: 0.35508 | err: 0.18125
batch 011 / 024 | loss: 0.37408 | err: 0.19034
batch 012 / 024 | loss: 0.38212 | err: 0.19271
batch 013 / 024 | loss: 0.40097 | err: 0.19952
batch 014 / 024 | loss: 0.40751 | err: 0.20536
batch 015 / 024 | loss: 0.40519 | err: 0.20417
batch 016 / 024 | loss: 0.39826 | err: 0.20117
batch 017 / 024 | loss: 0.39600 | err: 0.19853
batch 018 / 024 | loss: 0.39157 | err: 0.19618
batch 019 / 024 | loss: 0.38703 | err: 0.19408
batch 020 / 024 | loss: 0.39412 | err: 0.19687
batch 021 / 024 | loss: 0.39242 | err: 0.19643
batch 022 / 024 | loss: 0.39108 | err: 0.19602
batch 023 / 024 | loss: 0.38772 | err: 0.19429
batch 024 / 024 | loss: 0.39203 | err: 0.19792
model saved!
------------- epoch 002 / 005 | time: 015 sec |

batch 006 / 006 | loss: 0.61406 | err: 0.27214
model saved!
------------- epoch 001 / 005 | time: 009 sec | loss: 0.44625 | err: 0.21484
batch 001 / 006 | loss: 0.45017 | err: 0.22656
batch 002 / 006 | loss: 0.40215 | err: 0.21094
batch 003 / 006 | loss: 0.41924 | err: 0.21615
batch 004 / 006 | loss: 0.42575 | err: 0.21875
batch 005 / 006 | loss: 0.41902 | err: 0.21250
batch 006 / 006 | loss: 0.41467 | err: 0.21094
model saved!
------------- epoch 002 / 005 | time: 010 sec | loss: 0.33653 | err: 0.14844
batch 001 / 006 | loss: 0.33905 | err: 0.17188
batch 002 / 006 | loss: 0.34940 | err: 0.17188
batch 003 / 006 | loss: 0.31919 | err: 0.15365
batch 004 / 006 | loss: 0.35952 | err: 0.16992
batch 005 / 006 | loss: 0.37979 | err: 0.17969
batch 006 / 006 | loss: 0.37987 | err: 0.17969
model saved!
------------- epoch 003 / 005 | time: 010 sec | loss: 0.30664 | err: 0.13281
batch 001 / 006 | loss: 0.47525 | err: 0.21094
batch 002 / 006 | loss: 0.40489 | err: 0.18750
batch 003 / 006 | loss: 0