In [1]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil
from datetime import datetime

# Create all new floating-point torch tensors in double precision.
torch.set_default_dtype(torch.float64)
# Seed the global torch and numpy random number generators.
torch.manual_seed(0)
np.random.seed(0)

# Slope passed to the model as `train_slope`; DELTA also reads it directly.
TRAIN_SLOPE = 2
# Slope passed to the model as `eval_slope`.
EVAL_SLOPE = 5
# Box constraints imposed on the optimized features in CCP and DELTA.
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10
# random_state used by the data loaders when shuffling DataFrame rows.
SEED = 2

# Utils

In [2]:
def split_data(X, Y, percentage):
    """Split off the leading fraction of the samples as a validation set.

    The first int(len(X)*percentage) entries of X and Y become the validation
    part; everything after them is the training part.

    Returns:
        (X_train, Y_train, X_val, Y_val)
    """
    cut = int(len(X)*percentage)
    X_val, Y_val = X[:cut], Y[:cut]
    X_train, Y_train = X[cut:], Y[cut:]
    return X_train, Y_train, X_val, Y_val

def shuffle(X, Y):
    """Jointly permute the rows of X and the labels Y with a fixed seed.

    Note: this definition shadows the `shuffle` imported from sklearn.utils
    at the top of the file.

    Args:
        X: 2-D tensor of shape (N, d).
        Y: labels, either a 1-D tensor of shape (N,) or a 2-D tensor of
           shape (N, 1).

    Returns:
        (X_shuffled, Y_shuffled) where Y_shuffled is 1-D. Rows stay aligned.

    Side effects:
        Re-seeds the global torch and numpy RNGs with 0, so repeated calls
        produce the same permutation.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    # torch.cat along dim 1 needs both operands to be 2-D; lift 1-D labels
    # to a single column so they can ride along with the features.
    if Y.dim() == 1:
        Y = Y.unsqueeze(1)
    data = torch.cat((Y, X), 1)
    data = data[torch.randperm(data.size()[0])]
    # Column 0 holds the labels, the remaining columns hold the features.
    X = data[:, 1:]
    Y = data[:, 0]
    return X, Y

def conf_mat(Y1, Y2):
    """Confusion matrix in percent, plus its trace.

    The matrix is computed over the label set [-1, 1] and every cell is
    expressed as a percentage of len(Y1). The second return value is the
    trace of that matrix, i.e. the percentage on the diagonal.

    Returns:
        (mat, acc)
    """
    counts = confusion_matrix(Y1, Y2, labels=[-1, 1])
    mat = counts*100/len(Y1)
    return mat, np.trace(mat)

def calc_accuracy(Y, Ypred):
    """Fraction of positions at which Y and Ypred are exactly equal.

    Equality is decided by checking where the element-wise difference
    Y - Ypred is zero; the count is divided by len(Y).
    """
    mismatch = Y - Ypred
    n_equal = int((mismatch == 0).sum())
    return n_equal/len(Y)

# CCP classes

In [11]:
class CCP:
    """Batched convex-concave procedure over a fixed-size batch of points.

    A single CVXPY problem is built once. For every row i of the batch it
    maximizes
        <x_i, f'(xt_i)> - g(x_i) - c(x_i, r_i)
    subject to X_LOWER_BOUND <= x <= X_UPPER_BOUND, where `xt` is the current
    linearization point and `r` the original features. The objective is the
    sum of these per-row terms.

    Args:
        x_dim: number of features per point.
        batch_size: number of rows the problem is built for; inputs passed to
            `ccp` / `optimize_X` must have exactly this many rows.
        funcs: dict providing the callables "f_derivative", "g" and "c".
        scale: forwarded to the cost function `c`.
    """

    def __init__(self, x_dim, batch_size, funcs, scale):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        self.batch_size = batch_size

        self.x = cp.Variable((batch_size, x_dim))
        self.xt = cp.Parameter((batch_size, x_dim))
        self.r = cp.Parameter((batch_size, x_dim))
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.slope = cp.Parameter(1)

        # diag(x @ F.T) picks out the row-wise inner products <x_i, F_i>.
        target = cp.diag(self.x@(self.f_derivative(self.xt, self.w, self.b, self.slope).T))-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        self.prob = cp.Problem(cp.Maximize(cp.sum(target)), constraints)

    def _solve(self):
        """Solve the current sub-problem and make sure a solution came back."""
        self.prob.solve()
        if self.x.value is None:
            raise RuntimeError("CCP sub-problem returned no solution (status: {})".format(self.prob.status))

    def ccp(self, r, num_iterations):
        """Run the procedure starting from `r` (numpy in, numpy out).

        One initial solve is performed with the linearization point set to
        `r`, followed by `num_iterations` further solves, each linearized at
        the previous solution.

        Args:
            r: numpy array of shape (batch_size, x_dim).
            num_iterations: number of re-linearization steps after the
                initial solve.

        Returns:
            numpy array holding the final value of the variable `x`.

        Raises:
            RuntimeError: if a solve finishes without producing a value.
        """
        self.xt.value = r
        self.r.value = r
        self._solve()
        for _ in range(num_iterations):
            # Re-linearize around the most recent solution.
            self.xt.value = self.x.value
            self._solve()
        return self.x.value

    def optimize_X(self, X, w, b, slope, num_iterations):
        """Tensor front-end for `ccp` (tensor in, tensor out).

        Args:
            X: torch tensor of shape (batch_size, x_dim); converted with
                `.numpy()`.
            w, b: torch tensors; detached before conversion to numpy.
            slope: scalar, wrapped into a length-1 array for the parameter.
            num_iterations: forwarded to `ccp`.

        Returns:
            torch tensor created from the numpy solution.
        """
        w = w.detach().numpy()
        b = b.detach().numpy()
        slope = np.full(1, slope)
        X = X.numpy()

        self.w.value = w
        self.b.value = b
        self.slope.value = slope
        return torch.from_numpy(self.ccp(X, num_iterations))

  and should_run_async(code)


In [12]:
class DELTA():
    """Differentiable single-point response problem wrapped in a CvxpyLayer.

    The problem maximizes
        <x, f_der> - g(x, w, b, slope) - c(x, r, x_dim, scale)
    subject to X_LOWER_BOUND <= x <= X_UPPER_BOUND, with `r`, `w`, `b` and
    `f_der` exposed as layer parameters.

    Args:
        x_dim: number of features.
        funcs: dict providing the callables "g" and "c".
        scale: forwarded to the cost function `c`.
        slope: slope constant used inside `g`. Defaults to the module-level
            TRAIN_SLOPE, which is what was previously hard-coded.
    """

    def __init__(self, x_dim, funcs, scale, slope=None):
        if slope is None:
            slope = TRAIN_SLOPE
        self.g = funcs["g"]
        self.c = funcs["c"]

        self.x = cp.Variable(x_dim)
        # Parameters get random initial values; the order of these draws is
        # kept as-is so the global numpy RNG stream is consumed identically.
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.b = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))

        target = self.x@self.f_der-self.g(self.x, self.w, self.b, slope)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        objective = cp.Maximize(target)
        problem = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(problem, parameters=[self.r, self.w, self.b, self.f_der],
                                variables=[self.x])

    def optimize_X(self, X, w, b, F_DER):
        """Run the layer on (X, w, b, F_DER) and return its first output."""
        return self.layer(X, w, b, F_DER)[0]

# Gain & Cost functions

In [13]:
def score(x, w, b):
    """Linear score of `x` under weights `w` and bias `b`: x@w + b."""
    linear_part = x@w
    return linear_part + b

def f(x, w, b, slope):
    """Half the 2-norm of the vector (1, slope*score(x, w, b) + 1)."""
    shifted = slope*score(x, w, b) + 1
    stacked = cp.hstack([1, shifted])
    return 0.5*cp.norm(stacked, 2)

def g(x, w, b, slope):
    """Half the 2-norm of the vector (1, slope*score(x, w, b) - 1)."""
    shifted = slope*score(x, w, b) - 1
    stacked = cp.hstack([1, shifted])
    return 0.5*cp.norm(stacked, 2)

def c(x, r, x_dim, scale):
    """Cost of moving from `r` to `x`: scale times the squared distance.

    `x_dim` is accepted for signature compatibility but is not used.
    """
    displacement = x-r
    return (scale)*cp.sum_squares(displacement)

def f_derivative(x, w, b, slope):
    """Expression 0.5 * slope * z/sqrt(z**2 + 1) * w, with z = slope*score + 1.

    The scalar factor is combined with `w` through an element-wise multiply.
    """
    numerator = slope*score(x, w, b) + 1
    denominator = cp.sqrt((slope*score(x, w, b) + 1)**2 + 1)
    factor = slope*(numerator/denominator)
    return 0.5*cp.multiply(factor, w)
    
def f_batch(x, w, b, slope):
    """Row-wise version of `f` for a batch `x` of shape (batch, x_dim).

    For every row, returns half the 2-norm of (1, slope*score + 1).

    `slope` may be a length-1 CVXPY Parameter. It is combined with the score
    vector through `cp.multiply` so the product is always element-wise;
    plain `*` between two non-scalar expressions is deprecated in CVXPY and
    can resolve to matrix multiplication.
    """
    shifted = cp.multiply(slope, score(x, w, b)) + 1
    return 0.5*cp.norm(cp.vstack([np.ones(x.shape[0]), shifted]), 2, axis=0)

def g_batch(x, w, b, slope):
    """Row-wise version of `g` for a batch `x` of shape (batch, x_dim).

    For every row, returns half the 2-norm of (1, slope*score - 1).

    `slope` may be a length-1 CVXPY Parameter. It is combined with the score
    vector through `cp.multiply` so the product is always element-wise;
    plain `*` between two non-scalar expressions is deprecated in CVXPY and
    can resolve to matrix multiplication.
    """
    n_rows = x.shape[0]
    shifted = cp.multiply(slope, score(x, w, b)) - 1
    # Stack a row of ones on top of the shifted scores -> shape (2, batch);
    # the norm over axis 0 then yields one value per batch row.
    stacked = cp.vstack([np.ones((1, n_rows)), cp.reshape(shifted, (1, n_rows))])
    return 0.5*cp.norm(stacked, 2, axis=0)

def c_batch(x, r, x_dim, scale):
    """Row-wise cost: scale times the squared 2-norm of each row of x - r.

    `x_dim` is accepted for signature compatibility but is not used.
    """
    row_distances = cp.norm(x-r, 2, axis=1)
    return (scale)*cp.square(row_distances)

def f_derivative_batch(x, w, b, slope):
    """Row-wise version of `f_derivative` for a batch `x` of shape (batch, x_dim).

    Computes, per row, the scalar 0.5 * slope * z/sqrt(z**2 + 1) with
    z = slope*score + 1, and returns the outer product of those scalars
    with `w`, i.e. an expression of shape (batch, x_dim).

    `slope` may be a length-1 CVXPY Parameter. It is combined with vector
    expressions through `cp.multiply` so the products are always
    element-wise; plain `*` between two non-scalar expressions is deprecated
    in CVXPY and can resolve to matrix multiplication.
    """
    shifted = cp.multiply(slope, score(x, w, b)) + 1
    nablas = 0.5*cp.multiply(slope, shifted/cp.sqrt(shifted**2 + 1))
    # (batch, 1) @ (1, x_dim) -> one scaled copy of w per batch row.
    return cp.reshape(nablas, (nablas.shape[0], 1))@cp.reshape(w, (1, x.shape[1]))

# Model

In [17]:
class MyStrategicModel(torch.nn.Module):
    """Linear classifier that can be trained against strategically moved inputs.

    With `strategic=True`, inputs are first passed through the CCP solver
    (`self.ccp`). During training they are additionally passed through the
    differentiable DELTA layer (`self.delta`) so gradients can flow to `w`
    and `b`. With `strategic=False` the model scores the raw inputs.

    Args:
        x_dim: number of features.
        batch_size: batch size the CCP problem is built for.
        funcs: function dict handed to DELTA.
        funcs_batch: function dict handed to CCP.
        train_slope: slope used when solving CCP during training.
        eval_slope: slope used when solving CCP during evaluation.
        scale: cost scale forwarded to CCP and DELTA.
        strategic: whether inputs are strategically modified before scoring.

    Note:
        DELTA is constructed without a slope argument, so it uses whatever
        slope it defaults to rather than `train_slope`.
    """

    # Default number of CCP re-linearization steps for the convenience
    # entry points (`optimize_X`, `evaluate`). Those entry points previously
    # omitted the argument entirely; the value here is a chosen default.
    DEFAULT_CCP_ITERATIONS = 10

    def __init__(self, x_dim, batch_size, funcs, funcs_batch, train_slope, eval_slope, scale, strategic=False):
        torch.manual_seed(0)
        np.random.seed(0)

        super(MyStrategicModel, self).__init__()
        self.x_dim = x_dim
        self.batch_size = batch_size
        self.train_slope, self.eval_slope = train_slope, eval_slope
        # Uniform initialization in [-sqrt(1/x_dim), sqrt(1/x_dim)].
        self.w = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64, requires_grad=True)))
        self.b = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(1, dtype=torch.float64, requires_grad=True)))
        self.strategic = strategic
        self.ccp = CCP(x_dim, batch_size, funcs_batch, scale)
        self.delta = DELTA(x_dim, funcs, scale)
        # Accumulated wall-clock seconds spent inside the CCP solver.
        self.ccp_time = 0
        # Wall-clock seconds of the last `fit` call.
        self.total_time = 0

    def forward(self, X, num_iterations, evaluation=False):
        """Score X, optionally after strategic modification.

        Args:
            X: tensor of shape (batch, x_dim).
            num_iterations: number of CCP re-linearization steps.
            evaluation: if True, use `eval_slope` and skip the DELTA layer.

        Returns:
            1-D tensor of scores.
        """
        if not self.strategic:
            return self.score(X)

        slope = self.eval_slope if evaluation else self.train_slope
        t1 = time.time()
        XT = self.ccp.optimize_X(X, self.w, self.b, slope, num_iterations)
        self.ccp_time += time.time()-t1

        if evaluation:
            X_opt = XT
        else:
            F_DER = self.get_f_ders(XT, self.train_slope)
            # X_opt should be equal to XT but we solve again for the gradients.
            X_opt = self.delta.optimize_X(X, self.w, self.b, F_DER)
        return self.score(X_opt)

    def optimize_X(self, X, evaluation=False, num_iterations=DEFAULT_CCP_ITERATIONS):
        """Return the CCP-modified version of X.

        Args:
            X: tensor of shape (batch, x_dim).
            evaluation: selects `eval_slope` (True) or `train_slope` (False).
            num_iterations: number of CCP re-linearization steps.
        """
        slope = self.eval_slope if evaluation else self.train_slope
        return self.ccp.optimize_X(X, self.w, self.b, slope, num_iterations)

    def normalize_weights(self):
        """Rescale (w, b) in place so that the joint vector has unit 2-norm."""
        with torch.no_grad():
            norm = torch.sqrt(torch.sum(self.w**2) + self.b**2)
            self.w /= norm
            self.b /= norm

    def score(self, x):
        """Linear score x@w + b."""
        return x@self.w + self.b

    def get_f_ders(self, XT, slope):
        """Per-row derivative terms at XT as a (batch, x_dim) tensor.

        Each row is 0.5*slope*z/sqrt(z**2 + 1) * w with z = slope*score + 1.
        """
        nablas = 0.5*slope*((slope*self.score(XT) + 1)/torch.sqrt((slope*self.score(XT) + 1)**2 + 1))
        return torch.reshape(nablas, (len(nablas), 1))@torch.reshape(self.w, (1, len(self.w)))

    def calc_accuracy(self, Y, Y_pred):
        """Fraction of entries where sign(Y_pred) equals Y."""
        Y_pred = torch.sign(Y_pred)
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num
        return acc

    def evaluate(self, X, Y, num_iterations=DEFAULT_CCP_ITERATIONS):
        """Accuracy of the model on (X, Y) in evaluation mode.

        Args:
            X: tensor of shape (batch, x_dim).
            Y: tensor of labels.
            num_iterations: number of CCP re-linearization steps.
        """
        return self.calc_accuracy(Y, self.forward(X, num_iterations, evaluation=True))

    def loss(self, Y, Y_pred):
        """Mean hinge loss max(0, 1 - Y_pred*Y)."""
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))

    def save_model(self, train_errors, val_errors, train_losses, val_losses, info, path, comment=None):
        """Write the state dict, the metric histories (CSV) and `info` under `path`.

        If `comment` is given, everything is written to `path/comment`.
        The target directory is created when missing.
        """
        if comment is not None:
            path += "/" + comment

        filename = path + "/model.pt"
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        torch.save(self.state_dict(), filename)

        pd.DataFrame(np.array(train_errors)).to_csv(path + '/train_errors.csv')
        pd.DataFrame(np.array(val_errors)).to_csv(path + '/val_errors.csv')
        pd.DataFrame(np.array(train_losses)).to_csv(path + '/train_losses.csv')
        pd.DataFrame(np.array(val_losses)).to_csv(path + '/val_losses.csv')

        with open(path + "/info.txt", "w") as f:
            f.write(info)

    def load_model(self, filename):
        """Load a state dict from `filename` and switch to eval mode."""
        self.load_state_dict(torch.load(filename))
        self.eval()

    def fit(self, path, X, Y, Xval, Yval, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False, callback=None, comment=None):
        """Train with mini-batches, validating and checkpointing after every epoch.

        The number of CCP iterations used in epoch k (0-based) is k + 1.
        The model is saved whenever the validation error improves; training
        stops after 4 consecutive epochs without improvement.

        Args:
            path: directory for checkpoints (see `save_model`).
            X, Y: training tensors.
            Xval, Yval: validation tensors.
            opt: optimizer class, called as opt(self.parameters(), **opt_kwargs).
            opt_kwargs: optimizer keyword arguments (not mutated here).
            batch_size: DataLoader batch size. NOTE(review): in strategic
                mode every batch must have exactly `self.batch_size` rows,
                since the CCP problem has a fixed shape — a trailing partial
                batch would not fit.
            epochs: maximum number of epochs.
            verbose: print running loss/error per batch and per epoch.
            callback: optional zero-argument callable invoked after each batch.
            comment: optional sub-directory name for checkpoints.

        Returns:
            (train_errors, val_errors, train_losses, val_losses); the train
            lists hold one list per epoch, the val lists one value per epoch.
        """
        train_dset = TensorDataset(X, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        test_dset = TensorDataset(Xval, Yval)
        test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=True)

        opt = opt(self.parameters(), **opt_kwargs)

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []

        best_val_error = 1
        consecutive_no_improvement = 0
        patience = 4

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, Ybatch in train_loader:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch, epoch + 1)
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                with torch.no_grad():
                    e = self.calc_accuracy(Ybatch, Ybatch_pred)
                    train_errors[-1].append(1-e)
                if verbose:
                    print("batch %03d / %03d | loss: %3.5f | err: %3.5f" %
                          (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                batch += 1
                if callback is not None:
                    callback()

            with torch.no_grad():
                total_loss = 0
                total_error = 0
                batch = 0
                for Xbatch, Ybatch in test_loader:
                    Yval_pred = self.forward(Xbatch, epoch + 1, evaluation=True)
                    val_loss = self.loss(Ybatch, Yval_pred).item()
                    total_loss += val_loss
                    val_error = 1-self.calc_accuracy(Ybatch, Yval_pred)
                    total_error += val_error
                    batch += 1

                avg_loss = total_loss/batch
                avg_error = total_error/batch
                val_losses.append(avg_loss)
                val_errors.append(avg_error)
                if avg_error < best_val_error:
                    consecutive_no_improvement = 0
                    best_val_error = avg_error
                    # .get: the optimizer may be configured without an explicit lr.
                    info = "training time in seconds: {}\nepoch: {}\nbatch size: {}\ntrain slope: {}\neval slope: {}\nlearning rate: {}\nvalidation loss: {}\nvalidation error: {}\n".format(
                        time.time()-total_time, epoch, batch_size, self.train_slope, self.eval_slope, opt_kwargs.get("lr"), avg_loss, avg_error)
                    self.save_model(train_errors, val_errors, train_losses, val_losses, info, path, comment)
                    print("model saved!")
                else:
                    consecutive_no_improvement += 1
                    if consecutive_no_improvement >= patience:
                        break

            t2 = time.time()
            if verbose:
                print("------------- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))

        self.total_time = time.time()-total_time
        print("training time: {} seconds".format(self.total_time))
        return train_errors, val_errors, train_losses, val_losses

  and should_run_async(code)


In [18]:
def gen_sklearn_data(x_dim, N, informative_frac=1, shift_range=1, scale_range=1, noise_frac=0.01):
    """Generate a standardized synthetic binary classification dataset.

    Uses sklearn's make_classification with per-feature shift and scale
    vectors drawn from a normal distribution (scaled by `shift_range` and
    `scale_range`). Labels 0 are mapped to -1, and every feature column is
    centered and divided by its standard deviation.

    Args:
        x_dim: number of features.
        N: number of samples.
        informative_frac: fraction of features that are informative; the
            remainder are generated as redundant features.
        shift_range: multiplier for the random per-feature shift.
        scale_range: multiplier for the random per-feature scale.
        noise_frac: passed as `flip_y`.

    Returns:
        (X, Y) as torch tensors.

    Side effects:
        Re-seeds the global torch and numpy RNGs with 0.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    informative = int(informative_frac*x_dim)
    redundant = x_dim - informative
    # Draw order matters for reproducibility: shifts first, then scales.
    shifts = shift_range*np.random.randn(x_dim)
    scales = scale_range*np.random.randn(x_dim)
    X, Y = make_classification(n_samples=N, n_features=x_dim, n_informative=informative, n_redundant=redundant,
                               flip_y=noise_frac, shift=shifts, scale=scales, random_state=0)
    Y[Y == 0] = -1
    centered = X - np.mean(X, axis=0)
    standardized = centered / np.std(centered, axis=0)
    return torch.from_numpy(standardized), torch.from_numpy(Y)

def load_spam_data(path=r"C:\Users\sagil\Desktop\nir_project\tip_spam_data\IS_journal_tip_spam.arff"):
    """Load the tip-spam ARFF dataset and return standardized features and labels.

    Only a fixed subset of columns is kept. The `class1` column is mapped
    from b'spam' / b'notspam' to -1 / 1. Rows are shuffled with
    random_state=SEED. Features are centered, divided by their standard
    deviation, and finally divided by sqrt(number of features).

    Args:
        path: location of the ARFF file. Defaults to the location that was
            previously hard-coded.

    Returns:
        (X, Y) as torch tensors; Y is int64 with values in {-1, 1}.

    Side effects:
        Re-seeds the global torch and numpy RNGs with 0.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    data, _ = arff.loadarff(path)
    df = pd.DataFrame(data)
    most_disc = ['qTips_plc', 'rating_plc', 'qEmail_tip', 'qContacts_tip', 'qURL_tip', 'qPhone_tip', 'qNumeriChar_tip', 'sentistrength_tip', 'combined_tip', 'qWords_tip', 'followers_followees_gph', 'qunigram_avg_tip', 'qTips_usr', 'indeg_gph', 'qCapitalChar_tip', 'class1']
    # Work on an independent copy so the column assignment below is not a
    # write into a slice of the original frame.
    df = df[most_disc].copy()
    # Assign the result back instead of calling replace(..., inplace=True)
    # on the column: the chained inplace form does not update `df` under
    # pandas Copy-on-Write.
    df["class1"] = df["class1"].replace({b'spam': -1, b'notspam': 1})
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    # Explicit dtypes: torch.from_numpy cannot convert object arrays.
    Y = df['class1'].to_numpy(dtype=np.int64)
    X = df.drop('class1', axis = 1).to_numpy(dtype=np.float64)
    x_dim = X.shape[1]
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_card_fraud_data(path='C:/Users/sagil/Desktop/nir_project/card_fraud_dataset/creditcard.csv'):
    """Load the credit-card fraud CSV and return a class-balanced, standardized set.

    `Amount` is robust-scaled and placed first as `scaled_amount`; `Time`
    and `Amount` are dropped. `Class` is remapped 1 -> -1 and 0 -> 1. After
    shuffling, all rows with Class == -1 are kept together with an equal
    number of leading Class == 1 rows, and the result is shuffled again.
    Features are centered, divided by their standard deviation, and finally
    divided by sqrt(number of features).

    Args:
        path: location of the CSV file. Defaults to the location that was
            previously hard-coded.

    Returns:
        (X, Y) as torch tensors.

    Side effects:
        Re-seeds the global torch and numpy RNGs with 0.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    df = pd.read_csv(path)

    rob_scaler = RobustScaler()
    scaled_amount = rob_scaler.fit_transform(df['Amount'].values.reshape(-1,1)).ravel()
    df = df.drop(['Time','Amount'], axis=1)
    df.insert(0, 'scaled_amount', scaled_amount)

    # Assign the result back instead of calling replace(..., inplace=True)
    # on the column: the chained inplace form does not update `df` under
    # pandas Copy-on-Write.
    df["Class"] = df["Class"].replace({1: -1, 0: 1})
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    # Balance the classes: keep every fraud row and as many non-fraud rows.
    # (The original comment states the dataset has 492 fraud rows, which was
    # the previously hard-coded count.)
    fraud_df = df.loc[df['Class'] == -1]
    non_fraud_df = df.loc[df['Class'] == 1][:len(fraud_df)]

    normal_distributed_df = pd.concat([fraud_df, non_fraud_df])

    # Shuffle dataframe rows
    df = normal_distributed_df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    Y = df['Class'].values
    X = df.drop('Class', axis = 1).values
    x_dim = X.shape[1]
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_credit_default_data(url='https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'):
    """Load the processed credit-default CSV and return a class-balanced, standardized set.

    `NoDefaultNextMonth` is remapped 0 -> -1. Marital-status and age-bucket
    columns are dropped. After shuffling, all rows with label -1 are kept
    together with an equal number of leading label-1 rows, and the result is
    shuffled again. Features are standard-scaled, centered and divided by
    their standard deviation once more, and finally divided by
    sqrt(number of features).

    The label column is selected by name. The previous version selected it
    by position (column 0), which is equivalent as long as the label is the
    first column of the file.

    Args:
        url: location of the CSV (URL or path). Defaults to the location
            that was previously hard-coded.

    Returns:
        (X, Y) as torch tensors.

    Side effects:
        Re-seeds the global torch and numpy RNGs with 0.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    label = "NoDefaultNextMonth"
    df = pd.read_csv(url)
    # Assign the result back instead of calling replace(..., inplace=True)
    # on the column: the chained inplace form does not update `df` under
    # pandas Copy-on-Write.
    df[label] = df[label].replace({0: -1})
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    df = df.drop(['Married', 'Single', 'Age_lt_25', 'Age_in_25_to_40', 'Age_in_40_to_59', 'Age_geq_60'], axis = 1)

    # Balance the classes: keep every default row and as many non-default
    # rows (the count was previously hard-coded as 6636).
    fraud_df = df.loc[df[label] == -1]
    non_fraud_df = df.loc[df[label] == 1][:len(fraud_df)]

    normal_distributed_df = pd.concat([fraud_df, non_fraud_df])

    # Shuffle dataframe rows
    df = normal_distributed_df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    # Scale straight into a float array rather than writing floats back
    # into the (possibly integer-typed) DataFrame columns.
    scaler = StandardScaler()
    Y = df[label].values
    X = scaler.fit_transform(df.drop(label, axis=1))
    x_dim = X.shape[1]
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_financial_distress_data(path="C:/Users/sagil/Desktop/nir_project/financial_distress_data/Financial Distress.csv"):
    """Load the financial-distress CSV and return standardized features and binary labels.

    Columns matching 'x80' and the `Time` column are dropped; the last row
    of every `Company` group is kept. Non-label columns are standard-scaled,
    rows are shuffled with random_state=SEED, and the continuous
    `Financial Distress` value is binarized: values below -0.5 become -1,
    everything else becomes 1. Features are then centered, divided by their
    standard deviation, and divided by sqrt(number of features).

    The label column is selected by name. The previous version selected it
    by position (column 0 after grouping), which is equivalent as long as
    the label is the first remaining column.

    Args:
        path: location of the CSV file. Defaults to the location that was
            previously hard-coded.

    Returns:
        (X, Y) as torch tensors.

    Side effects:
        Re-seeds the global torch and numpy RNGs with 0.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    label = "Financial Distress"
    data = pd.read_csv(path)

    data = data[data.columns.drop(list(data.filter(regex='x80')))] # Since it is a categorical feature with 37 features.
    data = data.drop(['Time'], axis=1)

    data_grouped = data.groupby(['Company']).last()

    scaler = StandardScaler()
    data_grouped.loc[:, data_grouped.columns != label] = scaler.fit_transform(data_grouped.drop(label, axis=1))

    # Shuffle dataframe rows
    data_grouped = data_grouped.sample(frac=1, random_state=SEED).reset_index(drop=True)

    raw_target = data_grouped[label].to_numpy()
    X = data_grouped.drop(label, axis=1).to_numpy(dtype=np.float64)
    # Convert the target variable from continuous to binary form.
    Y = np.where(raw_target < -0.5, -1.0, 1.0)
    x_dim = X.shape[1]
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

In [19]:
# Runtime experiment: train the strategic model once per batch size and
# record total training time and time spent inside the CCP solver.

# Output directory for checkpoints and timing CSVs.
path = "C:/Users/sagil/Desktop/nir_project/models/runtime_varying_batch_size_dim5_inc_tol"
epochs = 5
x_dim = 5
scale = 1
# 1024 synthetic samples; the first 25% are held out for validation.
X, Y = gen_sklearn_data(x_dim, 1024)
X, Y, Xval, Yval = split_data(X, Y, 0.25)
print(Xval.size())
print("percent of positive samples: {}%".format(100 * len(Y[Y == 1]) / len(Y)))

# Per-point functions (handed to DELTA) and their batched counterparts
# (handed to CCP).
funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}
funcs_batch = {"f": f_batch, "g": g_batch, "f_derivative": f_derivative_batch, "c": c_batch, "score": score}

# Timing results, one entry per batch size.
total = []
ccp = []
# Batch sizes 1, 2, 4, ..., 256.
for batch_size in (2**np.arange(9)).tolist():
    # A fresh model per batch size: the CCP problem is built for a fixed
    # number of rows.
    strategic_model = MyStrategicModel(x_dim, batch_size, funcs, funcs_batch, TRAIN_SLOPE, EVAL_SLOPE, scale=scale, strategic=True)
    strategic_model.fit(path, X, Y, Xval, Yval,
                        opt=torch.optim.Adam, opt_kwargs={"lr": (1e-1)},
                        batch_size=batch_size, epochs=epochs, verbose=True,
                       comment="batched")
    
    total_time = strategic_model.total_time
    ccp_time = strategic_model.ccp_time
    total.append(total_time)
    ccp.append(ccp_time)
    # Rewrite both CSVs after every batch size so partial results survive
    # an interrupted run.
    pd.DataFrame(np.array(total)).to_csv(path + '/total_timing_results.csv')
    pd.DataFrame(np.array(ccp)).to_csv(path + '/ccp_timing_results.csv')

torch.Size([256, 5])
percent of positive samples: 50.78125%
batch 001 / 768 | loss: 0.30994 | err: 0.00000
batch 002 / 768 | loss: 0.67513 | err: 0.50000


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

batch 003 / 768 | loss: 0.45008 | err: 0.33333
batch 004 / 768 | loss: 0.82082 | err: 0.50000
batch 005 / 768 | loss: 0.65666 | err: 0.40000
batch 006 / 768 | loss: 0.83206 | err: 0.50000
batch 007 / 768 | loss: 0.71320 | err: 0.42857
batch 008 / 768 | loss: 0.83747 | err: 0.50000
batch 009 / 768 | loss: 0.74442 | err: 0.44444
batch 010 / 768 | loss: 0.66998 | err: 0.40000
batch 011 / 768 | loss: 0.64063 | err: 0.36364
batch 012 / 768 | loss: 0.62015 | err: 0.33333
batch 013 / 768 | loss: 0.57244 | err: 0.30769
batch 014 / 768 | loss: 0.59444 | err: 0.28571
batch 015 / 768 | loss: 0.59189 | err: 0.26667
batch 016 / 768 | loss: 0.55490 | err: 0.25000
batch 017 / 768 | loss: 0.53747 | err: 0.23529
batch 018 / 768 | loss: 0.52719 | err: 0.22222
batch 019 / 768 | loss: 0.58482 | err: 0.26316
batch 020 / 768 | loss: 0.55557 | err: 0.25000
batch 021 / 768 | loss: 0.68482 | err: 0.28571
batch 022 / 768 | loss: 0.82438 | err: 0.31818
batch 023 / 768 | loss: 0.78854 | err: 0.30435
batch 024 / 7

batch 178 / 768 | loss: 0.46297 | err: 0.17978
batch 179 / 768 | loss: 0.46038 | err: 0.17877
batch 180 / 768 | loss: 0.45783 | err: 0.17778
batch 181 / 768 | loss: 0.45530 | err: 0.17680
batch 182 / 768 | loss: 0.45279 | err: 0.17582
batch 183 / 768 | loss: 0.45032 | err: 0.17486
batch 184 / 768 | loss: 0.44787 | err: 0.17391
batch 185 / 768 | loss: 0.44545 | err: 0.17297
batch 186 / 768 | loss: 0.44306 | err: 0.17204
batch 187 / 768 | loss: 0.44069 | err: 0.17112
batch 188 / 768 | loss: 0.43834 | err: 0.17021
batch 189 / 768 | loss: 0.43602 | err: 0.16931
batch 190 / 768 | loss: 0.43373 | err: 0.16842
batch 191 / 768 | loss: 0.43146 | err: 0.16754
batch 192 / 768 | loss: 0.42921 | err: 0.16667
batch 193 / 768 | loss: 0.42699 | err: 0.16580
batch 194 / 768 | loss: 0.43489 | err: 0.17010
batch 195 / 768 | loss: 0.44680 | err: 0.17436
batch 196 / 768 | loss: 0.44453 | err: 0.17347
batch 197 / 768 | loss: 0.44227 | err: 0.17259
batch 198 / 768 | loss: 0.46088 | err: 0.17677
batch 199 / 7

batch 354 / 768 | loss: 0.47683 | err: 0.18644
batch 355 / 768 | loss: 0.47548 | err: 0.18592
batch 356 / 768 | loss: 0.47415 | err: 0.18539
batch 357 / 768 | loss: 0.47282 | err: 0.18487
batch 358 / 768 | loss: 0.47150 | err: 0.18436
batch 359 / 768 | loss: 0.47018 | err: 0.18384
batch 360 / 768 | loss: 0.46899 | err: 0.18333
batch 361 / 768 | loss: 0.47267 | err: 0.18560
batch 362 / 768 | loss: 0.47137 | err: 0.18508
batch 363 / 768 | loss: 0.47007 | err: 0.18457
batch 364 / 768 | loss: 0.46878 | err: 0.18407
batch 365 / 768 | loss: 0.46828 | err: 0.18356
batch 366 / 768 | loss: 0.46700 | err: 0.18306
batch 367 / 768 | loss: 0.46573 | err: 0.18256
batch 368 / 768 | loss: 0.46446 | err: 0.18207
batch 369 / 768 | loss: 0.47282 | err: 0.18428
batch 370 / 768 | loss: 0.47155 | err: 0.18378
batch 371 / 768 | loss: 0.47028 | err: 0.18329
batch 372 / 768 | loss: 0.46901 | err: 0.18280
batch 373 / 768 | loss: 0.46775 | err: 0.18231
batch 374 / 768 | loss: 0.47154 | err: 0.18449
batch 375 / 7

  "Solution may be inaccurate. Try another solver, "


batch 413 / 768 | loss: 0.45478 | err: 0.18160
batch 414 / 768 | loss: 0.45368 | err: 0.18116
batch 415 / 768 | loss: 0.45259 | err: 0.18072
batch 416 / 768 | loss: 0.45916 | err: 0.18269
batch 417 / 768 | loss: 0.45805 | err: 0.18225
batch 418 / 768 | loss: 0.46103 | err: 0.18421
batch 419 / 768 | loss: 0.45993 | err: 0.18377
batch 420 / 768 | loss: 0.45883 | err: 0.18333
batch 421 / 768 | loss: 0.46226 | err: 0.18527
batch 422 / 768 | loss: 0.46117 | err: 0.18483
batch 423 / 768 | loss: 0.46008 | err: 0.18440
batch 424 / 768 | loss: 0.45899 | err: 0.18396
batch 425 / 768 | loss: 0.45791 | err: 0.18353
batch 426 / 768 | loss: 0.45684 | err: 0.18310
batch 427 / 768 | loss: 0.45577 | err: 0.18267
batch 428 / 768 | loss: 0.45470 | err: 0.18224
batch 429 / 768 | loss: 0.45686 | err: 0.18415
batch 430 / 768 | loss: 0.46378 | err: 0.18605
batch 431 / 768 | loss: 0.46271 | err: 0.18561
batch 432 / 768 | loss: 0.46164 | err: 0.18519
batch 433 / 768 | loss: 0.46057 | err: 0.18476
batch 434 / 7

batch 590 / 768 | loss: 0.42558 | err: 0.17288
batch 591 / 768 | loss: 0.42486 | err: 0.17259
batch 592 / 768 | loss: 0.42415 | err: 0.17230
batch 593 / 768 | loss: 0.42343 | err: 0.17201
batch 594 / 768 | loss: 0.42272 | err: 0.17172
batch 595 / 768 | loss: 0.42201 | err: 0.17143
batch 596 / 768 | loss: 0.42130 | err: 0.17114
batch 597 / 768 | loss: 0.42661 | err: 0.17253
batch 598 / 768 | loss: 0.42590 | err: 0.17224
batch 599 / 768 | loss: 0.42519 | err: 0.17195
batch 600 / 768 | loss: 0.42448 | err: 0.17167
batch 601 / 768 | loss: 0.42377 | err: 0.17138
batch 602 / 768 | loss: 0.42320 | err: 0.17110
batch 603 / 768 | loss: 0.42249 | err: 0.17081
batch 604 / 768 | loss: 0.42179 | err: 0.17053
batch 605 / 768 | loss: 0.42110 | err: 0.17025
batch 606 / 768 | loss: 0.42176 | err: 0.16997
batch 607 / 768 | loss: 0.42106 | err: 0.16969
batch 608 / 768 | loss: 0.42037 | err: 0.16941
batch 609 / 768 | loss: 0.41968 | err: 0.16913
batch 610 / 768 | loss: 0.41899 | err: 0.16885
batch 611 / 7

batch 766 / 768 | loss: 0.43819 | err: 0.17363
batch 767 / 768 | loss: 0.43762 | err: 0.17340
batch 768 / 768 | loss: 0.43705 | err: 0.17318
model saved!
------------- epoch 001 / 005 | time: 061 sec | loss: 0.54729 | err: 0.19922
batch 001 / 768 | loss: 0.00000 | err: 0.00000
batch 002 / 768 | loss: 0.00000 | err: 0.00000
batch 003 / 768 | loss: 0.00000 | err: 0.00000
batch 004 / 768 | loss: 0.00000 | err: 0.00000
batch 005 / 768 | loss: 0.46064 | err: 0.20000
batch 006 / 768 | loss: 0.38387 | err: 0.16667
batch 007 / 768 | loss: 0.32903 | err: 0.14286
batch 008 / 768 | loss: 0.28790 | err: 0.12500
batch 009 / 768 | loss: 0.25591 | err: 0.11111
batch 010 / 768 | loss: 0.42032 | err: 0.20000
batch 011 / 768 | loss: 0.59988 | err: 0.27273
batch 012 / 768 | loss: 0.56236 | err: 0.25000
batch 013 / 768 | loss: 0.51910 | err: 0.23077
batch 014 / 768 | loss: 0.48202 | err: 0.21429
batch 015 / 768 | loss: 0.44989 | err: 0.20000
batch 016 / 768 | loss: 0.43641 | err: 0.18750
batch 017 / 768 |

batch 172 / 768 | loss: 0.39934 | err: 0.16279
batch 173 / 768 | loss: 0.39703 | err: 0.16185
batch 174 / 768 | loss: 0.40665 | err: 0.16667
batch 175 / 768 | loss: 0.40433 | err: 0.16571
batch 176 / 768 | loss: 0.40203 | err: 0.16477
batch 177 / 768 | loss: 0.41876 | err: 0.16949
batch 178 / 768 | loss: 0.41641 | err: 0.16854
batch 179 / 768 | loss: 0.41408 | err: 0.16760
batch 180 / 768 | loss: 0.41178 | err: 0.16667
batch 181 / 768 | loss: 0.40951 | err: 0.16575
batch 182 / 768 | loss: 0.40726 | err: 0.16484
batch 183 / 768 | loss: 0.40623 | err: 0.16393
batch 184 / 768 | loss: 0.40402 | err: 0.16304
batch 185 / 768 | loss: 0.40238 | err: 0.16216
batch 186 / 768 | loss: 0.40022 | err: 0.16129
batch 187 / 768 | loss: 0.39808 | err: 0.16043
batch 188 / 768 | loss: 0.39596 | err: 0.15957
batch 189 / 768 | loss: 0.39386 | err: 0.15873
batch 190 / 768 | loss: 0.39305 | err: 0.15789
batch 191 / 768 | loss: 0.39099 | err: 0.15707
batch 192 / 768 | loss: 0.38895 | err: 0.15625
batch 193 / 7

batch 347 / 768 | loss: 0.38354 | err: 0.16715
batch 348 / 768 | loss: 0.38244 | err: 0.16667
batch 349 / 768 | loss: 0.38134 | err: 0.16619
batch 350 / 768 | loss: 0.38025 | err: 0.16571
batch 351 / 768 | loss: 0.37917 | err: 0.16524
batch 352 / 768 | loss: 0.37809 | err: 0.16477
batch 353 / 768 | loss: 0.37702 | err: 0.16431
batch 354 / 768 | loss: 0.37595 | err: 0.16384
batch 355 / 768 | loss: 0.37490 | err: 0.16338
batch 356 / 768 | loss: 0.37384 | err: 0.16292
batch 357 / 768 | loss: 0.37806 | err: 0.16527
batch 358 / 768 | loss: 0.38419 | err: 0.16760
batch 359 / 768 | loss: 0.39224 | err: 0.16992
batch 360 / 768 | loss: 0.39179 | err: 0.16944
batch 361 / 768 | loss: 0.39627 | err: 0.17175
batch 362 / 768 | loss: 0.39517 | err: 0.17127
batch 363 / 768 | loss: 0.40494 | err: 0.17355
batch 364 / 768 | loss: 0.40421 | err: 0.17308
batch 365 / 768 | loss: 0.40310 | err: 0.17260
batch 366 / 768 | loss: 0.40223 | err: 0.17213
batch 367 / 768 | loss: 0.40114 | err: 0.17166
batch 368 / 7

batch 523 / 768 | loss: 0.41930 | err: 0.17017
batch 524 / 768 | loss: 0.41850 | err: 0.16985
batch 525 / 768 | loss: 0.41814 | err: 0.16952
batch 526 / 768 | loss: 0.41734 | err: 0.16920
batch 527 / 768 | loss: 0.42133 | err: 0.17078
batch 528 / 768 | loss: 0.42054 | err: 0.17045
batch 529 / 768 | loss: 0.41974 | err: 0.17013
batch 530 / 768 | loss: 0.41895 | err: 0.16981
batch 531 / 768 | loss: 0.41816 | err: 0.16949
batch 532 / 768 | loss: 0.41737 | err: 0.16917
batch 533 / 768 | loss: 0.41661 | err: 0.16886
batch 534 / 768 | loss: 0.41583 | err: 0.16854
batch 535 / 768 | loss: 0.41836 | err: 0.17009
batch 536 / 768 | loss: 0.41758 | err: 0.16978
batch 537 / 768 | loss: 0.41747 | err: 0.16946
batch 538 / 768 | loss: 0.41901 | err: 0.17100
batch 539 / 768 | loss: 0.41877 | err: 0.17069
batch 540 / 768 | loss: 0.41799 | err: 0.17037
batch 541 / 768 | loss: 0.41722 | err: 0.17006
batch 542 / 768 | loss: 0.41645 | err: 0.16974
batch 543 / 768 | loss: 0.41976 | err: 0.17127
batch 544 / 7

batch 698 / 768 | loss: 0.41084 | err: 0.16762
batch 699 / 768 | loss: 0.41025 | err: 0.16738
batch 700 / 768 | loss: 0.40966 | err: 0.16714
batch 701 / 768 | loss: 0.40908 | err: 0.16690
batch 702 / 768 | loss: 0.41425 | err: 0.16809
batch 703 / 768 | loss: 0.41366 | err: 0.16785
batch 704 / 768 | loss: 0.41308 | err: 0.16761
batch 705 / 768 | loss: 0.41249 | err: 0.16738
batch 706 / 768 | loss: 0.41191 | err: 0.16714
batch 707 / 768 | loss: 0.41132 | err: 0.16690
batch 708 / 768 | loss: 0.41074 | err: 0.16667
batch 709 / 768 | loss: 0.41016 | err: 0.16643
batch 710 / 768 | loss: 0.40959 | err: 0.16620
batch 711 / 768 | loss: 0.40901 | err: 0.16596
batch 712 / 768 | loss: 0.40857 | err: 0.16573
batch 713 / 768 | loss: 0.40800 | err: 0.16550
batch 714 / 768 | loss: 0.40742 | err: 0.16527
batch 715 / 768 | loss: 0.40691 | err: 0.16503
batch 716 / 768 | loss: 0.40634 | err: 0.16480
batch 717 / 768 | loss: 0.40577 | err: 0.16457
batch 718 / 768 | loss: 0.40521 | err: 0.16435
batch 719 / 7

batch 102 / 768 | loss: 0.29746 | err: 0.12745
batch 103 / 768 | loss: 0.29458 | err: 0.12621
batch 104 / 768 | loss: 0.29174 | err: 0.12500
batch 105 / 768 | loss: 0.30593 | err: 0.13333
batch 106 / 768 | loss: 0.32042 | err: 0.14151
batch 107 / 768 | loss: 0.31742 | err: 0.14019
batch 108 / 768 | loss: 0.31448 | err: 0.13889
batch 109 / 768 | loss: 0.31216 | err: 0.13761
batch 110 / 768 | loss: 0.30932 | err: 0.13636
batch 111 / 768 | loss: 0.30653 | err: 0.13514
batch 112 / 768 | loss: 0.30380 | err: 0.13393
batch 113 / 768 | loss: 0.30111 | err: 0.13274
batch 114 / 768 | loss: 0.29847 | err: 0.13158
batch 115 / 768 | loss: 0.29587 | err: 0.13043
batch 116 / 768 | loss: 0.29332 | err: 0.12931
batch 117 / 768 | loss: 0.29081 | err: 0.12821
batch 118 / 768 | loss: 0.30453 | err: 0.13559
batch 119 / 768 | loss: 0.30197 | err: 0.13445
batch 120 / 768 | loss: 0.29945 | err: 0.13333
batch 121 / 768 | loss: 0.29698 | err: 0.13223
batch 122 / 768 | loss: 0.32203 | err: 0.13934
batch 123 / 7

batch 278 / 768 | loss: 0.33373 | err: 0.14388
batch 279 / 768 | loss: 0.33253 | err: 0.14337
batch 280 / 768 | loss: 0.33135 | err: 0.14286
batch 281 / 768 | loss: 0.33017 | err: 0.14235
batch 282 / 768 | loss: 0.32900 | err: 0.14184
batch 283 / 768 | loss: 0.32882 | err: 0.14134
batch 284 / 768 | loss: 0.32798 | err: 0.14085
batch 285 / 768 | loss: 0.32683 | err: 0.14035
batch 286 / 768 | loss: 0.32569 | err: 0.13986
batch 287 / 768 | loss: 0.32455 | err: 0.13937
batch 288 / 768 | loss: 0.32343 | err: 0.13889
batch 289 / 768 | loss: 0.32231 | err: 0.13841
batch 290 / 768 | loss: 0.32119 | err: 0.13793
batch 291 / 768 | loss: 0.32175 | err: 0.13746
batch 292 / 768 | loss: 0.32065 | err: 0.13699
batch 293 / 768 | loss: 0.31956 | err: 0.13652
batch 294 / 768 | loss: 0.31847 | err: 0.13605
batch 295 / 768 | loss: 0.32338 | err: 0.13898
batch 296 / 768 | loss: 0.32229 | err: 0.13851
batch 297 / 768 | loss: 0.32121 | err: 0.13805
batch 298 / 768 | loss: 0.32013 | err: 0.13758
batch 299 / 7

batch 455 / 768 | loss: 0.37254 | err: 0.15604
batch 456 / 768 | loss: 0.37172 | err: 0.15570
batch 457 / 768 | loss: 0.37444 | err: 0.15755
batch 458 / 768 | loss: 0.37362 | err: 0.15721
batch 459 / 768 | loss: 0.37352 | err: 0.15686
batch 460 / 768 | loss: 0.37626 | err: 0.15870
batch 461 / 768 | loss: 0.37892 | err: 0.16052
batch 462 / 768 | loss: 0.37810 | err: 0.16017
batch 463 / 768 | loss: 0.37791 | err: 0.15983
batch 464 / 768 | loss: 0.37788 | err: 0.15948
batch 465 / 768 | loss: 0.37707 | err: 0.15914
batch 466 / 768 | loss: 0.37626 | err: 0.15880
batch 467 / 768 | loss: 0.37546 | err: 0.15846
batch 468 / 768 | loss: 0.37465 | err: 0.15812
batch 469 / 768 | loss: 0.37386 | err: 0.15778
batch 470 / 768 | loss: 0.37306 | err: 0.15745
batch 471 / 768 | loss: 0.37554 | err: 0.15924
batch 472 / 768 | loss: 0.37936 | err: 0.16102
batch 473 / 768 | loss: 0.37856 | err: 0.16068
batch 474 / 768 | loss: 0.37825 | err: 0.16034
batch 475 / 768 | loss: 0.37745 | err: 0.16000
batch 476 / 7

batch 631 / 768 | loss: 0.41836 | err: 0.18067
batch 632 / 768 | loss: 0.41770 | err: 0.18038
batch 633 / 768 | loss: 0.42208 | err: 0.18167
batch 634 / 768 | loss: 0.42266 | err: 0.18139
batch 635 / 768 | loss: 0.42488 | err: 0.18268
batch 636 / 768 | loss: 0.42446 | err: 0.18239
batch 637 / 768 | loss: 0.42408 | err: 0.18210
batch 638 / 768 | loss: 0.42375 | err: 0.18182
batch 639 / 768 | loss: 0.42342 | err: 0.18153
batch 640 / 768 | loss: 0.42279 | err: 0.18125
batch 641 / 768 | loss: 0.42213 | err: 0.18097
batch 642 / 768 | loss: 0.42147 | err: 0.18069
batch 643 / 768 | loss: 0.42097 | err: 0.18040
batch 644 / 768 | loss: 0.42379 | err: 0.18168
batch 645 / 768 | loss: 0.42313 | err: 0.18140
batch 646 / 768 | loss: 0.42248 | err: 0.18111
batch 647 / 768 | loss: 0.42183 | err: 0.18083
batch 648 / 768 | loss: 0.42118 | err: 0.18056
batch 649 / 768 | loss: 0.42053 | err: 0.18028
batch 650 / 768 | loss: 0.42428 | err: 0.18154
batch 651 / 768 | loss: 0.42363 | err: 0.18126
batch 652 / 7

batch 037 / 768 | loss: 0.18253 | err: 0.08108
batch 038 / 768 | loss: 0.28285 | err: 0.10526
batch 039 / 768 | loss: 0.37337 | err: 0.12821
batch 040 / 768 | loss: 0.36404 | err: 0.12500
batch 041 / 768 | loss: 0.35516 | err: 0.12195
batch 042 / 768 | loss: 0.34670 | err: 0.11905
batch 043 / 768 | loss: 0.33864 | err: 0.11628
batch 044 / 768 | loss: 0.33094 | err: 0.11364
batch 045 / 768 | loss: 0.32359 | err: 0.11111
batch 046 / 768 | loss: 0.31655 | err: 0.10870
batch 047 / 768 | loss: 0.43334 | err: 0.12766
batch 048 / 768 | loss: 0.50763 | err: 0.14583
batch 049 / 768 | loss: 0.49885 | err: 0.14286
batch 050 / 768 | loss: 0.48887 | err: 0.14000
batch 051 / 768 | loss: 0.47928 | err: 0.13725
batch 052 / 768 | loss: 0.47007 | err: 0.13462
batch 053 / 768 | loss: 0.46120 | err: 0.13208
batch 054 / 768 | loss: 0.45266 | err: 0.12963
batch 055 / 768 | loss: 0.44443 | err: 0.12727
batch 056 / 768 | loss: 0.43649 | err: 0.12500
batch 057 / 768 | loss: 0.42883 | err: 0.12281
batch 058 / 7

batch 213 / 768 | loss: 0.49595 | err: 0.16901
batch 214 / 768 | loss: 0.50249 | err: 0.17290
batch 215 / 768 | loss: 0.50015 | err: 0.17209
batch 216 / 768 | loss: 0.50710 | err: 0.17593
batch 217 / 768 | loss: 0.50477 | err: 0.17512
batch 218 / 768 | loss: 0.50245 | err: 0.17431
batch 219 / 768 | loss: 0.50016 | err: 0.17352
batch 220 / 768 | loss: 0.49788 | err: 0.17273
batch 221 / 768 | loss: 0.50558 | err: 0.17647
batch 222 / 768 | loss: 0.50330 | err: 0.17568
batch 223 / 768 | loss: 0.50104 | err: 0.17489
batch 224 / 768 | loss: 0.50673 | err: 0.17857
batch 225 / 768 | loss: 0.50448 | err: 0.17778
batch 226 / 768 | loss: 0.51183 | err: 0.18142
batch 227 / 768 | loss: 0.50958 | err: 0.18062
batch 228 / 768 | loss: 0.50734 | err: 0.17982
batch 229 / 768 | loss: 0.50513 | err: 0.17904
batch 230 / 768 | loss: 0.50293 | err: 0.17826
batch 231 / 768 | loss: 0.50075 | err: 0.17749
batch 232 / 768 | loss: 0.49859 | err: 0.17672
batch 233 / 768 | loss: 0.50511 | err: 0.18026
batch 234 / 7

batch 389 / 768 | loss: 0.50706 | err: 0.19023
batch 390 / 768 | loss: 0.50576 | err: 0.18974
batch 391 / 768 | loss: 0.50447 | err: 0.18926
batch 392 / 768 | loss: 0.51441 | err: 0.19133
batch 393 / 768 | loss: 0.51344 | err: 0.19084
batch 394 / 768 | loss: 0.51213 | err: 0.19036
batch 395 / 768 | loss: 0.51084 | err: 0.18987
batch 396 / 768 | loss: 0.50955 | err: 0.18939
batch 397 / 768 | loss: 0.50866 | err: 0.18892
batch 398 / 768 | loss: 0.50739 | err: 0.18844
batch 399 / 768 | loss: 0.51873 | err: 0.19048
batch 400 / 768 | loss: 0.52428 | err: 0.19250
batch 401 / 768 | loss: 0.52298 | err: 0.19202
batch 402 / 768 | loss: 0.52182 | err: 0.19154
batch 403 / 768 | loss: 0.52104 | err: 0.19107
batch 404 / 768 | loss: 0.51983 | err: 0.19059
batch 405 / 768 | loss: 0.52388 | err: 0.19259
batch 406 / 768 | loss: 0.52259 | err: 0.19212
batch 407 / 768 | loss: 0.52130 | err: 0.19165
batch 408 / 768 | loss: 0.52002 | err: 0.19118
batch 409 / 768 | loss: 0.51924 | err: 0.19071
batch 410 / 7

batch 565 / 768 | loss: 0.48511 | err: 0.18938
batch 566 / 768 | loss: 0.48426 | err: 0.18905
batch 567 / 768 | loss: 0.48340 | err: 0.18871
batch 568 / 768 | loss: 0.48255 | err: 0.18838
batch 569 / 768 | loss: 0.48170 | err: 0.18805
batch 570 / 768 | loss: 0.48086 | err: 0.18772
batch 571 / 768 | loss: 0.48026 | err: 0.18739
batch 572 / 768 | loss: 0.47942 | err: 0.18706
batch 573 / 768 | loss: 0.47858 | err: 0.18674
batch 574 / 768 | loss: 0.47775 | err: 0.18641
batch 575 / 768 | loss: 0.47723 | err: 0.18609
batch 576 / 768 | loss: 0.47640 | err: 0.18576
batch 577 / 768 | loss: 0.47557 | err: 0.18544
batch 578 / 768 | loss: 0.47754 | err: 0.18685
batch 579 / 768 | loss: 0.47671 | err: 0.18653
batch 580 / 768 | loss: 0.47589 | err: 0.18621
batch 581 / 768 | loss: 0.47507 | err: 0.18589
batch 582 / 768 | loss: 0.47425 | err: 0.18557
batch 583 / 768 | loss: 0.47344 | err: 0.18525
batch 584 / 768 | loss: 0.47263 | err: 0.18493
batch 585 / 768 | loss: 0.47182 | err: 0.18462
batch 586 / 7

batch 741 / 768 | loss: 0.44508 | err: 0.18219
batch 742 / 768 | loss: 0.44448 | err: 0.18194
batch 743 / 768 | loss: 0.44623 | err: 0.18304
batch 744 / 768 | loss: 0.45005 | err: 0.18414
batch 745 / 768 | loss: 0.44944 | err: 0.18389
batch 746 / 768 | loss: 0.44884 | err: 0.18365
batch 747 / 768 | loss: 0.44824 | err: 0.18340
batch 748 / 768 | loss: 0.44764 | err: 0.18316
batch 749 / 768 | loss: 0.44927 | err: 0.18425
batch 750 / 768 | loss: 0.45084 | err: 0.18533
batch 751 / 768 | loss: 0.45024 | err: 0.18509
batch 752 / 768 | loss: 0.44964 | err: 0.18484
batch 753 / 768 | loss: 0.44920 | err: 0.18459
batch 754 / 768 | loss: 0.44860 | err: 0.18435
batch 755 / 768 | loss: 0.44801 | err: 0.18411
batch 756 / 768 | loss: 0.44742 | err: 0.18386
batch 757 / 768 | loss: 0.44683 | err: 0.18362
batch 758 / 768 | loss: 0.44624 | err: 0.18338
batch 759 / 768 | loss: 0.44582 | err: 0.18314
batch 760 / 768 | loss: 0.44523 | err: 0.18289
batch 761 / 768 | loss: 0.44711 | err: 0.18397
batch 762 / 7

batch 146 / 768 | loss: 0.28444 | err: 0.13699
batch 147 / 768 | loss: 0.28251 | err: 0.13605
batch 148 / 768 | loss: 0.28060 | err: 0.13514
batch 149 / 768 | loss: 0.27949 | err: 0.13423
batch 150 / 768 | loss: 0.30250 | err: 0.14000
batch 151 / 768 | loss: 0.32657 | err: 0.14570
batch 152 / 768 | loss: 0.32442 | err: 0.14474
batch 153 / 768 | loss: 0.32331 | err: 0.14379
batch 154 / 768 | loss: 0.32121 | err: 0.14286
batch 155 / 768 | loss: 0.33017 | err: 0.14839
batch 156 / 768 | loss: 0.32805 | err: 0.14744
batch 157 / 768 | loss: 0.32596 | err: 0.14650
batch 158 / 768 | loss: 0.32390 | err: 0.14557
batch 159 / 768 | loss: 0.32186 | err: 0.14465
batch 160 / 768 | loss: 0.31985 | err: 0.14375
batch 161 / 768 | loss: 0.31786 | err: 0.14286
batch 162 / 768 | loss: 0.31590 | err: 0.14198
batch 163 / 768 | loss: 0.32780 | err: 0.14724
batch 164 / 768 | loss: 0.32580 | err: 0.14634
batch 165 / 768 | loss: 0.32383 | err: 0.14545
batch 166 / 768 | loss: 0.32188 | err: 0.14458
batch 167 / 7

batch 322 / 768 | loss: 0.47306 | err: 0.19255
batch 323 / 768 | loss: 0.47250 | err: 0.19195
batch 324 / 768 | loss: 0.47104 | err: 0.19136
batch 325 / 768 | loss: 0.47006 | err: 0.19077
batch 326 / 768 | loss: 0.46861 | err: 0.19018
batch 327 / 768 | loss: 0.46718 | err: 0.18960
batch 328 / 768 | loss: 0.47109 | err: 0.19207
batch 329 / 768 | loss: 0.46966 | err: 0.19149
batch 330 / 768 | loss: 0.46823 | err: 0.19091
batch 331 / 768 | loss: 0.47163 | err: 0.19335
batch 332 / 768 | loss: 0.47072 | err: 0.19277
batch 333 / 768 | loss: 0.46930 | err: 0.19219
batch 334 / 768 | loss: 0.46790 | err: 0.19162
batch 335 / 768 | loss: 0.46650 | err: 0.19104
batch 336 / 768 | loss: 0.46976 | err: 0.19345
batch 337 / 768 | loss: 0.47437 | err: 0.19585
batch 338 / 768 | loss: 0.47296 | err: 0.19527
batch 339 / 768 | loss: 0.47157 | err: 0.19469
batch 340 / 768 | loss: 0.47143 | err: 0.19412
batch 341 / 768 | loss: 0.47809 | err: 0.19648
batch 342 / 768 | loss: 0.47669 | err: 0.19591
batch 343 / 7

batch 498 / 768 | loss: 0.49532 | err: 0.21486
batch 499 / 768 | loss: 0.49444 | err: 0.21443
batch 500 / 768 | loss: 0.49345 | err: 0.21400
batch 501 / 768 | loss: 0.49247 | err: 0.21357
batch 502 / 768 | loss: 0.49288 | err: 0.21315
batch 503 / 768 | loss: 0.49190 | err: 0.21272
batch 504 / 768 | loss: 0.49093 | err: 0.21230
batch 505 / 768 | loss: 0.49330 | err: 0.21386
batch 506 / 768 | loss: 0.49233 | err: 0.21344
batch 507 / 768 | loss: 0.49198 | err: 0.21302
batch 508 / 768 | loss: 0.49509 | err: 0.21457
batch 509 / 768 | loss: 0.49411 | err: 0.21415
batch 510 / 768 | loss: 0.49315 | err: 0.21373
batch 511 / 768 | loss: 0.49312 | err: 0.21331
batch 512 / 768 | loss: 0.49242 | err: 0.21289
batch 513 / 768 | loss: 0.49146 | err: 0.21248
batch 514 / 768 | loss: 0.49095 | err: 0.21206
batch 515 / 768 | loss: 0.48999 | err: 0.21165
batch 516 / 768 | loss: 0.48904 | err: 0.21124
batch 517 / 768 | loss: 0.48810 | err: 0.21083
batch 518 / 768 | loss: 0.48900 | err: 0.21042
batch 519 / 7

batch 673 / 768 | loss: 0.47216 | err: 0.20802
batch 674 / 768 | loss: 0.47424 | err: 0.20920
batch 675 / 768 | loss: 0.47393 | err: 0.20889
batch 676 / 768 | loss: 0.47611 | err: 0.21006
batch 677 / 768 | loss: 0.47541 | err: 0.20975
batch 678 / 768 | loss: 0.47471 | err: 0.20944
batch 679 / 768 | loss: 0.47682 | err: 0.21060
batch 680 / 768 | loss: 0.47612 | err: 0.21029
batch 681 / 768 | loss: 0.47542 | err: 0.20999
batch 682 / 768 | loss: 0.47817 | err: 0.21114
batch 683 / 768 | loss: 0.47747 | err: 0.21083
batch 684 / 768 | loss: 0.47677 | err: 0.21053
batch 685 / 768 | loss: 0.47608 | err: 0.21022
batch 686 / 768 | loss: 0.47538 | err: 0.20991
batch 687 / 768 | loss: 0.47469 | err: 0.20961
batch 688 / 768 | loss: 0.47400 | err: 0.20930
batch 689 / 768 | loss: 0.47331 | err: 0.20900
batch 690 / 768 | loss: 0.47263 | err: 0.20870
batch 691 / 768 | loss: 0.47260 | err: 0.20839
batch 692 / 768 | loss: 0.47192 | err: 0.20809
batch 693 / 768 | loss: 0.47123 | err: 0.20779
batch 694 / 7

batch 078 / 384 | loss: 0.51762 | err: 0.21795
batch 079 / 384 | loss: 0.51349 | err: 0.21519
batch 080 / 384 | loss: 0.50707 | err: 0.21250
batch 081 / 384 | loss: 0.50146 | err: 0.20988
batch 082 / 384 | loss: 0.49587 | err: 0.20732
batch 083 / 384 | loss: 0.48990 | err: 0.20482
batch 084 / 384 | loss: 0.48407 | err: 0.20238
batch 085 / 384 | loss: 0.48921 | err: 0.20588
batch 086 / 384 | loss: 0.48352 | err: 0.20349
batch 087 / 384 | loss: 0.47797 | err: 0.20115
batch 088 / 384 | loss: 0.47253 | err: 0.19886
batch 089 / 384 | loss: 0.46837 | err: 0.19663
batch 090 / 384 | loss: 0.46316 | err: 0.19444
batch 091 / 384 | loss: 0.45807 | err: 0.19231
batch 092 / 384 | loss: 0.45309 | err: 0.19022
batch 093 / 384 | loss: 0.44822 | err: 0.18817
batch 094 / 384 | loss: 0.44345 | err: 0.18617
batch 095 / 384 | loss: 0.43879 | err: 0.18421
batch 096 / 384 | loss: 0.43421 | err: 0.18229
batch 097 / 384 | loss: 0.43915 | err: 0.18557
batch 098 / 384 | loss: 0.43578 | err: 0.18367
batch 099 / 3

batch 254 / 384 | loss: 0.39812 | err: 0.17323
batch 255 / 384 | loss: 0.40041 | err: 0.17451
batch 256 / 384 | loss: 0.39885 | err: 0.17383
batch 257 / 384 | loss: 0.39730 | err: 0.17315
batch 258 / 384 | loss: 0.39641 | err: 0.17248
batch 259 / 384 | loss: 0.39488 | err: 0.17181
batch 260 / 384 | loss: 0.39985 | err: 0.17308
batch 261 / 384 | loss: 0.39832 | err: 0.17241
batch 262 / 384 | loss: 0.39680 | err: 0.17176
batch 263 / 384 | loss: 0.39530 | err: 0.17110
batch 264 / 384 | loss: 0.39380 | err: 0.17045
batch 265 / 384 | loss: 0.39232 | err: 0.16981
batch 266 / 384 | loss: 0.39111 | err: 0.16917
batch 267 / 384 | loss: 0.38965 | err: 0.16854
batch 268 / 384 | loss: 0.38855 | err: 0.16791
batch 269 / 384 | loss: 0.38745 | err: 0.16729
batch 270 / 384 | loss: 0.38601 | err: 0.16667
batch 271 / 384 | loss: 0.38459 | err: 0.16605
batch 272 / 384 | loss: 0.38638 | err: 0.16728
batch 273 / 384 | loss: 0.38515 | err: 0.16667
batch 274 / 384 | loss: 0.38374 | err: 0.16606
batch 275 / 3

batch 044 / 384 | loss: 0.37667 | err: 0.15909
batch 045 / 384 | loss: 0.40147 | err: 0.16667
batch 046 / 384 | loss: 0.42285 | err: 0.17391
batch 047 / 384 | loss: 0.41385 | err: 0.17021
batch 048 / 384 | loss: 0.40565 | err: 0.16667
batch 049 / 384 | loss: 0.39895 | err: 0.16327
batch 050 / 384 | loss: 0.39098 | err: 0.16000
batch 051 / 384 | loss: 0.40569 | err: 0.16667
batch 052 / 384 | loss: 0.39925 | err: 0.16346
batch 053 / 384 | loss: 0.39385 | err: 0.16038
batch 054 / 384 | loss: 0.38656 | err: 0.15741
batch 055 / 384 | loss: 0.39540 | err: 0.16364
batch 056 / 384 | loss: 0.39254 | err: 0.16071
batch 057 / 384 | loss: 0.38565 | err: 0.15789
batch 058 / 384 | loss: 0.37900 | err: 0.15517
batch 059 / 384 | loss: 0.37258 | err: 0.15254
batch 060 / 384 | loss: 0.36637 | err: 0.15000
batch 061 / 384 | loss: 0.36383 | err: 0.14754
batch 062 / 384 | loss: 0.35796 | err: 0.14516
batch 063 / 384 | loss: 0.35313 | err: 0.14286
batch 064 / 384 | loss: 0.36287 | err: 0.14844
batch 065 / 3

batch 219 / 384 | loss: 0.42677 | err: 0.16895
batch 220 / 384 | loss: 0.42587 | err: 0.16818
batch 221 / 384 | loss: 0.42491 | err: 0.16742
batch 222 / 384 | loss: 0.42343 | err: 0.16667
batch 223 / 384 | loss: 0.42201 | err: 0.16592
batch 224 / 384 | loss: 0.42380 | err: 0.16741
batch 225 / 384 | loss: 0.42347 | err: 0.16667
batch 226 / 384 | loss: 0.42159 | err: 0.16593
batch 227 / 384 | loss: 0.41974 | err: 0.16520
batch 228 / 384 | loss: 0.41789 | err: 0.16447
batch 229 / 384 | loss: 0.41628 | err: 0.16376
batch 230 / 384 | loss: 0.41797 | err: 0.16522
batch 231 / 384 | loss: 0.41936 | err: 0.16667
batch 232 / 384 | loss: 0.41827 | err: 0.16595
batch 233 / 384 | loss: 0.41988 | err: 0.16738
batch 234 / 384 | loss: 0.41809 | err: 0.16667
batch 235 / 384 | loss: 0.41995 | err: 0.16809
batch 236 / 384 | loss: 0.41817 | err: 0.16737
batch 237 / 384 | loss: 0.41671 | err: 0.16667
batch 238 / 384 | loss: 0.41495 | err: 0.16597
batch 239 / 384 | loss: 0.41933 | err: 0.16736
batch 240 / 3

batch 008 / 384 | loss: 0.59254 | err: 0.25000
batch 009 / 384 | loss: 0.52670 | err: 0.22222
batch 010 / 384 | loss: 0.47995 | err: 0.20000
batch 011 / 384 | loss: 0.51545 | err: 0.22727
batch 012 / 384 | loss: 0.48068 | err: 0.20833
batch 013 / 384 | loss: 0.44370 | err: 0.19231
batch 014 / 384 | loss: 0.41201 | err: 0.17857
batch 015 / 384 | loss: 0.43437 | err: 0.20000
batch 016 / 384 | loss: 0.41391 | err: 0.18750
batch 017 / 384 | loss: 0.38956 | err: 0.17647
batch 018 / 384 | loss: 0.39816 | err: 0.19444
batch 019 / 384 | loss: 0.41588 | err: 0.21053
batch 020 / 384 | loss: 0.39965 | err: 0.20000
batch 021 / 384 | loss: 0.38062 | err: 0.19048
batch 022 / 384 | loss: 0.36332 | err: 0.18182
batch 023 / 384 | loss: 0.34752 | err: 0.17391
batch 024 / 384 | loss: 0.37167 | err: 0.18750
batch 025 / 384 | loss: 0.36568 | err: 0.18000
batch 026 / 384 | loss: 0.35161 | err: 0.17308
batch 027 / 384 | loss: 0.33859 | err: 0.16667
batch 028 / 384 | loss: 0.32650 | err: 0.16071
batch 029 / 3

batch 184 / 384 | loss: 0.32318 | err: 0.14402
batch 185 / 384 | loss: 0.32196 | err: 0.14324
batch 186 / 384 | loss: 0.32023 | err: 0.14247
batch 187 / 384 | loss: 0.31852 | err: 0.14171
batch 188 / 384 | loss: 0.31738 | err: 0.14096
batch 189 / 384 | loss: 0.31570 | err: 0.14021
batch 190 / 384 | loss: 0.31892 | err: 0.14211
batch 191 / 384 | loss: 0.31725 | err: 0.14136
batch 192 / 384 | loss: 0.32053 | err: 0.14323
batch 193 / 384 | loss: 0.31910 | err: 0.14249
batch 194 / 384 | loss: 0.32328 | err: 0.14433
batch 195 / 384 | loss: 0.33294 | err: 0.14615
batch 196 / 384 | loss: 0.33584 | err: 0.14796
batch 197 / 384 | loss: 0.33910 | err: 0.14975
batch 198 / 384 | loss: 0.34203 | err: 0.15152
batch 199 / 384 | loss: 0.34031 | err: 0.15075
batch 200 / 384 | loss: 0.33861 | err: 0.15000
batch 201 / 384 | loss: 0.34611 | err: 0.15174
batch 202 / 384 | loss: 0.34440 | err: 0.15099
batch 203 / 384 | loss: 0.34282 | err: 0.15025
batch 204 / 384 | loss: 0.34114 | err: 0.14951
batch 205 / 3

batch 360 / 384 | loss: 0.38670 | err: 0.16528
batch 361 / 384 | loss: 0.38563 | err: 0.16482
batch 362 / 384 | loss: 0.38456 | err: 0.16436
batch 363 / 384 | loss: 0.38366 | err: 0.16391
batch 364 / 384 | loss: 0.38643 | err: 0.16484
batch 365 / 384 | loss: 0.38538 | err: 0.16438
batch 366 / 384 | loss: 0.38691 | err: 0.16530
batch 367 / 384 | loss: 0.38586 | err: 0.16485
batch 368 / 384 | loss: 0.38494 | err: 0.16440
batch 369 / 384 | loss: 0.38411 | err: 0.16396
batch 370 / 384 | loss: 0.38307 | err: 0.16351
batch 371 / 384 | loss: 0.38204 | err: 0.16307
batch 372 / 384 | loss: 0.38101 | err: 0.16263
batch 373 / 384 | loss: 0.37999 | err: 0.16220
batch 374 / 384 | loss: 0.37897 | err: 0.16176
batch 375 / 384 | loss: 0.37814 | err: 0.16133
batch 376 / 384 | loss: 0.37713 | err: 0.16090
batch 377 / 384 | loss: 0.37613 | err: 0.16048
batch 378 / 384 | loss: 0.37804 | err: 0.16138
batch 379 / 384 | loss: 0.37704 | err: 0.16095
batch 380 / 384 | loss: 0.37605 | err: 0.16053
batch 381 / 3

batch 148 / 384 | loss: 0.44009 | err: 0.18243
batch 149 / 384 | loss: 0.45122 | err: 0.18792
batch 150 / 384 | loss: 0.44821 | err: 0.18667
batch 151 / 384 | loss: 0.44524 | err: 0.18543
batch 152 / 384 | loss: 0.44231 | err: 0.18421
batch 153 / 384 | loss: 0.44061 | err: 0.18301
batch 154 / 384 | loss: 0.45080 | err: 0.18506
batch 155 / 384 | loss: 0.45422 | err: 0.18710
batch 156 / 384 | loss: 0.45215 | err: 0.18590
batch 157 / 384 | loss: 0.45083 | err: 0.18471
batch 158 / 384 | loss: 0.44797 | err: 0.18354
batch 159 / 384 | loss: 0.44516 | err: 0.18239
batch 160 / 384 | loss: 0.44237 | err: 0.18125
batch 161 / 384 | loss: 0.43963 | err: 0.18012
batch 162 / 384 | loss: 0.43691 | err: 0.17901
batch 163 / 384 | loss: 0.44552 | err: 0.18098
batch 164 / 384 | loss: 0.45242 | err: 0.18293
batch 165 / 384 | loss: 0.45459 | err: 0.18485
batch 166 / 384 | loss: 0.45267 | err: 0.18373
batch 167 / 384 | loss: 0.45991 | err: 0.18563
batch 168 / 384 | loss: 0.46810 | err: 0.18750
batch 169 / 3

batch 322 / 384 | loss: 0.44465 | err: 0.19255
batch 323 / 384 | loss: 0.44671 | err: 0.19350
batch 324 / 384 | loss: 0.44533 | err: 0.19290
batch 325 / 384 | loss: 0.44396 | err: 0.19231
batch 326 / 384 | loss: 0.44260 | err: 0.19172
batch 327 / 384 | loss: 0.44179 | err: 0.19113
batch 328 / 384 | loss: 0.44230 | err: 0.19207
batch 329 / 384 | loss: 0.44108 | err: 0.19149
batch 330 / 384 | loss: 0.44208 | err: 0.19242
batch 331 / 384 | loss: 0.44308 | err: 0.19335
batch 332 / 384 | loss: 0.44220 | err: 0.19277
batch 333 / 384 | loss: 0.44113 | err: 0.19219
batch 334 / 384 | loss: 0.44039 | err: 0.19162
batch 335 / 384 | loss: 0.43907 | err: 0.19104
batch 336 / 384 | loss: 0.43777 | err: 0.19048
batch 337 / 384 | loss: 0.43647 | err: 0.18991
batch 338 / 384 | loss: 0.43524 | err: 0.18935
batch 339 / 384 | loss: 0.43415 | err: 0.18879
batch 340 / 384 | loss: 0.43569 | err: 0.18971
batch 341 / 384 | loss: 0.43441 | err: 0.18915
batch 342 / 384 | loss: 0.43314 | err: 0.18860
batch 343 / 3

batch 111 / 384 | loss: 0.33636 | err: 0.14414
batch 112 / 384 | loss: 0.33496 | err: 0.14286
batch 113 / 384 | loss: 0.33989 | err: 0.14602
batch 114 / 384 | loss: 0.34433 | err: 0.14912
batch 115 / 384 | loss: 0.34134 | err: 0.14783
batch 116 / 384 | loss: 0.33953 | err: 0.14655
batch 117 / 384 | loss: 0.33663 | err: 0.14530
batch 118 / 384 | loss: 0.33377 | err: 0.14407
batch 119 / 384 | loss: 0.33142 | err: 0.14286
batch 120 / 384 | loss: 0.33000 | err: 0.14167
batch 121 / 384 | loss: 0.32847 | err: 0.14050
batch 122 / 384 | loss: 0.32578 | err: 0.13934
batch 123 / 384 | loss: 0.32376 | err: 0.13821
batch 124 / 384 | loss: 0.33968 | err: 0.14516
batch 125 / 384 | loss: 0.33706 | err: 0.14400
batch 126 / 384 | loss: 0.33438 | err: 0.14286
batch 127 / 384 | loss: 0.34878 | err: 0.14961
batch 128 / 384 | loss: 0.35555 | err: 0.15234
batch 129 / 384 | loss: 0.36169 | err: 0.15504
batch 130 / 384 | loss: 0.35891 | err: 0.15385
batch 131 / 384 | loss: 0.35617 | err: 0.15267
batch 132 / 3

batch 286 / 384 | loss: 0.42954 | err: 0.18007
batch 287 / 384 | loss: 0.42860 | err: 0.17944
batch 288 / 384 | loss: 0.42711 | err: 0.17882
batch 289 / 384 | loss: 0.42595 | err: 0.17820
batch 290 / 384 | loss: 0.42448 | err: 0.17759
batch 291 / 384 | loss: 0.43089 | err: 0.18041
batch 292 / 384 | loss: 0.42942 | err: 0.17979
batch 293 / 384 | loss: 0.42837 | err: 0.17918
batch 294 / 384 | loss: 0.42692 | err: 0.17857
batch 295 / 384 | loss: 0.42547 | err: 0.17797
batch 296 / 384 | loss: 0.42403 | err: 0.17736
batch 297 / 384 | loss: 0.42260 | err: 0.17677
batch 298 / 384 | loss: 0.42390 | err: 0.17785
batch 299 / 384 | loss: 0.42248 | err: 0.17726
batch 300 / 384 | loss: 0.42108 | err: 0.17667
batch 301 / 384 | loss: 0.42043 | err: 0.17608
batch 302 / 384 | loss: 0.41909 | err: 0.17550
batch 303 / 384 | loss: 0.41774 | err: 0.17492
batch 304 / 384 | loss: 0.41636 | err: 0.17434
batch 305 / 384 | loss: 0.41500 | err: 0.17377
batch 306 / 384 | loss: 0.41364 | err: 0.17320
batch 307 / 3

batch 074 / 192 | loss: 0.43827 | err: 0.20270
batch 075 / 192 | loss: 0.44545 | err: 0.20667
batch 076 / 192 | loss: 0.44751 | err: 0.20724
batch 077 / 192 | loss: 0.44251 | err: 0.20455
batch 078 / 192 | loss: 0.44153 | err: 0.20513
batch 079 / 192 | loss: 0.44796 | err: 0.20570
batch 080 / 192 | loss: 0.44268 | err: 0.20312
batch 081 / 192 | loss: 0.44860 | err: 0.20370
batch 082 / 192 | loss: 0.44313 | err: 0.20122
batch 083 / 192 | loss: 0.44785 | err: 0.20181
batch 084 / 192 | loss: 0.45115 | err: 0.20238
batch 085 / 192 | loss: 0.45399 | err: 0.20294
batch 086 / 192 | loss: 0.44956 | err: 0.20058
batch 087 / 192 | loss: 0.44517 | err: 0.19828
batch 088 / 192 | loss: 0.44101 | err: 0.19602
batch 089 / 192 | loss: 0.43620 | err: 0.19382
batch 090 / 192 | loss: 0.43855 | err: 0.19444
batch 091 / 192 | loss: 0.43826 | err: 0.19505
batch 092 / 192 | loss: 0.43381 | err: 0.19293
batch 093 / 192 | loss: 0.43472 | err: 0.19355
batch 094 / 192 | loss: 0.43563 | err: 0.19415
batch 095 / 1

batch 055 / 192 | loss: 0.31508 | err: 0.12727
batch 056 / 192 | loss: 0.31912 | err: 0.12946
batch 057 / 192 | loss: 0.32270 | err: 0.13158
batch 058 / 192 | loss: 0.31713 | err: 0.12931
batch 059 / 192 | loss: 0.31230 | err: 0.12712
batch 060 / 192 | loss: 0.31659 | err: 0.12917
batch 061 / 192 | loss: 0.31140 | err: 0.12705
batch 062 / 192 | loss: 0.30637 | err: 0.12500
batch 063 / 192 | loss: 0.30151 | err: 0.12302
batch 064 / 192 | loss: 0.29685 | err: 0.12109
batch 065 / 192 | loss: 0.29951 | err: 0.12308
batch 066 / 192 | loss: 0.29498 | err: 0.12121
batch 067 / 192 | loss: 0.29989 | err: 0.12313
batch 068 / 192 | loss: 0.29690 | err: 0.12132
batch 069 / 192 | loss: 0.29260 | err: 0.11957
batch 070 / 192 | loss: 0.28866 | err: 0.11786
batch 071 / 192 | loss: 0.28459 | err: 0.11620
batch 072 / 192 | loss: 0.28891 | err: 0.11806
batch 073 / 192 | loss: 0.28895 | err: 0.11986
batch 074 / 192 | loss: 0.29628 | err: 0.12162
batch 075 / 192 | loss: 0.31224 | err: 0.13000
batch 076 / 1

batch 036 / 192 | loss: 0.27625 | err: 0.12500
batch 037 / 192 | loss: 0.27186 | err: 0.12162
batch 038 / 192 | loss: 0.26470 | err: 0.11842
batch 039 / 192 | loss: 0.29353 | err: 0.12821
batch 040 / 192 | loss: 0.30343 | err: 0.13125
batch 041 / 192 | loss: 0.30885 | err: 0.13415
batch 042 / 192 | loss: 0.31329 | err: 0.13690
batch 043 / 192 | loss: 0.30651 | err: 0.13372
batch 044 / 192 | loss: 0.29955 | err: 0.13068
batch 045 / 192 | loss: 0.30306 | err: 0.13333
batch 046 / 192 | loss: 0.29647 | err: 0.13043
batch 047 / 192 | loss: 0.29204 | err: 0.12766
batch 048 / 192 | loss: 0.28839 | err: 0.12500
batch 049 / 192 | loss: 0.29311 | err: 0.12755
batch 050 / 192 | loss: 0.29932 | err: 0.13000
batch 051 / 192 | loss: 0.29442 | err: 0.12745
batch 052 / 192 | loss: 0.29020 | err: 0.12500
batch 053 / 192 | loss: 0.28473 | err: 0.12264
batch 054 / 192 | loss: 0.27945 | err: 0.12037
batch 055 / 192 | loss: 0.27437 | err: 0.11818
batch 056 / 192 | loss: 0.28485 | err: 0.12054
batch 057 / 1

batch 017 / 192 | loss: 0.47530 | err: 0.16176
batch 018 / 192 | loss: 0.52231 | err: 0.18056
batch 019 / 192 | loss: 0.49482 | err: 0.17105
batch 020 / 192 | loss: 0.49475 | err: 0.17500
batch 021 / 192 | loss: 0.49345 | err: 0.17857
batch 022 / 192 | loss: 0.47102 | err: 0.17045
batch 023 / 192 | loss: 0.49078 | err: 0.17391
batch 024 / 192 | loss: 0.52060 | err: 0.18750
batch 025 / 192 | loss: 0.50051 | err: 0.18000
batch 026 / 192 | loss: 0.51697 | err: 0.18269
batch 027 / 192 | loss: 0.51260 | err: 0.18519
batch 028 / 192 | loss: 0.53177 | err: 0.18750
batch 029 / 192 | loss: 0.53251 | err: 0.18966
batch 030 / 192 | loss: 0.53172 | err: 0.19167
batch 031 / 192 | loss: 0.51457 | err: 0.18548
batch 032 / 192 | loss: 0.51259 | err: 0.18750
batch 033 / 192 | loss: 0.49921 | err: 0.18182
batch 034 / 192 | loss: 0.51546 | err: 0.19118
batch 035 / 192 | loss: 0.50280 | err: 0.18571
batch 036 / 192 | loss: 0.51917 | err: 0.19444
batch 037 / 192 | loss: 0.52816 | err: 0.20270
batch 038 / 1

batch 192 / 192 | loss: 0.40686 | err: 0.18359
------------- epoch 004 / 005 | time: 045 sec | loss: 0.31998 | err: 0.14844
batch 001 / 192 | loss: 0.03611 | err: 0.00000
batch 002 / 192 | loss: 0.01805 | err: 0.00000
batch 003 / 192 | loss: 0.16171 | err: 0.08333
batch 004 / 192 | loss: 0.12128 | err: 0.06250
batch 005 / 192 | loss: 0.33525 | err: 0.20000
batch 006 / 192 | loss: 0.34191 | err: 0.20833
batch 007 / 192 | loss: 0.38015 | err: 0.21429
batch 008 / 192 | loss: 0.38001 | err: 0.21875
batch 009 / 192 | loss: 0.35474 | err: 0.19444
batch 010 / 192 | loss: 0.33511 | err: 0.17500
batch 011 / 192 | loss: 0.36461 | err: 0.18182
batch 012 / 192 | loss: 0.40736 | err: 0.20833
batch 013 / 192 | loss: 0.40960 | err: 0.21154
batch 014 / 192 | loss: 0.41661 | err: 0.21429
batch 015 / 192 | loss: 0.39398 | err: 0.20000
batch 016 / 192 | loss: 0.37331 | err: 0.18750
batch 017 / 192 | loss: 0.36364 | err: 0.17647
batch 018 / 192 | loss: 0.38621 | err: 0.18056
batch 019 / 192 | loss: 0.3658

batch 174 / 192 | loss: 0.43270 | err: 0.21408
batch 175 / 192 | loss: 0.43294 | err: 0.21429
batch 176 / 192 | loss: 0.43334 | err: 0.21449
batch 177 / 192 | loss: 0.43117 | err: 0.21328
batch 178 / 192 | loss: 0.42874 | err: 0.21208
batch 179 / 192 | loss: 0.42637 | err: 0.21089
batch 180 / 192 | loss: 0.42477 | err: 0.20972
batch 181 / 192 | loss: 0.42242 | err: 0.20856
batch 182 / 192 | loss: 0.42010 | err: 0.20742
batch 183 / 192 | loss: 0.41781 | err: 0.20628
batch 184 / 192 | loss: 0.41554 | err: 0.20516
batch 185 / 192 | loss: 0.41536 | err: 0.20541
batch 186 / 192 | loss: 0.41711 | err: 0.20699
batch 187 / 192 | loss: 0.41488 | err: 0.20588
batch 188 / 192 | loss: 0.41682 | err: 0.20612
batch 189 / 192 | loss: 0.41461 | err: 0.20503
batch 190 / 192 | loss: 0.41352 | err: 0.20395
batch 191 / 192 | loss: 0.41136 | err: 0.20288
batch 192 / 192 | loss: 0.41096 | err: 0.20312
------------- epoch 005 / 005 | time: 051 sec | loss: 0.34847 | err: 0.16016
training time: 210.31788659095

batch 056 / 096 | loss: 0.34860 | err: 0.14955
batch 057 / 096 | loss: 0.34348 | err: 0.14693
batch 058 / 096 | loss: 0.34592 | err: 0.14871
batch 059 / 096 | loss: 0.34529 | err: 0.14831
batch 060 / 096 | loss: 0.34338 | err: 0.14792
batch 061 / 096 | loss: 0.33868 | err: 0.14549
batch 062 / 096 | loss: 0.33404 | err: 0.14315
batch 063 / 096 | loss: 0.33267 | err: 0.14286
batch 064 / 096 | loss: 0.32967 | err: 0.14258
batch 065 / 096 | loss: 0.32491 | err: 0.14038
batch 066 / 096 | loss: 0.32872 | err: 0.14015
batch 067 / 096 | loss: 0.32382 | err: 0.13806
batch 068 / 096 | loss: 0.32409 | err: 0.13787
batch 069 / 096 | loss: 0.32459 | err: 0.13768
batch 070 / 096 | loss: 0.32378 | err: 0.13750
batch 071 / 096 | loss: 0.31984 | err: 0.13556
batch 072 / 096 | loss: 0.31965 | err: 0.13542
batch 073 / 096 | loss: 0.31865 | err: 0.13527
batch 074 / 096 | loss: 0.31686 | err: 0.13514
batch 075 / 096 | loss: 0.31610 | err: 0.13500
batch 076 / 096 | loss: 0.31578 | err: 0.13487
batch 077 / 0

batch 036 / 096 | loss: 0.40548 | err: 0.16319
batch 037 / 096 | loss: 0.40270 | err: 0.16216
batch 038 / 096 | loss: 0.39899 | err: 0.16118
batch 039 / 096 | loss: 0.40440 | err: 0.16346
batch 040 / 096 | loss: 0.40470 | err: 0.16250
batch 041 / 096 | loss: 0.40411 | err: 0.16159
batch 042 / 096 | loss: 0.40988 | err: 0.16369
batch 043 / 096 | loss: 0.40154 | err: 0.15988
batch 044 / 096 | loss: 0.41061 | err: 0.16477
batch 045 / 096 | loss: 0.40767 | err: 0.16389
batch 046 / 096 | loss: 0.40972 | err: 0.16576
batch 047 / 096 | loss: 0.40627 | err: 0.16489
batch 048 / 096 | loss: 0.39983 | err: 0.16146
batch 049 / 096 | loss: 0.40019 | err: 0.16071
batch 050 / 096 | loss: 0.40116 | err: 0.16000
batch 051 / 096 | loss: 0.39946 | err: 0.15931
batch 052 / 096 | loss: 0.39564 | err: 0.15865
batch 053 / 096 | loss: 0.38897 | err: 0.15566
batch 054 / 096 | loss: 0.39500 | err: 0.15972
batch 055 / 096 | loss: 0.39268 | err: 0.15909
batch 056 / 096 | loss: 0.39038 | err: 0.15848
batch 057 / 0

batch 014 / 048 | loss: 0.51916 | err: 0.21429
batch 015 / 048 | loss: 0.50771 | err: 0.21250
batch 016 / 048 | loss: 0.50447 | err: 0.21094
batch 017 / 048 | loss: 0.50262 | err: 0.21324
batch 018 / 048 | loss: 0.51997 | err: 0.22222
batch 019 / 048 | loss: 0.52110 | err: 0.22368
batch 020 / 048 | loss: 0.50132 | err: 0.21563
batch 021 / 048 | loss: 0.50289 | err: 0.21726
batch 022 / 048 | loss: 0.48294 | err: 0.20739
batch 023 / 048 | loss: 0.46876 | err: 0.20109
batch 024 / 048 | loss: 0.46463 | err: 0.20052
batch 025 / 048 | loss: 0.45642 | err: 0.19750
batch 026 / 048 | loss: 0.45730 | err: 0.19952
batch 027 / 048 | loss: 0.45187 | err: 0.19676
batch 028 / 048 | loss: 0.44599 | err: 0.19420
batch 029 / 048 | loss: 0.44349 | err: 0.19397
batch 030 / 048 | loss: 0.44664 | err: 0.19583
batch 031 / 048 | loss: 0.43808 | err: 0.19153
batch 032 / 048 | loss: 0.42877 | err: 0.18750
batch 033 / 048 | loss: 0.42291 | err: 0.18561
batch 034 / 048 | loss: 0.41682 | err: 0.18382
batch 035 / 0

batch 039 / 048 | loss: 0.36760 | err: 0.17147
batch 040 / 048 | loss: 0.36766 | err: 0.17188
batch 041 / 048 | loss: 0.36361 | err: 0.16921
batch 042 / 048 | loss: 0.35836 | err: 0.16667
batch 043 / 048 | loss: 0.35450 | err: 0.16424
batch 044 / 048 | loss: 0.34792 | err: 0.16051
batch 045 / 048 | loss: 0.35456 | err: 0.16528
batch 046 / 048 | loss: 0.35035 | err: 0.16304
batch 047 / 048 | loss: 0.35062 | err: 0.16356
batch 048 / 048 | loss: 0.35380 | err: 0.16536
model saved!
------------- epoch 004 / 005 | time: 014 sec | loss: 0.29047 | err: 0.12109
batch 001 / 048 | loss: 0.11386 | err: 0.06250
batch 002 / 048 | loss: 0.22415 | err: 0.12500
batch 003 / 048 | loss: 0.31535 | err: 0.14583
batch 004 / 048 | loss: 0.28079 | err: 0.12500
batch 005 / 048 | loss: 0.26621 | err: 0.11250
batch 006 / 048 | loss: 0.26049 | err: 0.11458
batch 007 / 048 | loss: 0.29431 | err: 0.13393
batch 008 / 048 | loss: 0.27799 | err: 0.12500
batch 009 / 048 | loss: 0.27133 | err: 0.12500
batch 010 / 048 |

batch 010 / 024 | loss: 0.39575 | err: 0.18750
batch 011 / 024 | loss: 0.37883 | err: 0.17898
batch 012 / 024 | loss: 0.36751 | err: 0.17448
batch 013 / 024 | loss: 0.38158 | err: 0.17788
batch 014 / 024 | loss: 0.38854 | err: 0.18304
batch 015 / 024 | loss: 0.39924 | err: 0.18750
batch 016 / 024 | loss: 0.39320 | err: 0.18359
batch 017 / 024 | loss: 0.37872 | err: 0.17647
batch 018 / 024 | loss: 0.38654 | err: 0.18056
batch 019 / 024 | loss: 0.38055 | err: 0.17763
batch 020 / 024 | loss: 0.38565 | err: 0.18125
batch 021 / 024 | loss: 0.38220 | err: 0.17857
batch 022 / 024 | loss: 0.37928 | err: 0.17756
batch 023 / 024 | loss: 0.36368 | err: 0.16984
batch 024 / 024 | loss: 0.35873 | err: 0.16667
------------- epoch 005 / 005 | time: 011 sec | loss: 0.31026 | err: 0.12891
training time: 48.17788481712341 seconds
batch 001 / 012 | loss: 0.79272 | err: 0.28125
batch 002 / 012 | loss: 0.73181 | err: 0.28125
batch 003 / 012 | loss: 0.72112 | err: 0.27604
batch 004 / 012 | loss: 0.69099 | er