In [96]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil
from datetime import datetime

# Make float64 the default torch dtype (the model parameters below are created as float64).
torch.set_default_dtype(torch.float64)
# Seed torch and numpy once at import time; several functions below re-seed with 0 as well.
torch.manual_seed(0)
np.random.seed(0)

# Slopes handed to MyStrategicModel as train_slope / eval_slope in the experiment cell.
# TRAIN_SLOPE is also read directly by DELTA when it builds its problem.
TRAIN_SLOPE = 2
EVAL_SLOPE = 5
# Box constraints applied element-wise to the optimisation variable x in CCP and DELTA.
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10
# random_state used by the DataFrame.sample(...) shuffles in the data loaders.
SEED = 2

# Utils

In [97]:
def split_data(X, Y, percentage):
    """Split (X, Y) into a training part and a validation part.

    The first ``int(len(X) * percentage)`` samples form the validation set and
    the remaining samples form the training set. No shuffling is performed.

    Returns:
        (X_train, Y_train, X_val, Y_val)
    """
    cut = int(len(X) * percentage)
    train_part = slice(cut, None)
    val_part = slice(None, cut)
    return X[train_part], Y[train_part], X[val_part], Y[val_part]

def shuffle(X, Y):
    """Apply one deterministic random row permutation to X and Y together.

    Y is concatenated in front of X along dimension 1 (so Y must be 2-D with
    the same number of rows as X), the rows are permuted, and the two parts
    are split again. The torch and numpy seeds are reset to 0 first, so the
    permutation is the same on every call.

    Note: this definition shadows ``sklearn.utils.shuffle`` imported at the
    top of the notebook.

    Returns:
        (X_shuffled, Y_shuffled) where Y_shuffled is 1-D (the first column).
    """
    torch.manual_seed(0)
    np.random.seed(0)
    stacked = torch.cat((Y, X), 1)
    order = torch.randperm(stacked.size()[0])
    permuted = stacked[order]
    return permuted[:, 1:], permuted[:, 0]

def conf_mat(Y1, Y2):
    """Confusion matrix of Y1 vs. Y2 over labels [-1, 1], in percent of samples.

    Returns:
        (mat, acc): the 2x2 matrix scaled so its entries sum to 100 (when all
        labels are in {-1, 1}), and its trace, i.e. the percentage on the
        diagonal.
    """
    counts = confusion_matrix(Y1, Y2, labels=[-1, 1])
    percentages = counts*100/len(Y1)
    return percentages, np.trace(percentages)

def calc_accuracy(Y, Ypred):
    """Fraction of positions at which Y and Ypred are exactly equal.

    Both arguments must support element-wise subtraction and boolean-mask
    indexing (e.g. numpy arrays or torch tensors of equal length).
    """
    total = len(Y)
    difference = Y - Ypred
    matches = difference[difference == 0]
    return len(matches)*1./total

# CCP classes

In [98]:
class CCP:
    """Batched convex-concave procedure for the per-sample response problem.

    One cvxpy problem is built for a fixed ``(batch_size, x_dim)`` variable.
    At each iteration the f term is linearised around the current point ``xt``
    (through ``funcs["f_derivative"]``), and the problem
    ``max sum_i [ x_i . grad_f(xt_i) - g(x_i) - c(x_i, r_i) ]`` is solved
    subject to the box constraints X_LOWER_BOUND <= x <= X_UPPER_BOUND.

    Args:
        x_dim: number of features per sample.
        batch_size: number of rows the cvxpy problem is built for.
        funcs: dict providing the batched callables "f_derivative", "g", "c".
        scale: multiplier forwarded to the cost function ``c``.
    """

    def __init__(self, x_dim, batch_size, funcs, scale):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        # The problem below has a fixed number of rows; optimize_X uses this
        # to chunk/pad inputs of any other length.
        self.batch_size = batch_size

        self.x = cp.Variable((batch_size, x_dim))
        self.xt = cp.Parameter((batch_size, x_dim))
        self.r = cp.Parameter((batch_size, x_dim))
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.slope = cp.Parameter(1)

        # diag(x @ grad.T) picks, for every row i, the inner product x_i . grad_f(xt_i).
        target = cp.diag(self.x@(self.f_derivative(self.xt, self.w, self.b, self.slope).T))-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        self.prob = cp.Problem(cp.Maximize(cp.sum(target)), constraints)

    def ccp(self, r, tol=0.001, max_iter=100):
        """Run the CCP iterations starting from ``r`` (numpy to numpy).

        Args:
            r: array of shape (batch_size, x_dim); used both as the cost
               reference point and as the initial linearisation point.
            tol: stop once the Frobenius norm of the change between two
                 consecutive iterates is at most this value.
            max_iter: maximum number of re-linearisations after the first solve.

        Returns:
            The value of the cvxpy variable after the last solve.
        """
        self.xt.value = r
        self.r.value = r
        self.prob.solve()
        diff = np.linalg.norm(self.xt.value - self.x.value)
        cnt = 0
        while diff > tol and cnt < max_iter:
            cnt += 1
            # Re-linearise around the latest solution and solve again.
            self.xt.value = self.x.value
            self.prob.solve()
            diff = np.linalg.norm(self.x.value - self.xt.value)
        return self.x.value

    def optimize_X(self, X, w, b, slope):
        """Compute the CCP response for every row of X (tensor to tensor).

        Args:
            X: tensor of shape (n, x_dim). ``n`` may differ from the
               ``batch_size`` the problem was built with.
            w, b: model weight vector and bias (tensors; detached here).
            slope: scalar slope written into the cvxpy ``slope`` parameter.

        Returns:
            Tensor of shape (n, x_dim) with the optimised points.
        """
        w = w.detach().numpy()
        b = b.detach().numpy()
        slope = np.full(1, slope)
        X = X.numpy()

        self.w.value = w
        self.b.value = b
        self.slope.value = slope

        n = X.shape[0]
        if n == self.batch_size:
            # Exact fit: a single solve, as the problem was designed for.
            return torch.from_numpy(self.ccp(X))

        # General case: the cvxpy problem has a fixed row count, so process X
        # in chunks of batch_size. A short final chunk is padded by repeating
        # its last row; the objective is a sum of independent per-row terms,
        # so the padding rows do not change the optimum of the real rows.
        out = np.empty(X.shape)
        for start in range(0, n, self.batch_size):
            chunk = X[start:start + self.batch_size]
            rows = chunk.shape[0]
            if rows < self.batch_size:
                padding = np.repeat(chunk[-1:], self.batch_size - rows, axis=0)
                chunk = np.concatenate([chunk, padding], axis=0)
            out[start:start + rows] = self.ccp(chunk)[:rows]
        return torch.from_numpy(out)

In [99]:
class DELTA():
    """Differentiable single-sample response layer built with CvxpyLayer.

    Solves ``max_x  x . f_der - g(x) - c(x, r)`` subject to the box
    constraints X_LOWER_BOUND <= x <= X_UPPER_BOUND, with ``r``, ``w``, ``b``
    and ``f_der`` as layer parameters so gradients can flow back to them.

    Args:
        x_dim: number of features.
        funcs: dict providing the single-sample callables "g" and "c".
        scale: multiplier forwarded to the cost function ``c``.
        slope: slope used inside ``g``. Defaults to the module-level
               TRAIN_SLOPE when omitted, which is the original behaviour.
    """

    def __init__(self, x_dim, funcs, scale, slope=None):
        if slope is None:
            slope = TRAIN_SLOPE
        self.g = funcs["g"]
        self.c = funcs["c"]

        self.x = cp.Variable(x_dim)
        # Parameters get random initial values; the real values are supplied
        # on every call to the layer.
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.b = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))

        target = self.x@self.f_der-self.g(self.x, self.w, self.b, slope)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        objective = cp.Maximize(target)
        problem = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(problem, parameters=[self.r, self.w, self.b, self.f_der],
                                variables=[self.x])

    def optimize_X(self, X, w, b, F_DER):
        """Run the layer; X is passed as the ``r`` parameter. Returns the optimised x."""
        return self.layer(X, w, b, F_DER)[0]

# Gain & Cost functions

In [100]:
def score(x, w, b):
    """Linear score ``x @ w + b`` (works for arrays, tensors and cvxpy expressions)."""
    projection = x@w
    return projection + b

def f(x, w, b, slope):
    """cvxpy expression 0.5 * || [1, slope*score(x, w, b) + 1] ||_2 for one sample."""
    shifted = slope*score(x, w, b) + 1
    stacked = cp.hstack([1, shifted])
    return 0.5*cp.norm(stacked, 2)

def g(x, w, b, slope):
    """cvxpy expression 0.5 * || [1, slope*score(x, w, b) - 1] ||_2 for one sample."""
    shifted = slope*score(x, w, b) - 1
    stacked = cp.hstack([1, shifted])
    return 0.5*cp.norm(stacked, 2)

def c(x, r, x_dim, scale):
    """Quadratic cost ``scale * ||x - r||^2`` of moving from r to x (x_dim is unused)."""
    displacement = x - r
    return (scale)*cp.sum_squares(displacement)

def f_derivative(x, w, b, slope):
    """Expression 0.5 * slope * z / sqrt(z^2 + 1) * w, with z = slope*score(x, w, b) + 1."""
    z = slope*score(x, w, b) + 1
    coefficient = slope*(z/cp.sqrt(z**2 + 1))
    return 0.5*cp.multiply(coefficient, w)
    
def f_batch(x, w, b, slope):
    """Row-wise version of f: one value 0.5 * ||[1, slope*score + 1]||_2 per row of x."""
    num_rows = x.shape[0]
    shifted = slope*score(x, w, b) + 1
    stacked = cp.vstack([np.ones(num_rows), shifted])
    # axis=0 takes the norm of every column, i.e. one norm per sample.
    return 0.5*cp.norm(stacked, 2, axis=0)

def g_batch(x, w, b, slope):
    """Row-wise version of g: one value 0.5 * ||[1, slope*score - 1]||_2 per row of x."""
    num_rows = x.shape[0]
    ones_row = np.ones((1, num_rows))
    shifted_row = cp.reshape((slope*score(x, w, b) - 1), (1, num_rows))
    stacked = cp.vstack([ones_row, shifted_row])
    # axis=0 takes the norm of every column, i.e. one norm per sample.
    return 0.5*cp.norm(stacked, 2, axis=0)

def c_batch(x, r, x_dim, scale):
    """Row-wise quadratic cost ``scale * ||x_i - r_i||_2^2`` (x_dim is unused)."""
    row_norms = cp.norm(x-r, 2, axis=1)
    return (scale)*cp.square(row_norms)

def f_derivative_batch(x, w, b, slope):
    """Row-wise version of f_derivative.

    Computes the per-row coefficient 0.5 * slope * z / sqrt(z^2 + 1) with
    z = slope*score(x, w, b) + 1 and returns its outer product with w, giving
    one row of length x.shape[1] per sample.
    """
    z = slope*score(x, w, b) + 1
    nablas = 0.5*slope*(z/cp.sqrt(z**2 + 1))
    as_column = cp.reshape(nablas, (nablas.shape[0], 1))
    w_as_row = cp.reshape(w, (1, x.shape[1]))
    return as_column@w_as_row

# Model

In [101]:
class MyStrategicModel(torch.nn.Module):
    """Linear classifier ``sign(x @ w + b)`` trained with hinge loss.

    When ``strategic`` is true, the score is computed on the response of each
    sample instead of on the sample itself: the response is found with the
    batched CCP solver, and during training it is recomputed through the
    differentiable DELTA layer so gradients reach ``w`` and ``b``.

    Args:
        x_dim: number of features.
        batch_size: row count the CCP problem is built for.
        funcs: single-sample gain/cost callables (passed to DELTA).
        funcs_batch: batched gain/cost callables (passed to CCP).
        train_slope: slope used by CCP during training.
        eval_slope: slope used by CCP during evaluation.
        scale: cost multiplier passed to CCP and DELTA.
        strategic: whether samples respond strategically.
    """

    def __init__(self, x_dim, batch_size, funcs, funcs_batch, train_slope, eval_slope, scale, strategic=False):
        # Re-seed so every model starts from the same initial weights.
        torch.manual_seed(0)
        np.random.seed(0)

        super(MyStrategicModel, self).__init__()
        self.x_dim = x_dim
        self.batch_size = batch_size
        self.train_slope, self.eval_slope = train_slope, eval_slope
        # Uniform initialisation in [-sqrt(1/x_dim), sqrt(1/x_dim)).
        self.w = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64, requires_grad=True)))
        self.b = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(1, dtype=torch.float64, requires_grad=True)))
        self.strategic = strategic
        self.ccp = CCP(x_dim, batch_size, funcs_batch, scale)
        # NOTE: DELTA is constructed without a slope argument, so it relies on
        # the module-level TRAIN_SLOPE rather than on train_slope.
        self.delta = DELTA(x_dim, funcs, scale)
        self.ccp_time = 0    # accumulated seconds spent inside CCP solves
        self.total_time = 0  # duration of the last fit() call, in seconds

    def forward(self, X, evaluation=False):
        """Return the scores for X (of the strategic responses if enabled).

        Args:
            X: tensor of shape (n, x_dim).
            evaluation: use eval_slope and skip the differentiable layer.
        """
        if not self.strategic:
            return self.score(X)

        slope = self.eval_slope if evaluation else self.train_slope
        t1 = time.time()
        XT = self.ccp.optimize_X(X, self.w, self.b, slope)
        self.ccp_time += time.time()-t1

        if evaluation:
            X_opt = XT
        else:
            F_DER = self.get_f_ders(XT, self.train_slope)
            # X_opt should equal XT; it is recomputed through the layer only
            # to obtain gradients with respect to w and b.
            X_opt = self.delta.optimize_X(X, self.w, self.b, F_DER)
        return self.score(X_opt)

    def optimize_X(self, X, evaluation=False):
        """Return the CCP responses for X using the eval or train slope."""
        slope = self.eval_slope if evaluation else self.train_slope
        return self.ccp.optimize_X(X, self.w, self.b, slope)

    def normalize_weights(self):
        """Rescale (w, b) in place so that ||w||^2 + b^2 == 1."""
        with torch.no_grad():
            norm = torch.sqrt(torch.sum(self.w**2) + self.b**2)
            self.w /= norm
            self.b /= norm

    def score(self, x):
        """Linear score ``x @ w + b``."""
        return x@self.w + self.b

    def get_f_ders(self, XT, slope):
        """Per-row gradient of f at XT: outer product of the scalar factors with w."""
        nablas = 0.5*slope*((slope*self.score(XT) + 1)/torch.sqrt((slope*self.score(XT) + 1)**2 + 1))
        return torch.reshape(nablas, (len(nablas), 1))@torch.reshape(self.w, (1, len(self.w)))

    def calc_accuracy(self, Y, Y_pred):
        """Fraction of samples where ``sign(Y_pred)`` equals Y."""
        Y_pred = torch.sign(Y_pred)
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num
        return acc

    def evaluate(self, X, Y):
        """Accuracy on (X, Y) in evaluation mode."""
        return self.calc_accuracy(Y, self.forward(X, evaluation=True))

    def loss(self, Y, Y_pred):
        """Mean hinge loss ``mean(max(0, 1 - Y_pred * Y))``."""
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))

    def save_model(self, train_errors, val_errors, train_losses, val_losses, info, path, comment=None):
        """Write the state dict, the four metric histories (CSV) and info.txt.

        Files go to ``path`` or, when ``comment`` is given, to ``path/comment``.
        """
        if comment is not None:
            path += "/" + comment

        filename = path + "/model.pt"
        # exist_ok avoids the check-then-create race of the explicit test.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        torch.save(self.state_dict(), filename)

        pd.DataFrame(np.array(train_errors)).to_csv(path + '/train_errors.csv')
        pd.DataFrame(np.array(val_errors)).to_csv(path + '/val_errors.csv')
        pd.DataFrame(np.array(train_losses)).to_csv(path + '/train_losses.csv')
        pd.DataFrame(np.array(val_losses)).to_csv(path + '/val_losses.csv')

        with open(path + "/info.txt", "w") as f:
            f.write(info)

    def load_model(self, filename):
        """Load a state dict saved by save_model and switch to eval mode."""
        # NOTE(review): torch.load unpickles the file; only load trusted files.
        self.load_state_dict(torch.load(filename))
        self.eval()

    def fit(self, path, X, Y, Xval, Yval, opt, opt_kwargs=None, batch_size=128, epochs=100, verbose=False, callback=None, comment=None, patience=4):
        """Train with mini-batches, validating and checkpointing every epoch.

        Args:
            path: output directory for checkpoints (see save_model).
            X, Y: training tensors.
            Xval, Yval: validation tensors (must be non-empty).
            opt: optimizer class, called as ``opt(self.parameters(), **opt_kwargs)``.
            opt_kwargs: optimizer keyword arguments; defaults to ``{"lr": 1e-3}``.
            batch_size: mini-batch size for both loaders.
            epochs: maximum number of epochs.
            verbose: print per-batch and per-epoch progress.
            callback: optional zero-argument callable run after every batch.
            comment: optional sub-directory name for the checkpoint.
            patience: stop after this many consecutive epochs without a new
                      best validation error.

        Returns:
            (train_errors, val_errors, train_losses, val_losses); the train
            lists hold one list of per-batch values per epoch, the validation
            lists hold one averaged value per epoch.

        Raises:
            ValueError: if the validation set is empty.
        """
        if opt_kwargs is None:
            # Fresh dict per call instead of a shared mutable default.
            opt_kwargs = {"lr": 1e-3}
        if len(Xval) == 0:
            # Fail before training rather than dividing by zero afterwards.
            raise ValueError("validation set is empty")

        train_dset = TensorDataset(X, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        test_dset = TensorDataset(Xval, Yval)
        test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=True)

        opt = opt(self.parameters(), **opt_kwargs)
        # Learning rate reported in info.txt; fall back to the optimizer's own
        # value when the caller did not pass "lr".
        lr = opt_kwargs["lr"] if "lr" in opt_kwargs else opt.param_groups[0]["lr"]

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []

        best_val_error = 1
        consecutive_no_improvement = 0

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, Ybatch in train_loader:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch)
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                with torch.no_grad():
                    e = self.calc_accuracy(Ybatch, Ybatch_pred)
                    train_errors[-1].append(1-e)
                if verbose:
                    # Running means over the batches seen so far in this epoch.
                    print("batch %03d / %03d | loss: %3.5f | err: %3.5f" %
                          (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                batch += 1
                if callback is not None:
                    callback()

            with torch.no_grad():
                total_loss = 0
                total_error = 0
                batch = 0
                for Xbatch, Ybatch in test_loader:
                    Yval_pred = self.forward(Xbatch, evaluation=True)
                    val_loss = self.loss(Ybatch, Yval_pred).item()
                    total_loss += val_loss
                    val_error = 1-self.calc_accuracy(Ybatch, Yval_pred)
                    total_error += val_error
                    batch += 1

                # Unweighted mean over validation batches.
                avg_loss = total_loss/batch
                avg_error = total_error/batch
                val_losses.append(avg_loss)
                val_errors.append(avg_error)
                if avg_error < best_val_error:
                    consecutive_no_improvement = 0
                    best_val_error = avg_error
                    info = "training time in seconds: {}\nepoch: {}\nbatch size: {}\ntrain slope: {}\neval slope: {}\nlearning rate: {}\nvalidation loss: {}\nvalidation error: {}\n".format(
                        time.time()-total_time, epoch, batch_size, self.train_slope, self.eval_slope, lr, avg_loss, avg_error)
                    self.save_model(train_errors, val_errors, train_losses, val_losses, info, path, comment)
                    print("model saved!")
                else:
                    consecutive_no_improvement += 1
                    if consecutive_no_improvement >= patience:
                        # Early stopping: leaves the epoch loop.
                        break

            t2 = time.time()
            if verbose:
                print("------------- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))

        self.total_time = time.time()-total_time
        print("training time: {} seconds".format(self.total_time))
        return train_errors, val_errors, train_losses, val_losses

In [102]:
def gen_sklearn_data(x_dim, N, informative_frac=1, shift_range=1, scale_range=1, noise_frac=0.01):
    """Generate a synthetic binary classification set with make_classification.

    Seeds are reset to 0, per-feature shifts and scales are drawn from a
    normal distribution (multiplied by shift_range / scale_range), labels are
    mapped from {0, 1} to {-1, 1}, and every feature is standardised to zero
    mean and unit standard deviation.

    Returns:
        (X, Y) as torch tensors created from the numpy arrays.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    num_informative = int(informative_frac*x_dim)
    shifts = shift_range*np.random.randn(x_dim)
    scales = scale_range*np.random.randn(x_dim)
    features, labels = make_classification(
        n_samples=N,
        n_features=x_dim,
        n_informative=num_informative,
        n_redundant=x_dim - num_informative,
        flip_y=noise_frac,
        shift=shifts,
        scale=scales,
        random_state=0,
    )
    labels[labels == 0] = -1
    centered = features - np.mean(features, axis=0)
    standardized = centered / np.std(centered, axis=0)
    return torch.from_numpy(standardized), torch.from_numpy(labels)

def load_spam_data(path=r"C:\Users\sagil\Desktop\nir_project\tip_spam_data\IS_journal_tip_spam.arff"):
    """Load the tip-spam ARFF file and return standardised features and labels.

    Args:
        path: location of the ARFF file. The default is the original
              machine-specific path; pass your own location to run elsewhere.

    Returns:
        (X, Y) torch tensors. X holds the 15 selected feature columns,
        standardised per column and divided by sqrt(number of features).
        Y holds -1 for b'spam' and 1 for b'notspam'.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    data, _ = arff.loadarff(path)
    df = pd.DataFrame(data)
    most_disc = ['qTips_plc', 'rating_plc', 'qEmail_tip', 'qContacts_tip', 'qURL_tip', 'qPhone_tip', 'qNumeriChar_tip', 'sentistrength_tip', 'combined_tip', 'qWords_tip', 'followers_followees_gph', 'qunigram_avg_tip', 'qTips_usr', 'indeg_gph', 'qCapitalChar_tip', 'class1']
    df = df[most_disc].copy()
    # Assign the result back: a chained ``df[col].replace(..., inplace=True)``
    # acts on a temporary and does not update df under pandas Copy-on-Write.
    df["class1"] = df["class1"].replace({b'spam': -1, b'notspam': 1}).astype(np.int64)
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    Y = df['class1'].to_numpy(copy=True)
    X = df.drop('class1', axis = 1).to_numpy(dtype=np.float64, copy=True)
    x_dim = len(X[0])
    # Out-of-place arithmetic so this also works when pandas returns a
    # read-only array.
    X = X - np.mean(X, axis=0)
    X = X / np.std(X, axis=0)
    X = X / math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_card_fraud_data(path='C:/Users/sagil/Desktop/nir_project/card_fraud_dataset/creditcard.csv'):
    """Load the credit-card fraud CSV as a balanced, standardised data set.

    'Amount' is scaled with RobustScaler and moved to the first column as
    'scaled_amount'; 'Time' is dropped. Labels are remapped (1 -> -1 for
    fraud, 0 -> 1 otherwise). All fraud rows are kept together with an equal
    number of non-fraud rows taken after a seeded shuffle.

    Args:
        path: location of creditcard.csv. The default is the original
              machine-specific path; pass your own location to run elsewhere.

    Returns:
        (X, Y) torch tensors; X is standardised per column and divided by
        sqrt(number of features).
    """
    torch.manual_seed(0)
    np.random.seed(0)
    df = pd.read_csv(path)

    rob_scaler = RobustScaler()

    # Scale 'Amount', drop the raw columns and put the scaled one first.
    scaled_amount = rob_scaler.fit_transform(df['Amount'].values.reshape(-1,1)).ravel()
    df = df.drop(['Time','Amount'], axis=1)
    df.insert(0, 'scaled_amount', scaled_amount)

    # Assign the result back: a chained ``df[col].replace(..., inplace=True)``
    # acts on a temporary and does not update df under pandas Copy-on-Write.
    df["Class"] = df["Class"].replace({1: -1, 0: 1})
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    # Balance the classes: keep every fraud row (492 in the original data)
    # and the same number of non-fraud rows.
    fraud_df = df.loc[df['Class'] == -1]
    non_fraud_df = df.loc[df['Class'] == 1][:len(fraud_df)]

    normal_distributed_df = pd.concat([fraud_df, non_fraud_df])

    # Shuffle dataframe rows
    df = normal_distributed_df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    Y = df['Class'].to_numpy(copy=True)
    X = df.drop('Class', axis = 1).to_numpy(dtype=np.float64, copy=True)
    x_dim = len(X[0])
    # Out-of-place arithmetic so this also works when pandas returns a
    # read-only array.
    X = X - np.mean(X, axis=0)
    X = X / np.std(X, axis=0)
    X = X / math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_credit_default_data(url='https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'):
    """Load the processed credit-default CSV as a standardised data set.

    Label 0 in "NoDefaultNextMonth" is remapped to -1, the marital-status and
    age-bucket columns are dropped, all -1 rows plus at most 6636 rows with
    label 1 are kept, and features are standardised.

    Args:
        url: URL or path of the CSV file (anything pandas.read_csv accepts).

    Returns:
        (X, Y) torch tensors; Y is the first column of the frame, X the rest,
        standardised per column and divided by sqrt(number of features).
    """
    torch.manual_seed(0)
    np.random.seed(0)
    label = "NoDefaultNextMonth"
    df = pd.read_csv(url)
    # Assign the result back: a chained ``df[col].replace(..., inplace=True)``
    # acts on a temporary and does not update df under pandas Copy-on-Write.
    df[label] = df[label].replace({0: -1})
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    df = df.drop(['Married', 'Single', 'Age_lt_25', 'Age_in_25_to_40', 'Age_in_40_to_59', 'Age_geq_60'], axis = 1)

    fraud_df = df.loc[df[label] == -1]
    # NOTE(review): 6636 is presumably the number of -1 rows in this data set,
    # which would make the two classes balanced - confirm against the data.
    non_fraud_df = df.loc[df[label] == 1][:6636]

    normal_distributed_df = pd.concat([fraud_df, non_fraud_df])

    # Shuffle dataframe rows
    df = normal_distributed_df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    scaler = StandardScaler()
    df.loc[:, df.columns != label] = scaler.fit_transform(df.drop(label, axis=1))
    # The label is taken positionally: column 0 is Y, the rest are features.
    Y = df.iloc[:, 0].to_numpy(copy=True)
    X = df.iloc[:, 1:].to_numpy(dtype=np.float64, copy=True)
    x_dim = len(X[0])
    # Out-of-place arithmetic so this also works when pandas returns a
    # read-only array.
    X = X - np.mean(X, axis=0)
    X = X / np.std(X, axis=0)
    X = X / math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_financial_distress_data(path="C:/Users/sagil/Desktop/nir_project/financial_distress_data/Financial Distress.csv"):
    """Load the financial-distress CSV as a standardised binary data set.

    Columns matching 'x80' and the 'Time' column are dropped, only the last
    record of every company is kept, and the continuous target (first column
    after grouping) is binarised: values below -0.5 become -1, all others 1.

    Args:
        path: location of the CSV file. The default is the original
              machine-specific path; pass your own location to run elsewhere.

    Returns:
        (X, Y) torch tensors; X is standardised per column and divided by
        sqrt(number of features).
    """
    torch.manual_seed(0)
    np.random.seed(0)
    data = pd.read_csv(path)

    data = data[data.columns.drop(list(data.filter(regex='x80')))] # Since it is a categorical feature with 37 features.
    data = data.drop(['Time'], axis=1)

    data_grouped = data.groupby(['Company']).last()

    scaler = StandardScaler()
    data_grouped.loc[:, data_grouped.columns != "Financial Distress"] = scaler.fit_transform(data_grouped.drop("Financial Distress", axis=1))

    # Shuffle dataframe rows
    data_grouped = data_grouped.sample(frac=1, random_state=SEED).reset_index(drop=True)

    target = data_grouped.iloc[:, 0].to_numpy(copy=True)
    X = data_grouped.iloc[:, 1:].to_numpy(dtype=np.float64, copy=True)
    # Convert the target variable from continuous to binary form.
    Y = np.where(target < -0.5, -1.0, 1.0)
    x_dim = len(X[0])
    # Out-of-place arithmetic so this also works when pandas returns a
    # read-only array.
    X = X - np.mean(X, axis=0)
    X = X / np.std(X, axis=0)
    X = X / math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

In [None]:
# Runtime experiment: train a strategic model once per batch size and record
# how long training and the CCP solves take.
# NOTE(review): absolute, machine-specific output directory.
path = "C:/Users/sagil/Desktop/nir_project/models/runtime_varying_batch_size"
epochs = 5
x_dim = 10
scale = 1
X, Y = gen_sklearn_data(x_dim, 1024)
# The first 25% of the samples become the validation set.
X, Y, Xval, Yval = split_data(X, Y, 0.25)
print(Xval.size())
print("percent of positive samples: {}%".format(100 * len(Y[Y == 1]) / len(Y)))

# Single-sample callables (used by DELTA) and their batched counterparts (used by CCP).
funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}
funcs_batch = {"f": f_batch, "g": g_batch, "f_derivative": f_derivative_batch, "c": c_batch, "score": score}

total = []  # total fit() time per batch size
ccp = []    # accumulated CCP solve time per batch size
# Batch sizes 1, 2, 4, ..., 256.
for batch_size in (2**np.arange(9)).tolist():
    strategic_model = MyStrategicModel(x_dim, batch_size, funcs, funcs_batch, TRAIN_SLOPE, EVAL_SLOPE, scale=scale, strategic=True)
    strategic_model.fit(path, X, Y, Xval, Yval,
                        opt=torch.optim.Adam, opt_kwargs={"lr": (1e-1)},
                        batch_size=batch_size, epochs=epochs, verbose=True,
                       comment="batched")
    
    total_time = strategic_model.total_time
    ccp_time = strategic_model.ccp_time
    total.append(total_time)
    ccp.append(ccp_time)
    # Both CSVs are rewritten after every batch size, so partial results
    # are on disk if the run is interrupted.
    pd.DataFrame(np.array(total)).to_csv(path + '/total_timing_results.csv')
    pd.DataFrame(np.array(ccp)).to_csv(path + '/ccp_timing_results.csv')

torch.Size([256, 10])
percent of positive samples: 52.473958333333336%
batch 001 / 768 | loss: 1.60883 | err: 1.00000
batch 002 / 768 | loss: 1.42408 | err: 1.00000


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

batch 003 / 768 | loss: 1.43130 | err: 1.00000
batch 004 / 768 | loss: 1.46224 | err: 1.00000
batch 005 / 768 | loss: 1.31990 | err: 0.80000
batch 006 / 768 | loss: 1.22072 | err: 0.66667
batch 007 / 768 | loss: 1.19027 | err: 0.71429
batch 008 / 768 | loss: 1.20005 | err: 0.75000
batch 009 / 768 | loss: 1.10415 | err: 0.66667
batch 010 / 768 | loss: 1.11612 | err: 0.70000
batch 011 / 768 | loss: 1.06212 | err: 0.63636
batch 012 / 768 | loss: 0.97361 | err: 0.58333
batch 013 / 768 | loss: 0.90332 | err: 0.53846
batch 014 / 768 | loss: 0.89250 | err: 0.50000
batch 015 / 768 | loss: 0.83830 | err: 0.46667
batch 016 / 768 | loss: 0.78591 | err: 0.43750
batch 017 / 768 | loss: 0.73968 | err: 0.41176
batch 018 / 768 | loss: 0.86659 | err: 0.44444
batch 019 / 768 | loss: 0.91253 | err: 0.47368
batch 020 / 768 | loss: 0.87997 | err: 0.45000
batch 021 / 768 | loss: 0.83807 | err: 0.42857
batch 022 / 768 | loss: 0.80810 | err: 0.40909
batch 023 / 768 | loss: 0.77296 | err: 0.39130
batch 024 / 7

batch 180 / 768 | loss: 0.75600 | err: 0.34444
batch 181 / 768 | loss: 0.75182 | err: 0.34254
batch 182 / 768 | loss: 0.74769 | err: 0.34066
batch 183 / 768 | loss: 0.75875 | err: 0.34426
batch 184 / 768 | loss: 0.76934 | err: 0.34783
batch 185 / 768 | loss: 0.76518 | err: 0.34595
batch 186 / 768 | loss: 0.76107 | err: 0.34409
batch 187 / 768 | loss: 0.76735 | err: 0.34759
batch 188 / 768 | loss: 0.77262 | err: 0.35106
batch 189 / 768 | loss: 0.78713 | err: 0.35450
batch 190 / 768 | loss: 0.78299 | err: 0.35263
batch 191 / 768 | loss: 0.78634 | err: 0.35602
batch 192 / 768 | loss: 0.78987 | err: 0.35938
batch 193 / 768 | loss: 0.79312 | err: 0.36269
batch 194 / 768 | loss: 0.79792 | err: 0.36598
batch 195 / 768 | loss: 0.80408 | err: 0.36923
batch 196 / 768 | loss: 0.80297 | err: 0.36735
batch 197 / 768 | loss: 0.80891 | err: 0.37056
batch 198 / 768 | loss: 0.80874 | err: 0.36869
batch 199 / 768 | loss: 0.80468 | err: 0.36683
batch 200 / 768 | loss: 0.80066 | err: 0.36500
batch 201 / 7

batch 355 / 768 | loss: 0.75240 | err: 0.34085
batch 356 / 768 | loss: 0.75029 | err: 0.33989
batch 357 / 768 | loss: 0.74819 | err: 0.33894
batch 358 / 768 | loss: 0.74610 | err: 0.33799
batch 359 / 768 | loss: 0.75219 | err: 0.33983
batch 360 / 768 | loss: 0.75916 | err: 0.34167
batch 361 / 768 | loss: 0.76122 | err: 0.34349
batch 362 / 768 | loss: 0.76400 | err: 0.34530
batch 363 / 768 | loss: 0.76309 | err: 0.34435
batch 364 / 768 | loss: 0.76099 | err: 0.34341
batch 365 / 768 | loss: 0.76471 | err: 0.34521
batch 366 / 768 | loss: 0.76739 | err: 0.34699
batch 367 / 768 | loss: 0.76530 | err: 0.34605
batch 368 / 768 | loss: 0.76322 | err: 0.34511
batch 369 / 768 | loss: 0.76605 | err: 0.34688
batch 370 / 768 | loss: 0.76531 | err: 0.34595
batch 371 / 768 | loss: 0.76408 | err: 0.34501
batch 372 / 768 | loss: 0.76377 | err: 0.34409
batch 373 / 768 | loss: 0.76172 | err: 0.34316
batch 374 / 768 | loss: 0.76346 | err: 0.34492
batch 375 / 768 | loss: 0.76272 | err: 0.34400
batch 376 / 7

batch 530 / 768 | loss: 0.78357 | err: 0.34340
batch 531 / 768 | loss: 0.78503 | err: 0.34463
batch 532 / 768 | loss: 0.78916 | err: 0.34586
batch 533 / 768 | loss: 0.78820 | err: 0.34522
batch 534 / 768 | loss: 0.78673 | err: 0.34457
batch 535 / 768 | loss: 0.78526 | err: 0.34393
batch 536 / 768 | loss: 0.78912 | err: 0.34515
batch 537 / 768 | loss: 0.78765 | err: 0.34451
batch 538 / 768 | loss: 0.78619 | err: 0.34387
batch 539 / 768 | loss: 0.78473 | err: 0.34323
batch 540 / 768 | loss: 0.78605 | err: 0.34444
batch 541 / 768 | loss: 0.78467 | err: 0.34381
batch 542 / 768 | loss: 0.78323 | err: 0.34317
batch 543 / 768 | loss: 0.78178 | err: 0.34254
batch 544 / 768 | loss: 0.78035 | err: 0.34191
batch 545 / 768 | loss: 0.78206 | err: 0.34312
batch 546 / 768 | loss: 0.78505 | err: 0.34432
batch 547 / 768 | loss: 0.78788 | err: 0.34552
batch 548 / 768 | loss: 0.78644 | err: 0.34489
batch 549 / 768 | loss: 0.78896 | err: 0.34608
batch 550 / 768 | loss: 0.78860 | err: 0.34545
batch 551 / 7

batch 705 / 768 | loss: 0.80121 | err: 0.34752
batch 706 / 768 | loss: 0.80048 | err: 0.34703
batch 707 / 768 | loss: 0.79935 | err: 0.34653
batch 708 / 768 | loss: 0.79906 | err: 0.34605
batch 709 / 768 | loss: 0.79793 | err: 0.34556
batch 710 / 768 | loss: 0.80048 | err: 0.34648
batch 711 / 768 | loss: 0.79936 | err: 0.34599
batch 712 / 768 | loss: 0.79824 | err: 0.34551
batch 713 / 768 | loss: 0.79712 | err: 0.34502
batch 714 / 768 | loss: 0.79635 | err: 0.34454
batch 715 / 768 | loss: 0.79566 | err: 0.34406
batch 716 / 768 | loss: 0.79507 | err: 0.34358
batch 717 / 768 | loss: 0.79396 | err: 0.34310
batch 718 / 768 | loss: 0.79500 | err: 0.34401
batch 719 / 768 | loss: 0.79389 | err: 0.34353
batch 720 / 768 | loss: 0.79279 | err: 0.34306
batch 721 / 768 | loss: 0.79169 | err: 0.34258
batch 722 / 768 | loss: 0.79404 | err: 0.34349
batch 723 / 768 | loss: 0.79294 | err: 0.34302
batch 724 / 768 | loss: 0.79250 | err: 0.34254
batch 725 / 768 | loss: 0.79164 | err: 0.34207
batch 726 / 7

batch 112 / 768 | loss: 0.82991 | err: 0.40179
batch 113 / 768 | loss: 0.83556 | err: 0.40708
batch 114 / 768 | loss: 0.82825 | err: 0.40351
batch 115 / 768 | loss: 0.82350 | err: 0.40000
batch 116 / 768 | loss: 0.82198 | err: 0.39655
batch 117 / 768 | loss: 0.84649 | err: 0.40171
batch 118 / 768 | loss: 0.83932 | err: 0.39831
batch 119 / 768 | loss: 0.83226 | err: 0.39496
batch 120 / 768 | loss: 0.82533 | err: 0.39167
batch 121 / 768 | loss: 0.81851 | err: 0.38843
batch 122 / 768 | loss: 0.83191 | err: 0.39344
batch 123 / 768 | loss: 0.82514 | err: 0.39024
batch 124 / 768 | loss: 0.84621 | err: 0.39516
batch 125 / 768 | loss: 0.87224 | err: 0.40000
batch 126 / 768 | loss: 0.86532 | err: 0.39683
batch 127 / 768 | loss: 0.87677 | err: 0.40157
batch 128 / 768 | loss: 0.86992 | err: 0.39844
batch 129 / 768 | loss: 0.86317 | err: 0.39535
batch 130 / 768 | loss: 0.87050 | err: 0.40000
batch 131 / 768 | loss: 0.86385 | err: 0.39695
batch 132 / 768 | loss: 0.86829 | err: 0.40152
batch 133 / 7

batch 286 / 768 | loss: 0.85507 | err: 0.39161
batch 287 / 768 | loss: 0.85909 | err: 0.39373
batch 288 / 768 | loss: 0.86299 | err: 0.39583
batch 289 / 768 | loss: 0.86000 | err: 0.39446
batch 290 / 768 | loss: 0.85704 | err: 0.39310
batch 291 / 768 | loss: 0.85409 | err: 0.39175
batch 292 / 768 | loss: 0.85754 | err: 0.39384
batch 293 / 768 | loss: 0.85503 | err: 0.39249
batch 294 / 768 | loss: 0.85212 | err: 0.39116
batch 295 / 768 | loss: 0.85503 | err: 0.39322
batch 296 / 768 | loss: 0.85214 | err: 0.39189
batch 297 / 768 | loss: 0.85082 | err: 0.39057
batch 298 / 768 | loss: 0.84796 | err: 0.38926
batch 299 / 768 | loss: 0.84621 | err: 0.38796
batch 300 / 768 | loss: 0.84807 | err: 0.39000
batch 301 / 768 | loss: 0.84525 | err: 0.38870
batch 302 / 768 | loss: 0.84281 | err: 0.38742
batch 303 / 768 | loss: 0.84697 | err: 0.38944
batch 304 / 768 | loss: 0.84419 | err: 0.38816
batch 305 / 768 | loss: 0.84142 | err: 0.38689
batch 306 / 768 | loss: 0.84355 | err: 0.38889
batch 307 / 7

batch 461 / 768 | loss: 0.86394 | err: 0.39046
batch 462 / 768 | loss: 0.86207 | err: 0.38961
batch 463 / 768 | loss: 0.86021 | err: 0.38877
batch 464 / 768 | loss: 0.85836 | err: 0.38793
batch 465 / 768 | loss: 0.85749 | err: 0.38710
batch 466 / 768 | loss: 0.85565 | err: 0.38627
batch 467 / 768 | loss: 0.85468 | err: 0.38544
batch 468 / 768 | loss: 0.85285 | err: 0.38462
batch 469 / 768 | loss: 0.86004 | err: 0.38593
batch 470 / 768 | loss: 0.85873 | err: 0.38511
batch 471 / 768 | loss: 0.86020 | err: 0.38641
batch 472 / 768 | loss: 0.85838 | err: 0.38559
batch 473 / 768 | loss: 0.86034 | err: 0.38689
batch 474 / 768 | loss: 0.85866 | err: 0.38608
batch 475 / 768 | loss: 0.85685 | err: 0.38526
batch 476 / 768 | loss: 0.86078 | err: 0.38655
batch 477 / 768 | loss: 0.85897 | err: 0.38574
batch 478 / 768 | loss: 0.85717 | err: 0.38494
batch 479 / 768 | loss: 0.85580 | err: 0.38413
batch 480 / 768 | loss: 0.85718 | err: 0.38542
batch 481 / 768 | loss: 0.85605 | err: 0.38462
batch 482 / 7

batch 636 / 768 | loss: 0.83669 | err: 0.38050
batch 637 / 768 | loss: 0.83835 | err: 0.38148
batch 638 / 768 | loss: 0.83707 | err: 0.38088
batch 639 / 768 | loss: 0.83851 | err: 0.38185
batch 640 / 768 | loss: 0.83951 | err: 0.38281
batch 641 / 768 | loss: 0.83820 | err: 0.38222
batch 642 / 768 | loss: 0.83918 | err: 0.38318
batch 643 / 768 | loss: 0.84373 | err: 0.38414
batch 644 / 768 | loss: 0.84656 | err: 0.38509
batch 645 / 768 | loss: 0.84525 | err: 0.38450
batch 646 / 768 | loss: 0.84394 | err: 0.38390
batch 647 / 768 | loss: 0.84524 | err: 0.38485
batch 648 / 768 | loss: 0.84393 | err: 0.38426
batch 649 / 768 | loss: 0.84263 | err: 0.38367
batch 650 / 768 | loss: 0.84133 | err: 0.38308
batch 651 / 768 | loss: 0.84004 | err: 0.38249
batch 652 / 768 | loss: 0.84094 | err: 0.38344
batch 653 / 768 | loss: 0.84203 | err: 0.38438
batch 654 / 768 | loss: 0.84075 | err: 0.38379
batch 655 / 768 | loss: 0.84183 | err: 0.38473
batch 656 / 768 | loss: 0.84055 | err: 0.38415
batch 657 / 7

batch 041 / 768 | loss: 1.07469 | err: 0.46341
batch 042 / 768 | loss: 1.08959 | err: 0.47619
batch 043 / 768 | loss: 1.13575 | err: 0.48837
batch 044 / 768 | loss: 1.10994 | err: 0.47727
batch 045 / 768 | loss: 1.08527 | err: 0.46667
batch 046 / 768 | loss: 1.06168 | err: 0.45652
batch 047 / 768 | loss: 1.03909 | err: 0.44681
batch 048 / 768 | loss: 1.03437 | err: 0.43750
batch 049 / 768 | loss: 1.03836 | err: 0.44898
batch 050 / 768 | loss: 1.02614 | err: 0.44000
batch 051 / 768 | loss: 1.00602 | err: 0.43137
batch 052 / 768 | loss: 1.01274 | err: 0.44231
batch 053 / 768 | loss: 1.02143 | err: 0.45283
batch 054 / 768 | loss: 1.00818 | err: 0.44444
batch 055 / 768 | loss: 1.02044 | err: 0.45455
batch 056 / 768 | loss: 1.03323 | err: 0.46429
batch 057 / 768 | loss: 1.04222 | err: 0.47368
batch 058 / 768 | loss: 1.02914 | err: 0.46552
batch 059 / 768 | loss: 1.01824 | err: 0.45763
batch 060 / 768 | loss: 1.00127 | err: 0.45000
batch 061 / 768 | loss: 0.98486 | err: 0.44262
batch 062 / 7

batch 217 / 768 | loss: 0.91598 | err: 0.41014
batch 218 / 768 | loss: 0.91906 | err: 0.41284
batch 219 / 768 | loss: 0.93358 | err: 0.41553
batch 220 / 768 | loss: 0.92934 | err: 0.41364
batch 221 / 768 | loss: 0.92514 | err: 0.41176
batch 222 / 768 | loss: 0.92884 | err: 0.41441
batch 223 / 768 | loss: 0.93146 | err: 0.41704
batch 224 / 768 | loss: 0.93357 | err: 0.41964
batch 225 / 768 | loss: 0.93950 | err: 0.42222
batch 226 / 768 | loss: 0.93534 | err: 0.42035
batch 227 / 768 | loss: 0.94124 | err: 0.42291
batch 228 / 768 | loss: 0.93711 | err: 0.42105
batch 229 / 768 | loss: 0.93302 | err: 0.41921
batch 230 / 768 | loss: 0.93663 | err: 0.42174
batch 231 / 768 | loss: 0.93258 | err: 0.41991
batch 232 / 768 | loss: 0.93448 | err: 0.42241
batch 233 / 768 | loss: 0.93047 | err: 0.42060
batch 234 / 768 | loss: 0.93302 | err: 0.42308
batch 235 / 768 | loss: 0.93093 | err: 0.42128
batch 236 / 768 | loss: 0.92699 | err: 0.41949
batch 237 / 768 | loss: 0.92837 | err: 0.42194
batch 238 / 7

  "Solution may be inaccurate. Try another solver, "


batch 268 / 768 | loss: 0.91594 | err: 0.41791
batch 269 / 768 | loss: 0.91254 | err: 0.41636
batch 270 / 768 | loss: 0.91627 | err: 0.41852
batch 271 / 768 | loss: 0.91289 | err: 0.41697
batch 272 / 768 | loss: 0.90953 | err: 0.41544
batch 273 / 768 | loss: 0.91373 | err: 0.41758
batch 274 / 768 | loss: 0.91490 | err: 0.41971
batch 275 / 768 | loss: 0.91157 | err: 0.41818
batch 276 / 768 | loss: 0.90843 | err: 0.41667
batch 277 / 768 | loss: 0.91101 | err: 0.41877
batch 278 / 768 | loss: 0.90824 | err: 0.41727
batch 279 / 768 | loss: 0.90677 | err: 0.41577
batch 280 / 768 | loss: 0.91168 | err: 0.41786
batch 281 / 768 | loss: 0.90912 | err: 0.41637
batch 282 / 768 | loss: 0.91164 | err: 0.41844
batch 283 / 768 | loss: 0.91042 | err: 0.41696
batch 284 / 768 | loss: 0.90841 | err: 0.41549
batch 285 / 768 | loss: 0.91016 | err: 0.41754
batch 286 / 768 | loss: 0.90804 | err: 0.41608
batch 287 / 768 | loss: 0.90488 | err: 0.41463
batch 288 / 768 | loss: 0.90280 | err: 0.41319
batch 289 / 7

batch 447 / 768 | loss: 0.88517 | err: 0.41163
batch 448 / 768 | loss: 0.88448 | err: 0.41071
batch 449 / 768 | loss: 0.88251 | err: 0.40980
batch 450 / 768 | loss: 0.88425 | err: 0.41111
batch 451 / 768 | loss: 0.88229 | err: 0.41020
batch 452 / 768 | loss: 0.88387 | err: 0.41150
batch 453 / 768 | loss: 0.88192 | err: 0.41060
batch 454 / 768 | loss: 0.88316 | err: 0.41189
batch 455 / 768 | loss: 0.88494 | err: 0.41319
batch 456 / 768 | loss: 0.88867 | err: 0.41447
batch 457 / 768 | loss: 0.89090 | err: 0.41575
batch 458 / 768 | loss: 0.88895 | err: 0.41485
batch 459 / 768 | loss: 0.88786 | err: 0.41394
batch 460 / 768 | loss: 0.89292 | err: 0.41522
batch 461 / 768 | loss: 0.89386 | err: 0.41649
batch 462 / 768 | loss: 0.89512 | err: 0.41775
batch 463 / 768 | loss: 0.89319 | err: 0.41685
batch 464 / 768 | loss: 0.89297 | err: 0.41595
batch 465 / 768 | loss: 0.89105 | err: 0.41505
batch 466 / 768 | loss: 0.89071 | err: 0.41416
batch 467 / 768 | loss: 0.88923 | err: 0.41328
batch 468 / 7

batch 625 / 768 | loss: 0.87944 | err: 0.40960
batch 626 / 768 | loss: 0.87804 | err: 0.40895
batch 627 / 768 | loss: 0.87737 | err: 0.40829
batch 628 / 768 | loss: 0.87891 | err: 0.40924
batch 629 / 768 | loss: 0.87801 | err: 0.40859
batch 630 / 768 | loss: 0.87661 | err: 0.40794
batch 631 / 768 | loss: 0.87523 | err: 0.40729
batch 632 / 768 | loss: 0.87384 | err: 0.40665
batch 633 / 768 | loss: 0.87493 | err: 0.40758
batch 634 / 768 | loss: 0.87355 | err: 0.40694
batch 635 / 768 | loss: 0.87217 | err: 0.40630
batch 636 / 768 | loss: 0.87080 | err: 0.40566
batch 637 / 768 | loss: 0.86965 | err: 0.40502
batch 638 / 768 | loss: 0.87107 | err: 0.40596
batch 639 / 768 | loss: 0.87950 | err: 0.40689
batch 640 / 768 | loss: 0.87812 | err: 0.40625
batch 641 / 768 | loss: 0.87675 | err: 0.40562
batch 642 / 768 | loss: 0.87539 | err: 0.40498
batch 643 / 768 | loss: 0.87403 | err: 0.40435
batch 644 / 768 | loss: 0.87267 | err: 0.40373
batch 645 / 768 | loss: 0.87132 | err: 0.40310
batch 646 / 7

batch 032 / 768 | loss: 0.95134 | err: 0.40625
batch 033 / 768 | loss: 1.03188 | err: 0.42424
batch 034 / 768 | loss: 1.00153 | err: 0.41176
batch 035 / 768 | loss: 0.97292 | err: 0.40000
batch 036 / 768 | loss: 0.94589 | err: 0.38889
batch 037 / 768 | loss: 0.93050 | err: 0.37838
batch 038 / 768 | loss: 0.95960 | err: 0.39474
batch 039 / 768 | loss: 1.00895 | err: 0.41026
batch 040 / 768 | loss: 0.98373 | err: 0.40000
batch 041 / 768 | loss: 0.97334 | err: 0.39024
batch 042 / 768 | loss: 0.98259 | err: 0.40476
batch 043 / 768 | loss: 0.95974 | err: 0.39535
batch 044 / 768 | loss: 0.93793 | err: 0.38636
batch 045 / 768 | loss: 0.95005 | err: 0.40000
batch 046 / 768 | loss: 0.96433 | err: 0.41304
batch 047 / 768 | loss: 0.94382 | err: 0.40426
batch 048 / 768 | loss: 0.92686 | err: 0.39583
batch 049 / 768 | loss: 0.98048 | err: 0.40816
batch 050 / 768 | loss: 1.00419 | err: 0.42000
batch 051 / 768 | loss: 0.98969 | err: 0.41176
batch 052 / 768 | loss: 0.98648 | err: 0.40385
batch 053 / 7

batch 209 / 768 | loss: 0.93645 | err: 0.39234
batch 210 / 768 | loss: 0.94378 | err: 0.39524
batch 211 / 768 | loss: 0.93931 | err: 0.39336
batch 212 / 768 | loss: 0.93488 | err: 0.39151
batch 213 / 768 | loss: 0.94014 | err: 0.39437
batch 214 / 768 | loss: 0.94829 | err: 0.39720
batch 215 / 768 | loss: 0.95909 | err: 0.40000
batch 216 / 768 | loss: 0.96664 | err: 0.40278
batch 217 / 768 | loss: 0.96219 | err: 0.40092
batch 218 / 768 | loss: 0.96477 | err: 0.40367
batch 219 / 768 | loss: 0.96974 | err: 0.40639
batch 220 / 768 | loss: 0.97321 | err: 0.40909
batch 221 / 768 | loss: 0.97045 | err: 0.40724
batch 222 / 768 | loss: 0.96608 | err: 0.40541
batch 223 / 768 | loss: 0.96174 | err: 0.40359
batch 224 / 768 | loss: 0.95745 | err: 0.40179
batch 225 / 768 | loss: 0.95641 | err: 0.40000
batch 226 / 768 | loss: 0.95354 | err: 0.39823
batch 227 / 768 | loss: 0.94934 | err: 0.39648
batch 228 / 768 | loss: 0.94583 | err: 0.39474
batch 229 / 768 | loss: 0.94867 | err: 0.39738
batch 230 / 7

batch 386 / 768 | loss: 0.90885 | err: 0.39896
batch 387 / 768 | loss: 0.90680 | err: 0.39793
batch 388 / 768 | loss: 0.90596 | err: 0.39691
batch 389 / 768 | loss: 0.90363 | err: 0.39589
batch 390 / 768 | loss: 0.90244 | err: 0.39487
batch 391 / 768 | loss: 0.90013 | err: 0.39386
batch 392 / 768 | loss: 0.89783 | err: 0.39286
batch 393 / 768 | loss: 0.89555 | err: 0.39186
batch 394 / 768 | loss: 0.89811 | err: 0.39340
batch 395 / 768 | loss: 0.89584 | err: 0.39241
batch 396 / 768 | loss: 0.89357 | err: 0.39141
batch 397 / 768 | loss: 0.89507 | err: 0.39295
batch 398 / 768 | loss: 0.90285 | err: 0.39447
batch 399 / 768 | loss: 0.90058 | err: 0.39348
batch 400 / 768 | loss: 0.89890 | err: 0.39250
batch 401 / 768 | loss: 0.90091 | err: 0.39401
batch 402 / 768 | loss: 0.90342 | err: 0.39552
batch 403 / 768 | loss: 0.90117 | err: 0.39454
batch 404 / 768 | loss: 0.89894 | err: 0.39356
batch 405 / 768 | loss: 0.90049 | err: 0.39506
batch 406 / 768 | loss: 0.89828 | err: 0.39409
batch 407 / 7

batch 562 / 768 | loss: 0.89611 | err: 0.39146
batch 563 / 768 | loss: 0.89505 | err: 0.39076
batch 564 / 768 | loss: 0.89632 | err: 0.39184
batch 565 / 768 | loss: 0.89490 | err: 0.39115
batch 566 / 768 | loss: 0.89332 | err: 0.39046
batch 567 / 768 | loss: 0.89174 | err: 0.38977
batch 568 / 768 | loss: 0.89324 | err: 0.39085
batch 569 / 768 | loss: 0.89167 | err: 0.39016
batch 570 / 768 | loss: 0.89069 | err: 0.38947
batch 571 / 768 | loss: 0.89214 | err: 0.39054
batch 572 / 768 | loss: 0.89135 | err: 0.38986
batch 573 / 768 | loss: 0.89259 | err: 0.39092
batch 574 / 768 | loss: 0.89199 | err: 0.39024
batch 575 / 768 | loss: 0.89044 | err: 0.38957
batch 576 / 768 | loss: 0.89187 | err: 0.39062
batch 577 / 768 | loss: 0.89328 | err: 0.39168
batch 578 / 768 | loss: 0.89206 | err: 0.39100
batch 579 / 768 | loss: 0.89052 | err: 0.39033
batch 580 / 768 | loss: 0.88992 | err: 0.38966
batch 581 / 768 | loss: 0.88839 | err: 0.38898
batch 582 / 768 | loss: 0.89147 | err: 0.39003
batch 583 / 7

batch 738 / 768 | loss: 0.85422 | err: 0.37669
batch 739 / 768 | loss: 0.85306 | err: 0.37618
batch 740 / 768 | loss: 0.85213 | err: 0.37568
batch 741 / 768 | loss: 0.85336 | err: 0.37652
batch 742 / 768 | loss: 0.85221 | err: 0.37601
batch 743 / 768 | loss: 0.85314 | err: 0.37685
batch 744 / 768 | loss: 0.85199 | err: 0.37634
batch 745 / 768 | loss: 0.85085 | err: 0.37584
batch 746 / 768 | loss: 0.85268 | err: 0.37668
batch 747 / 768 | loss: 0.85153 | err: 0.37617
batch 748 / 768 | loss: 0.85040 | err: 0.37567
batch 749 / 768 | loss: 0.85158 | err: 0.37650
batch 750 / 768 | loss: 0.85278 | err: 0.37733
batch 751 / 768 | loss: 0.85653 | err: 0.37816
batch 752 / 768 | loss: 0.85755 | err: 0.37899
batch 753 / 768 | loss: 0.85707 | err: 0.37849
batch 754 / 768 | loss: 0.85777 | err: 0.37931
batch 755 / 768 | loss: 0.85911 | err: 0.38013
batch 756 / 768 | loss: 0.85816 | err: 0.37963
batch 757 / 768 | loss: 0.85722 | err: 0.37913
batch 758 / 768 | loss: 0.85627 | err: 0.37863
batch 759 / 7

batch 144 / 768 | loss: 0.89575 | err: 0.38889
batch 145 / 768 | loss: 0.89019 | err: 0.38621
batch 146 / 768 | loss: 0.88666 | err: 0.38356
batch 147 / 768 | loss: 0.88264 | err: 0.38095
batch 148 / 768 | loss: 0.87667 | err: 0.37838
batch 149 / 768 | loss: 0.87348 | err: 0.37584
batch 150 / 768 | loss: 0.86766 | err: 0.37333
batch 151 / 768 | loss: 0.86191 | err: 0.37086
batch 152 / 768 | loss: 0.87141 | err: 0.37500
batch 153 / 768 | loss: 0.87462 | err: 0.37908
batch 154 / 768 | loss: 0.88139 | err: 0.38312
batch 155 / 768 | loss: 0.87571 | err: 0.38065
batch 156 / 768 | loss: 0.87009 | err: 0.37821
batch 157 / 768 | loss: 0.87677 | err: 0.38217
batch 158 / 768 | loss: 0.87123 | err: 0.37975
batch 159 / 768 | loss: 0.87548 | err: 0.38365
batch 160 / 768 | loss: 0.87991 | err: 0.38750
batch 161 / 768 | loss: 0.87852 | err: 0.38509
batch 162 / 768 | loss: 0.88076 | err: 0.38889
batch 163 / 768 | loss: 0.87739 | err: 0.38650
batch 164 / 768 | loss: 0.88148 | err: 0.39024
batch 165 / 7

batch 321 / 768 | loss: 0.86854 | err: 0.37383
batch 322 / 768 | loss: 0.86584 | err: 0.37267
batch 323 / 768 | loss: 0.86316 | err: 0.37152
batch 324 / 768 | loss: 0.86762 | err: 0.37346
batch 325 / 768 | loss: 0.86640 | err: 0.37231
batch 326 / 768 | loss: 0.86374 | err: 0.37117
batch 327 / 768 | loss: 0.86110 | err: 0.37003
batch 328 / 768 | loss: 0.86291 | err: 0.37195
batch 329 / 768 | loss: 0.86559 | err: 0.37386
batch 330 / 768 | loss: 0.86320 | err: 0.37273
batch 331 / 768 | loss: 0.86556 | err: 0.37462
batch 332 / 768 | loss: 0.86787 | err: 0.37651
batch 333 / 768 | loss: 0.86527 | err: 0.37538
batch 334 / 768 | loss: 0.86722 | err: 0.37725
batch 335 / 768 | loss: 0.87081 | err: 0.37910
batch 336 / 768 | loss: 0.87329 | err: 0.38095
batch 337 / 768 | loss: 0.87222 | err: 0.37982
batch 338 / 768 | loss: 0.87020 | err: 0.37870
batch 339 / 768 | loss: 0.86763 | err: 0.37758
batch 340 / 768 | loss: 0.87420 | err: 0.37941
batch 341 / 768 | loss: 0.87164 | err: 0.37830
batch 342 / 7

batch 497 / 768 | loss: 0.80805 | err: 0.34809
batch 498 / 768 | loss: 0.80744 | err: 0.34739
batch 499 / 768 | loss: 0.80582 | err: 0.34669
batch 500 / 768 | loss: 0.80773 | err: 0.34800
batch 501 / 768 | loss: 0.80612 | err: 0.34731
batch 502 / 768 | loss: 0.80452 | err: 0.34661
batch 503 / 768 | loss: 0.80292 | err: 0.34592
batch 504 / 768 | loss: 0.80132 | err: 0.34524
batch 505 / 768 | loss: 0.80336 | err: 0.34653
batch 506 / 768 | loss: 0.80177 | err: 0.34585
batch 507 / 768 | loss: 0.80073 | err: 0.34517
batch 508 / 768 | loss: 0.79972 | err: 0.34449
batch 509 / 768 | loss: 0.79815 | err: 0.34381
batch 510 / 768 | loss: 0.79961 | err: 0.34510
batch 511 / 768 | loss: 0.80156 | err: 0.34638
batch 512 / 768 | loss: 0.79999 | err: 0.34570
batch 513 / 768 | loss: 0.80188 | err: 0.34698
batch 514 / 768 | loss: 0.80456 | err: 0.34825
batch 515 / 768 | loss: 0.80300 | err: 0.34757
batch 516 / 768 | loss: 0.80219 | err: 0.34690
batch 517 / 768 | loss: 0.80344 | err: 0.34816
batch 518 / 7

batch 672 / 768 | loss: 0.77994 | err: 0.33780
batch 673 / 768 | loss: 0.77878 | err: 0.33730
batch 674 / 768 | loss: 0.77763 | err: 0.33680
batch 675 / 768 | loss: 0.77647 | err: 0.33630
batch 676 / 768 | loss: 0.77565 | err: 0.33580
batch 677 / 768 | loss: 0.77473 | err: 0.33530
batch 678 / 768 | loss: 0.77359 | err: 0.33481
batch 679 / 768 | loss: 0.77513 | err: 0.33579
batch 680 / 768 | loss: 0.77399 | err: 0.33529
batch 681 / 768 | loss: 0.77285 | err: 0.33480
batch 682 / 768 | loss: 0.77531 | err: 0.33578
batch 683 / 768 | loss: 0.77917 | err: 0.33675
batch 684 / 768 | loss: 0.77803 | err: 0.33626
batch 685 / 768 | loss: 0.77946 | err: 0.33723
batch 686 / 768 | loss: 0.77832 | err: 0.33673
batch 687 / 768 | loss: 0.77719 | err: 0.33624
batch 688 / 768 | loss: 0.77835 | err: 0.33721
batch 689 / 768 | loss: 0.78213 | err: 0.33817
batch 690 / 768 | loss: 0.78323 | err: 0.33913
batch 691 / 768 | loss: 0.78210 | err: 0.33864
batch 692 / 768 | loss: 0.78348 | err: 0.33960
batch 693 / 7

batch 078 / 384 | loss: 0.79242 | err: 0.35897
batch 079 / 384 | loss: 0.78627 | err: 0.35443
batch 080 / 384 | loss: 0.77644 | err: 0.35000
batch 081 / 384 | loss: 0.77550 | err: 0.35185
batch 082 / 384 | loss: 0.78091 | err: 0.35366
batch 083 / 384 | loss: 0.77150 | err: 0.34940
batch 084 / 384 | loss: 0.76579 | err: 0.34524
batch 085 / 384 | loss: 0.75678 | err: 0.34118
batch 086 / 384 | loss: 0.74976 | err: 0.33721
batch 087 / 384 | loss: 0.75337 | err: 0.33908
batch 088 / 384 | loss: 0.74481 | err: 0.33523
batch 089 / 384 | loss: 0.73644 | err: 0.33146
batch 090 / 384 | loss: 0.72826 | err: 0.32778
batch 091 / 384 | loss: 0.72777 | err: 0.32967
batch 092 / 384 | loss: 0.75019 | err: 0.33696
batch 093 / 384 | loss: 0.74212 | err: 0.33333
batch 094 / 384 | loss: 0.75401 | err: 0.34043
batch 095 / 384 | loss: 0.77242 | err: 0.34737
batch 096 / 384 | loss: 0.77241 | err: 0.34896
batch 097 / 384 | loss: 0.78068 | err: 0.35567
batch 098 / 384 | loss: 0.78371 | err: 0.35714
batch 099 / 3

batch 253 / 384 | loss: 0.79412 | err: 0.37352
batch 254 / 384 | loss: 0.79398 | err: 0.37402
batch 255 / 384 | loss: 0.79558 | err: 0.37451
batch 256 / 384 | loss: 0.79343 | err: 0.37305
batch 257 / 384 | loss: 0.79130 | err: 0.37160
batch 258 / 384 | loss: 0.78890 | err: 0.37016
batch 259 / 384 | loss: 0.78585 | err: 0.36873
batch 260 / 384 | loss: 0.78334 | err: 0.36731
batch 261 / 384 | loss: 0.78141 | err: 0.36590
batch 262 / 384 | loss: 0.77940 | err: 0.36450
batch 263 / 384 | loss: 0.78355 | err: 0.36692
batch 264 / 384 | loss: 0.78419 | err: 0.36742
batch 265 / 384 | loss: 0.78370 | err: 0.36792
batch 266 / 384 | loss: 0.78946 | err: 0.37030
batch 267 / 384 | loss: 0.78709 | err: 0.36891
batch 268 / 384 | loss: 0.79086 | err: 0.36940
batch 269 / 384 | loss: 0.78792 | err: 0.36803
batch 270 / 384 | loss: 0.78500 | err: 0.36667
batch 271 / 384 | loss: 0.78211 | err: 0.36531
batch 272 / 384 | loss: 0.77923 | err: 0.36397
batch 273 / 384 | loss: 0.78308 | err: 0.36630
batch 274 / 3

batch 042 / 384 | loss: 0.76496 | err: 0.38095
batch 043 / 384 | loss: 0.75157 | err: 0.37209
batch 044 / 384 | loss: 0.75072 | err: 0.37500
batch 045 / 384 | loss: 0.73494 | err: 0.36667
batch 046 / 384 | loss: 0.74940 | err: 0.38043
batch 047 / 384 | loss: 0.77656 | err: 0.39362
batch 048 / 384 | loss: 0.77316 | err: 0.39583
batch 049 / 384 | loss: 0.76334 | err: 0.38776
batch 050 / 384 | loss: 0.74807 | err: 0.38000
batch 051 / 384 | loss: 0.74883 | err: 0.38235
batch 052 / 384 | loss: 0.74770 | err: 0.38462
batch 053 / 384 | loss: 0.74303 | err: 0.37736
batch 054 / 384 | loss: 0.74210 | err: 0.37963
batch 055 / 384 | loss: 0.75729 | err: 0.39091
batch 056 / 384 | loss: 0.75960 | err: 0.39286
batch 057 / 384 | loss: 0.76203 | err: 0.39474
batch 058 / 384 | loss: 0.75426 | err: 0.38793
batch 059 / 384 | loss: 0.74818 | err: 0.38136
batch 060 / 384 | loss: 0.73571 | err: 0.37500
batch 061 / 384 | loss: 0.73222 | err: 0.36885
batch 062 / 384 | loss: 0.72439 | err: 0.36290
batch 063 / 3

batch 217 / 384 | loss: 0.79035 | err: 0.38018
batch 218 / 384 | loss: 0.79431 | err: 0.38303
batch 219 / 384 | loss: 0.79361 | err: 0.38356
batch 220 / 384 | loss: 0.79226 | err: 0.38182
batch 221 / 384 | loss: 0.79215 | err: 0.38235
batch 222 / 384 | loss: 0.79385 | err: 0.38288
batch 223 / 384 | loss: 0.79268 | err: 0.38117
batch 224 / 384 | loss: 0.79037 | err: 0.37946
batch 225 / 384 | loss: 0.79329 | err: 0.38000
batch 226 / 384 | loss: 0.79154 | err: 0.37832
batch 227 / 384 | loss: 0.79156 | err: 0.37885
batch 228 / 384 | loss: 0.79287 | err: 0.37939
batch 229 / 384 | loss: 0.78941 | err: 0.37773
batch 230 / 384 | loss: 0.78647 | err: 0.37609
batch 231 / 384 | loss: 0.78617 | err: 0.37662
batch 232 / 384 | loss: 0.78278 | err: 0.37500
batch 233 / 384 | loss: 0.78098 | err: 0.37339
batch 234 / 384 | loss: 0.78168 | err: 0.37393
batch 235 / 384 | loss: 0.78432 | err: 0.37447
batch 236 / 384 | loss: 0.78100 | err: 0.37288
batch 237 / 384 | loss: 0.78087 | err: 0.37342
batch 238 / 3

batch 006 / 384 | loss: 0.70676 | err: 0.41667
batch 007 / 384 | loss: 0.72924 | err: 0.42857
batch 008 / 384 | loss: 0.74716 | err: 0.43750
batch 009 / 384 | loss: 0.76815 | err: 0.44444
batch 010 / 384 | loss: 0.69134 | err: 0.40000
batch 011 / 384 | loss: 0.81649 | err: 0.40909
batch 012 / 384 | loss: 0.81915 | err: 0.41667
batch 013 / 384 | loss: 0.82178 | err: 0.42308
batch 014 / 384 | loss: 0.83379 | err: 0.42857
batch 015 / 384 | loss: 0.77820 | err: 0.40000
batch 016 / 384 | loss: 0.84636 | err: 0.40625
batch 017 / 384 | loss: 0.83375 | err: 0.41176
batch 018 / 384 | loss: 0.85335 | err: 0.44444
batch 019 / 384 | loss: 0.90674 | err: 0.47368
batch 020 / 384 | loss: 0.95661 | err: 0.50000
batch 021 / 384 | loss: 0.97354 | err: 0.50000
batch 022 / 384 | loss: 0.98196 | err: 0.50000
batch 023 / 384 | loss: 0.95212 | err: 0.47826
batch 024 / 384 | loss: 0.94976 | err: 0.47917
batch 025 / 384 | loss: 0.93480 | err: 0.48000
batch 026 / 384 | loss: 0.92069 | err: 0.48077
batch 027 / 3

batch 181 / 384 | loss: 0.79511 | err: 0.35912
batch 182 / 384 | loss: 0.79074 | err: 0.35714
batch 183 / 384 | loss: 0.79047 | err: 0.35792
batch 184 / 384 | loss: 0.79047 | err: 0.35870
batch 185 / 384 | loss: 0.79428 | err: 0.35946
batch 186 / 384 | loss: 0.79568 | err: 0.36022
batch 187 / 384 | loss: 0.79548 | err: 0.36096
batch 188 / 384 | loss: 0.79125 | err: 0.35904
batch 189 / 384 | loss: 0.79459 | err: 0.35979
batch 190 / 384 | loss: 0.79040 | err: 0.35789
batch 191 / 384 | loss: 0.79126 | err: 0.35864
batch 192 / 384 | loss: 0.78770 | err: 0.35677
batch 193 / 384 | loss: 0.78907 | err: 0.35751
batch 194 / 384 | loss: 0.79442 | err: 0.35825
batch 195 / 384 | loss: 0.79692 | err: 0.35897
batch 196 / 384 | loss: 0.79683 | err: 0.35969
batch 197 / 384 | loss: 0.79737 | err: 0.36041
batch 198 / 384 | loss: 0.80007 | err: 0.36111
batch 199 / 384 | loss: 0.79688 | err: 0.35930
batch 200 / 384 | loss: 0.79613 | err: 0.36000
batch 201 / 384 | loss: 0.79485 | err: 0.35821
batch 202 / 3

batch 357 / 384 | loss: 0.79241 | err: 0.36835
batch 358 / 384 | loss: 0.79516 | err: 0.37011
batch 359 / 384 | loss: 0.79452 | err: 0.37047
batch 360 / 384 | loss: 0.79311 | err: 0.36944
batch 361 / 384 | loss: 0.79303 | err: 0.36981
batch 362 / 384 | loss: 0.79493 | err: 0.37155
batch 363 / 384 | loss: 0.79490 | err: 0.37190
batch 364 / 384 | loss: 0.79336 | err: 0.37088
batch 365 / 384 | loss: 0.79119 | err: 0.36986
batch 366 / 384 | loss: 0.79014 | err: 0.36885
batch 367 / 384 | loss: 0.79254 | err: 0.36921
batch 368 / 384 | loss: 0.79287 | err: 0.36957
batch 369 / 384 | loss: 0.79375 | err: 0.36992
batch 370 / 384 | loss: 0.79276 | err: 0.36892
batch 371 / 384 | loss: 0.79062 | err: 0.36792
batch 372 / 384 | loss: 0.79066 | err: 0.36828
batch 373 / 384 | loss: 0.78946 | err: 0.36729
batch 374 / 384 | loss: 0.78921 | err: 0.36765
batch 375 / 384 | loss: 0.78988 | err: 0.36800
batch 376 / 384 | loss: 0.78880 | err: 0.36702
batch 377 / 384 | loss: 0.78670 | err: 0.36605
batch 378 / 3

batch 146 / 384 | loss: 0.83799 | err: 0.38699
batch 147 / 384 | loss: 0.83722 | err: 0.38776
batch 148 / 384 | loss: 0.83726 | err: 0.38851
batch 149 / 384 | loss: 0.83400 | err: 0.38591
batch 150 / 384 | loss: 0.83650 | err: 0.38667
batch 151 / 384 | loss: 0.83096 | err: 0.38411
batch 152 / 384 | loss: 0.83110 | err: 0.38487
batch 153 / 384 | loss: 0.83594 | err: 0.38889
batch 154 / 384 | loss: 0.83574 | err: 0.38961
batch 155 / 384 | loss: 0.84232 | err: 0.39355
batch 156 / 384 | loss: 0.84725 | err: 0.39744
batch 157 / 384 | loss: 0.84610 | err: 0.39490
batch 158 / 384 | loss: 0.84155 | err: 0.39241
batch 159 / 384 | loss: 0.84228 | err: 0.39308
batch 160 / 384 | loss: 0.84750 | err: 0.39687
batch 161 / 384 | loss: 0.84391 | err: 0.39441
batch 162 / 384 | loss: 0.84276 | err: 0.39506
batch 163 / 384 | loss: 0.84483 | err: 0.39571
batch 164 / 384 | loss: 0.84504 | err: 0.39634
batch 165 / 384 | loss: 0.85021 | err: 0.40000
batch 166 / 384 | loss: 0.85526 | err: 0.40361
batch 167 / 3

batch 320 / 384 | loss: 0.82215 | err: 0.38906
batch 321 / 384 | loss: 0.82254 | err: 0.38941
batch 322 / 384 | loss: 0.82372 | err: 0.38975
batch 323 / 384 | loss: 0.82609 | err: 0.39009
batch 324 / 384 | loss: 0.82644 | err: 0.39043
batch 325 / 384 | loss: 0.82682 | err: 0.39077
batch 326 / 384 | loss: 0.82716 | err: 0.39110
batch 327 / 384 | loss: 0.82717 | err: 0.39144
batch 328 / 384 | loss: 0.82465 | err: 0.39024
batch 329 / 384 | loss: 0.82460 | err: 0.39058
batch 330 / 384 | loss: 0.82476 | err: 0.39091
batch 331 / 384 | loss: 0.82257 | err: 0.38973
batch 332 / 384 | loss: 0.82235 | err: 0.39006
batch 333 / 384 | loss: 0.82478 | err: 0.39189
batch 334 / 384 | loss: 0.82514 | err: 0.39222
batch 335 / 384 | loss: 0.82333 | err: 0.39104
batch 336 / 384 | loss: 0.82355 | err: 0.39137
batch 337 / 384 | loss: 0.82336 | err: 0.39169
batch 338 / 384 | loss: 0.82092 | err: 0.39053
batch 339 / 384 | loss: 0.82210 | err: 0.39086
batch 340 / 384 | loss: 0.82013 | err: 0.38971
batch 341 / 3

batch 110 / 384 | loss: 0.74579 | err: 0.34545
batch 111 / 384 | loss: 0.74494 | err: 0.34685
batch 112 / 384 | loss: 0.74554 | err: 0.34821
batch 113 / 384 | loss: 0.75763 | err: 0.35398
batch 114 / 384 | loss: 0.75098 | err: 0.35088
batch 115 / 384 | loss: 0.74445 | err: 0.34783
batch 116 / 384 | loss: 0.73831 | err: 0.34483
batch 117 / 384 | loss: 0.73575 | err: 0.34188
batch 118 / 384 | loss: 0.73624 | err: 0.34322
batch 119 / 384 | loss: 0.73005 | err: 0.34034
batch 120 / 384 | loss: 0.72842 | err: 0.34167
batch 121 / 384 | loss: 0.72658 | err: 0.34298
batch 122 / 384 | loss: 0.72195 | err: 0.34016
batch 123 / 384 | loss: 0.73110 | err: 0.34553
batch 124 / 384 | loss: 0.74246 | err: 0.35081
batch 125 / 384 | loss: 0.74247 | err: 0.35200
batch 126 / 384 | loss: 0.74598 | err: 0.35317
batch 127 / 384 | loss: 0.74933 | err: 0.35433
batch 128 / 384 | loss: 0.74670 | err: 0.35156
batch 129 / 384 | loss: 0.74422 | err: 0.34884
batch 130 / 384 | loss: 0.74429 | err: 0.34615
batch 131 / 3

batch 286 / 384 | loss: 0.75243 | err: 0.34790
batch 287 / 384 | loss: 0.75388 | err: 0.34843
batch 288 / 384 | loss: 0.75332 | err: 0.34896
batch 289 / 384 | loss: 0.75429 | err: 0.34948
batch 290 / 384 | loss: 0.75434 | err: 0.35000
batch 291 / 384 | loss: 0.75457 | err: 0.35052
batch 292 / 384 | loss: 0.75301 | err: 0.34932
batch 293 / 384 | loss: 0.75542 | err: 0.34983
batch 294 / 384 | loss: 0.75725 | err: 0.35204
batch 295 / 384 | loss: 0.75845 | err: 0.35254
batch 296 / 384 | loss: 0.75831 | err: 0.35304
batch 297 / 384 | loss: 0.75619 | err: 0.35185
batch 298 / 384 | loss: 0.75616 | err: 0.35235
batch 299 / 384 | loss: 0.75659 | err: 0.35284
batch 300 / 384 | loss: 0.75526 | err: 0.35167
batch 301 / 384 | loss: 0.75575 | err: 0.35216
batch 302 / 384 | loss: 0.75559 | err: 0.35265
batch 303 / 384 | loss: 0.75601 | err: 0.35314
batch 304 / 384 | loss: 0.75352 | err: 0.35197
batch 305 / 384 | loss: 0.75193 | err: 0.35082
batch 306 / 384 | loss: 0.75630 | err: 0.35131
batch 307 / 3

batch 074 / 192 | loss: 0.71768 | err: 0.32432
batch 075 / 192 | loss: 0.72158 | err: 0.32667
batch 076 / 192 | loss: 0.72085 | err: 0.32895
batch 077 / 192 | loss: 0.71516 | err: 0.32792
batch 078 / 192 | loss: 0.71138 | err: 0.32692
batch 079 / 192 | loss: 0.70238 | err: 0.32278
batch 080 / 192 | loss: 0.69851 | err: 0.32188
batch 081 / 192 | loss: 0.69726 | err: 0.32099
batch 082 / 192 | loss: 0.69371 | err: 0.32012
batch 083 / 192 | loss: 0.68663 | err: 0.31627
batch 084 / 192 | loss: 0.68149 | err: 0.31250
batch 085 / 192 | loss: 0.67633 | err: 0.30882
batch 086 / 192 | loss: 0.67185 | err: 0.30523
batch 087 / 192 | loss: 0.67081 | err: 0.30460
batch 088 / 192 | loss: 0.67190 | err: 0.30682
batch 089 / 192 | loss: 0.67170 | err: 0.30899
batch 090 / 192 | loss: 0.67893 | err: 0.31111
batch 091 / 192 | loss: 0.68075 | err: 0.31319
batch 092 / 192 | loss: 0.68594 | err: 0.31522
batch 093 / 192 | loss: 0.68602 | err: 0.31452
batch 094 / 192 | loss: 0.69185 | err: 0.31649
batch 095 / 1

batch 055 / 192 | loss: 0.75164 | err: 0.37727
batch 056 / 192 | loss: 0.74207 | err: 0.37054
batch 057 / 192 | loss: 0.74972 | err: 0.37719
batch 058 / 192 | loss: 0.75151 | err: 0.37931
batch 059 / 192 | loss: 0.76395 | err: 0.38136
batch 060 / 192 | loss: 0.76080 | err: 0.37917
batch 061 / 192 | loss: 0.75890 | err: 0.38115
batch 062 / 192 | loss: 0.76126 | err: 0.38306
batch 063 / 192 | loss: 0.75986 | err: 0.38095
batch 064 / 192 | loss: 0.77223 | err: 0.38672
batch 065 / 192 | loss: 0.77056 | err: 0.38462
batch 066 / 192 | loss: 0.77242 | err: 0.38636
batch 067 / 192 | loss: 0.77033 | err: 0.38433
batch 068 / 192 | loss: 0.75900 | err: 0.37868
batch 069 / 192 | loss: 0.74800 | err: 0.37319
batch 070 / 192 | loss: 0.74958 | err: 0.37500
batch 071 / 192 | loss: 0.74060 | err: 0.36972
batch 072 / 192 | loss: 0.74805 | err: 0.37500
batch 073 / 192 | loss: 0.74384 | err: 0.37329
batch 074 / 192 | loss: 0.73883 | err: 0.37162
batch 075 / 192 | loss: 0.73122 | err: 0.36667
batch 076 / 1

batch 036 / 192 | loss: 0.87484 | err: 0.40278
batch 037 / 192 | loss: 0.88082 | err: 0.40541
batch 038 / 192 | loss: 0.88226 | err: 0.40789
batch 039 / 192 | loss: 0.87327 | err: 0.40385
batch 040 / 192 | loss: 0.89773 | err: 0.41875
batch 041 / 192 | loss: 0.89860 | err: 0.42073
batch 042 / 192 | loss: 0.90489 | err: 0.42262
batch 043 / 192 | loss: 0.90049 | err: 0.41860
batch 044 / 192 | loss: 0.88055 | err: 0.40909
batch 045 / 192 | loss: 0.88385 | err: 0.41111
batch 046 / 192 | loss: 0.89319 | err: 0.41848
batch 047 / 192 | loss: 0.87620 | err: 0.40957
batch 048 / 192 | loss: 0.87219 | err: 0.40625
batch 049 / 192 | loss: 0.87366 | err: 0.40816
batch 050 / 192 | loss: 0.88460 | err: 0.41000
batch 051 / 192 | loss: 0.88475 | err: 0.41176
batch 052 / 192 | loss: 0.89847 | err: 0.41827
batch 053 / 192 | loss: 0.90100 | err: 0.41981
batch 054 / 192 | loss: 0.89552 | err: 0.41667
batch 055 / 192 | loss: 0.88748 | err: 0.41364
batch 056 / 192 | loss: 0.87945 | err: 0.41071
batch 057 / 1

batch 016 / 192 | loss: 0.84921 | err: 0.42188
batch 017 / 192 | loss: 0.84530 | err: 0.42647
batch 018 / 192 | loss: 0.83027 | err: 0.41667
batch 019 / 192 | loss: 0.82559 | err: 0.42105
batch 020 / 192 | loss: 0.79316 | err: 0.40000
batch 021 / 192 | loss: 0.81795 | err: 0.41667
batch 022 / 192 | loss: 0.82412 | err: 0.42045
batch 023 / 192 | loss: 0.81986 | err: 0.42391
batch 024 / 192 | loss: 0.81172 | err: 0.41667
batch 025 / 192 | loss: 0.79564 | err: 0.41000
batch 026 / 192 | loss: 0.77387 | err: 0.39423
batch 027 / 192 | loss: 0.77569 | err: 0.38889
batch 028 / 192 | loss: 0.76370 | err: 0.38393
batch 029 / 192 | loss: 0.74390 | err: 0.37069
batch 030 / 192 | loss: 0.73870 | err: 0.36667
batch 031 / 192 | loss: 0.75254 | err: 0.37097
batch 032 / 192 | loss: 0.75923 | err: 0.37500
batch 033 / 192 | loss: 0.75860 | err: 0.37121
batch 034 / 192 | loss: 0.74575 | err: 0.36029
batch 035 / 192 | loss: 0.73261 | err: 0.35714
batch 036 / 192 | loss: 0.71241 | err: 0.34722
batch 037 / 1

batch 191 / 192 | loss: 0.74634 | err: 0.35733
batch 192 / 192 | loss: 0.74468 | err: 0.35677
------------- epoch 004 / 005 | time: 061 sec | loss: 0.79755 | err: 0.41797
batch 001 / 192 | loss: 0.66643 | err: 0.25000
batch 002 / 192 | loss: 0.92710 | err: 0.37500
batch 003 / 192 | loss: 0.84885 | err: 0.33333
batch 004 / 192 | loss: 0.66389 | err: 0.25000
batch 005 / 192 | loss: 0.83197 | err: 0.35000
batch 006 / 192 | loss: 0.87784 | err: 0.37500
batch 007 / 192 | loss: 0.95441 | err: 0.42857
batch 008 / 192 | loss: 0.92092 | err: 0.43750
batch 009 / 192 | loss: 0.83207 | err: 0.38889
batch 010 / 192 | loss: 0.75655 | err: 0.35000
batch 011 / 192 | loss: 0.77979 | err: 0.36364
batch 012 / 192 | loss: 0.81678 | err: 0.35417
batch 013 / 192 | loss: 0.82516 | err: 0.36538
batch 014 / 192 | loss: 0.82891 | err: 0.37500
batch 015 / 192 | loss: 0.83184 | err: 0.38333
batch 016 / 192 | loss: 0.81249 | err: 0.37500
batch 017 / 192 | loss: 0.77330 | err: 0.35294
batch 018 / 192 | loss: 0.7371

batch 172 / 192 | loss: 0.74093 | err: 0.35756
batch 173 / 192 | loss: 0.74592 | err: 0.35838
batch 174 / 192 | loss: 0.74860 | err: 0.35920
batch 175 / 192 | loss: 0.74902 | err: 0.35857
batch 176 / 192 | loss: 0.75431 | err: 0.36080
batch 177 / 192 | loss: 0.75526 | err: 0.36017
batch 178 / 192 | loss: 0.75953 | err: 0.36236
batch 179 / 192 | loss: 0.76232 | err: 0.36313
batch 180 / 192 | loss: 0.75886 | err: 0.36111
batch 181 / 192 | loss: 0.75520 | err: 0.35912
batch 182 / 192 | loss: 0.75320 | err: 0.35852
batch 183 / 192 | loss: 0.75125 | err: 0.35792
batch 184 / 192 | loss: 0.75340 | err: 0.35870
batch 185 / 192 | loss: 0.75619 | err: 0.35946
batch 186 / 192 | loss: 0.75904 | err: 0.36022
batch 187 / 192 | loss: 0.76125 | err: 0.36096
batch 188 / 192 | loss: 0.75940 | err: 0.36037
batch 189 / 192 | loss: 0.75539 | err: 0.35847
batch 190 / 192 | loss: 0.75432 | err: 0.35789
batch 191 / 192 | loss: 0.75307 | err: 0.35733
batch 192 / 192 | loss: 0.74969 | err: 0.35547
model saved!


batch 054 / 096 | loss: 0.71072 | err: 0.36806
batch 055 / 096 | loss: 0.71043 | err: 0.36818
batch 056 / 096 | loss: 0.70736 | err: 0.36607
batch 057 / 096 | loss: 0.71025 | err: 0.36623
batch 058 / 096 | loss: 0.70566 | err: 0.36638
batch 059 / 096 | loss: 0.69737 | err: 0.36017
batch 060 / 096 | loss: 0.69166 | err: 0.35625
batch 061 / 096 | loss: 0.68570 | err: 0.35246
batch 062 / 096 | loss: 0.68236 | err: 0.35081
batch 063 / 096 | loss: 0.69276 | err: 0.35714
batch 064 / 096 | loss: 0.69003 | err: 0.35742
batch 065 / 096 | loss: 0.68457 | err: 0.35385
batch 066 / 096 | loss: 0.68466 | err: 0.35417
batch 067 / 096 | loss: 0.67912 | err: 0.35261
batch 068 / 096 | loss: 0.67932 | err: 0.35294
batch 069 / 096 | loss: 0.67579 | err: 0.35145
batch 070 / 096 | loss: 0.68553 | err: 0.35357
batch 071 / 096 | loss: 0.69155 | err: 0.35563
batch 072 / 096 | loss: 0.68481 | err: 0.35069
batch 073 / 096 | loss: 0.68019 | err: 0.34932
batch 074 / 096 | loss: 0.68395 | err: 0.35135
batch 075 / 0

batch 033 / 096 | loss: 0.75300 | err: 0.38258
batch 034 / 096 | loss: 0.74366 | err: 0.37868
batch 035 / 096 | loss: 0.74586 | err: 0.37857
batch 036 / 096 | loss: 0.74583 | err: 0.37153
batch 037 / 096 | loss: 0.75431 | err: 0.37838
batch 038 / 096 | loss: 0.75601 | err: 0.37829
batch 039 / 096 | loss: 0.75569 | err: 0.38141
batch 040 / 096 | loss: 0.75023 | err: 0.37812
batch 041 / 096 | loss: 0.74393 | err: 0.37500
batch 042 / 096 | loss: 0.74216 | err: 0.37798
batch 043 / 096 | loss: 0.73476 | err: 0.37500
batch 044 / 096 | loss: 0.73674 | err: 0.37784
batch 045 / 096 | loss: 0.72496 | err: 0.36944
batch 046 / 096 | loss: 0.72680 | err: 0.36957
batch 047 / 096 | loss: 0.72597 | err: 0.36702
batch 048 / 096 | loss: 0.72978 | err: 0.36979
batch 049 / 096 | loss: 0.72515 | err: 0.36735
batch 050 / 096 | loss: 0.72052 | err: 0.36250
batch 051 / 096 | loss: 0.73554 | err: 0.36765
batch 052 / 096 | loss: 0.73314 | err: 0.36538
batch 053 / 096 | loss: 0.72789 | err: 0.36085
batch 054 / 0

batch 012 / 048 | loss: 0.83261 | err: 0.40104
batch 013 / 048 | loss: 0.86307 | err: 0.41346
batch 014 / 048 | loss: 0.85845 | err: 0.42411
batch 015 / 048 | loss: 0.85537 | err: 0.42083
batch 016 / 048 | loss: 0.83367 | err: 0.41016
batch 017 / 048 | loss: 0.83172 | err: 0.41544
batch 018 / 048 | loss: 0.82203 | err: 0.40972
batch 019 / 048 | loss: 0.81618 | err: 0.41118
batch 020 / 048 | loss: 0.79036 | err: 0.40000
batch 021 / 048 | loss: 0.77189 | err: 0.38988
batch 022 / 048 | loss: 0.75964 | err: 0.38068
batch 023 / 048 | loss: 0.76059 | err: 0.38315
batch 024 / 048 | loss: 0.76092 | err: 0.38021
batch 025 / 048 | loss: 0.75803 | err: 0.38000
batch 026 / 048 | loss: 0.75035 | err: 0.37500
batch 027 / 048 | loss: 0.75444 | err: 0.37500
batch 028 / 048 | loss: 0.75346 | err: 0.37723
batch 029 / 048 | loss: 0.75503 | err: 0.37500
batch 030 / 048 | loss: 0.75744 | err: 0.37708
batch 031 / 048 | loss: 0.75711 | err: 0.37903
batch 032 / 048 | loss: 0.74410 | err: 0.37109
batch 033 / 0

batch 037 / 048 | loss: 0.68212 | err: 0.33953
batch 038 / 048 | loss: 0.68088 | err: 0.34046
batch 039 / 048 | loss: 0.67869 | err: 0.33814
batch 040 / 048 | loss: 0.67500 | err: 0.33437
batch 041 / 048 | loss: 0.67628 | err: 0.33384
batch 042 / 048 | loss: 0.67513 | err: 0.33333
batch 043 / 048 | loss: 0.67264 | err: 0.32994
batch 044 / 048 | loss: 0.66642 | err: 0.32670
batch 045 / 048 | loss: 0.66726 | err: 0.32778
batch 046 / 048 | loss: 0.66872 | err: 0.32609
batch 047 / 048 | loss: 0.66793 | err: 0.32580
batch 048 / 048 | loss: 0.66331 | err: 0.32292
------------- epoch 004 / 005 | time: 029 sec | loss: 0.74580 | err: 0.37500
batch 001 / 048 | loss: 0.75050 | err: 0.37500
batch 002 / 048 | loss: 0.88819 | err: 0.50000
batch 003 / 048 | loss: 0.81576 | err: 0.41667
batch 004 / 048 | loss: 0.76209 | err: 0.39062
batch 005 / 048 | loss: 0.66555 | err: 0.33750
batch 006 / 048 | loss: 0.61986 | err: 0.30208
batch 007 / 048 | loss: 0.60673 | err: 0.29464
batch 008 / 048 | loss: 0.6727

batch 009 / 024 | loss: 0.72545 | err: 0.40278
batch 010 / 024 | loss: 0.72191 | err: 0.40000
batch 011 / 024 | loss: 0.71662 | err: 0.39489
batch 012 / 024 | loss: 0.70891 | err: 0.39062
batch 013 / 024 | loss: 0.69743 | err: 0.37740
batch 014 / 024 | loss: 0.68656 | err: 0.36384
batch 015 / 024 | loss: 0.67271 | err: 0.35417
batch 016 / 024 | loss: 0.65893 | err: 0.34570
batch 017 / 024 | loss: 0.67484 | err: 0.35478
batch 018 / 024 | loss: 0.69362 | err: 0.36632
batch 019 / 024 | loss: 0.68996 | err: 0.36513
batch 020 / 024 | loss: 0.68067 | err: 0.35938
batch 021 / 024 | loss: 0.68283 | err: 0.36012
batch 022 / 024 | loss: 0.67829 | err: 0.35653
batch 023 / 024 | loss: 0.67137 | err: 0.35326
batch 024 / 024 | loss: 0.67153 | err: 0.35286
------------- epoch 005 / 005 | time: 025 sec | loss: 0.71739 | err: 0.35547
training time: 140.9406497478485 seconds
batch 001 / 012 | loss: 0.97793 | err: 0.54688
batch 002 / 012 | loss: 0.89142 | err: 0.43750
batch 003 / 012 | loss: 0.85175 | er

In [48]:
def _trim_to_full_batches(features, labels, batch_size):
    """Drop trailing samples so the sample count is an exact multiple of ``batch_size``.

    The CCP solver allocates its cvxpy variables/parameters with a fixed
    ``(batch_size, x_dim)`` shape, so the data is cut down to whole batches
    of the batch size that will actually be used for the dataset.

    Args:
        features: indexable collection of samples (sliced along its first axis).
        labels: labels aligned with ``features``.
        batch_size: positive batch size the dataset will be trained with.

    Returns:
        ``(features, labels)`` truncated to ``len(features) - len(features) % batch_size`` rows.
    """
    usable = len(features) - len(features) % batch_size
    return features[:usable], labels[:usable]


# One entry per real-world dataset:
#   loader      -- callable returning (X, Y)
#   max_samples -- optional cap applied before the train/validation split
#   batch_size  -- batch size used for training; both splits are trimmed to a multiple of it
dataset_specs = [
    {"name": "distress", "loader": load_financial_distress_data, "max_samples": None, "epochs": 7, "batch_size": 24},
    {"name": "fraud", "loader": load_card_fraud_data, "max_samples": None, "epochs": 7, "batch_size": 24},
    {"name": "credit", "loader": load_credit_default_data, "max_samples": 3000, "epochs": 7, "batch_size": 64},
    {"name": "spam", "loader": load_spam_data, "max_samples": None, "epochs": 7, "batch_size": 128},
]

training_datas = []
for spec in dataset_specs:
    X, Y = spec["loader"]()
    if spec["max_samples"] is not None:
        X, Y = X[:spec["max_samples"]], Y[:spec["max_samples"]]

    # split_data puts the first 25% of the samples in the validation split.
    X, Y, Xval, Yval = split_data(X, Y, 0.25)

    # Trim to the dataset's own batch size (not a fixed 24), so that datasets
    # trained with batch_size 64 / 128 also end on a full batch.
    X, Y = _trim_to_full_batches(X, Y, spec["batch_size"])
    Xval, Yval = _trim_to_full_batches(Xval, Yval, spec["batch_size"])

    training_datas.append({"X": X,
                           "Y": Y,
                           "Xval": Xval,
                           "Yval": Yval,
                           "epochs": spec["epochs"],
                           "batch_size": spec["batch_size"],
                           "name": spec["name"]})

In [None]:
# Root directory for the per-dataset artifacts written below. Defaults to the
# original location; set the REAL_DATASET_RUNTIMES_DIR environment variable to
# redirect the output when running on another machine.
PATH = os.environ.get(
    "REAL_DATASET_RUNTIMES_DIR",
    "C:/Users/sagil/Desktop/nir_project/models/real_dataset_runtimes",
)

# For every prepared dataset: persist the exact splits, train a strategic model
# with the dataset's batch size / epoch count, and store the measured runtimes.
for training_data in training_datas:
    path = PATH + "/" + training_data["name"]

    # load dataset
    X = training_data["X"]
    Y = training_data["Y"]
    Xval = training_data["Xval"]
    Yval = training_data["Yval"]

    # save dataset splits (exist_ok avoids the check-then-create race and
    # makes the cell safe to re-run)
    os.makedirs(path, exist_ok=True)
    pd.DataFrame(X.numpy()).to_csv(path + '/X.csv')
    pd.DataFrame(Y.numpy()).to_csv(path + '/Y.csv')
    pd.DataFrame(Xval.numpy()).to_csv(path + '/Xval.csv')
    pd.DataFrame(Yval.numpy()).to_csv(path + '/Yval.csv')

    # training parameters
    x_dim = len(X[0])
    epochs = training_data["epochs"]
    batch_size = training_data["batch_size"]

    # Per-sample and batched variants of the functions handed to the model.
    funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}
    funcs_batch = {"f": f_batch, "g": g_batch, "f_derivative": f_derivative_batch, "c": c_batch, "score": score}

    # NOTE(review): `scale` is not defined in this cell; it is taken from the
    # kernel namespace. Confirm it holds the intended value for every dataset.
    strategic_model = MyStrategicModel(x_dim, batch_size, funcs, funcs_batch, TRAIN_SLOPE, EVAL_SLOPE, scale=scale, strategic=True)
    strategic_model.fit(path, X, Y, Xval, Yval,
                        opt=torch.optim.Adam, opt_kwargs={"lr": 5*(1e-1)},
                        batch_size=batch_size, epochs=epochs, verbose=True,
                        comment="batched")

    # Row 0: strategic_model.total_time, row 1: strategic_model.ccp_time
    # (presumably overall training time and time spent in the CCP solver — verify).
    runtimes = [strategic_model.total_time, strategic_model.ccp_time]
    pd.DataFrame(np.array(runtimes)).to_csv(path + '/results.csv')