In [91]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil
from datetime import datetime
import torch.nn as nn
from torch.autograd import Variable
from torch.autograd.functional import jacobian

torch.set_default_dtype(torch.float64)
torch.manual_seed(0)
np.random.seed(0)

TRAIN_SLOPE = 1
EVAL_SLOPE = 5
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10

  and should_run_async(code)


# Utils

In [92]:
def split_data(X, Y, percentage):
    num_val = int(len(X)*percentage)
    return X[num_val:], Y[num_val:], X[:num_val], Y[:num_val]

def shuffle(X, Y):
    data = torch.cat((X, Y), 1)
    data = data[torch.randperm(data.size()[0])]
    X = data[:, :2]
    Y = data[:, 2]
    return X, Y

def conf_mat(Y1, Y2):
    num_of_samples = len(Y1)
    mat = confusion_matrix(Y1, Y2, labels=[-1, 1])*100/num_of_samples
    acc = np.trace(mat)
    return mat, acc

def calc_accuracy(Y, Ypred):
    num = len(Y)
    temp = Y - Ypred
    acc = len(temp[temp == 0])*1./num
    return acc

# Dataset

In [93]:
def load_spam_data():
    torch.manual_seed(0)
    np.random.seed(0)
    path = r"C:\Users\sagil\Desktop\nir_project\tip_spam_data\IS_journal_tip_spam.arff"
    data, meta = arff.loadarff(path)
    df = pd.DataFrame(data)
    most_disc = ['qTips_plc', 'rating_plc', 'qEmail_tip', 'qContacts_tip', 'qURL_tip', 'qPhone_tip', 'qNumeriChar_tip', 'sentistrength_tip', 'combined_tip', 'qWords_tip', 'followers_followees_gph', 'qunigram_avg_tip', 'qTips_usr', 'indeg_gph', 'qCapitalChar_tip', 'class1']
    df = df[most_disc]
    df["class1"].replace({b'spam': -1, b'notspam': 1}, inplace=True)
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)

    Y = df['class1'].values
    X = df.drop('class1', axis = 1).values
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    return torch.from_numpy(X), torch.from_numpy(Y)

# CCP classes

In [94]:
class CCP:
    def __init__(self, x_dim, h_dim, funcs):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.v = cp.Variable(h_dim)
        self.xt = cp.Parameter(x_dim)
        self.r = cp.Parameter(x_dim)
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.B_span = cp.Parameter((x_dim, h_dim))
        self.slope = cp.Parameter(1)
        

        target = self.x@self.f_derivative(self.xt, self.w, self.b, self.slope)-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND,
                      self.B_span@self.v == self.x-self.r]
        self.prob = cp.Problem(cp.Maximize(target), constraints)
        
    def ccp(self, r, B_span):
        """
        numpy to numpy
        """
        self.xt.value = r
        self.r.value = r
        self.B_span.value = B_span
        result = self.prob.solve()
        diff = np.linalg.norm(self.xt.value - self.x.value)
        cnt = 0
        while diff > 0.0001 and cnt < 10:
            cnt += 1
            self.xt.value = self.x.value
            result = self.prob.solve()
            diff = np.linalg.norm(self.x.value - self.xt.value)
        return self.x.value
    
    def optimize_X(self, X, w, b, B_SPAN, slope):
        """
        tensor to tensor
        """
        X = X.numpy()
        w = w.detach().numpy()
        b = b.detach().numpy()
        B_SPAN = B_SPAN.numpy()
        slope = np.full(1, slope)
        
        self.w.value = w
        self.b.value = b
        self.slope.value = slope
        
        return torch.stack([torch.from_numpy(self.ccp(x, B_span)) for x, B_span in zip(X, B_SPAN)])

In [95]:
class DELTA():
    
    def __init__(self, x_dim, h_dim, funcs):
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.v = cp.Variable(h_dim)
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.b = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.B_span = cp.Parameter((x_dim, h_dim), value = np.random.randn(x_dim, h_dim))

        target = self.x@self.f_der-self.g(self.x, self.w, self.b, TRAIN_SLOPE)-self.c(self.x, self.r, x_dim)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND,
                      self.B_span@self.v == self.x-self.r]
        objective = cp.Maximize(target)
        problem = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(problem, parameters=[self.r, self.w, self.b, self.f_der, self.B_span],
                                variables=[self.x])
        
        
    def optimize_X(self, X, w, b, F_DER, B_SPAN):
        return self.layer(X, w, b, F_DER, B_SPAN)[0]

# Gain & Cost functions

In [96]:
def score(x, w, b):
    return x@w + b

def f(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) + 1)]), 2)

def g(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) - 1)]), 2)

def c(x, r, x_dim):
    return (1/x_dim)*cp.sum_squares(x-r)

def f_derivative(x, w, b, slope):
    return 0.5*cp.multiply(slope*((slope*score(x, w, b) + 1)/cp.sqrt((slope*score(x, w, b) + 1)**2 + 1)), w)

funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}

# CAE

In [97]:
class CAE(nn.Module):
    def __init__(self, x_dim, h_dim, lamb):
        super(CAE, self).__init__()
        
        self.lamb = lamb
        self.x_dim = x_dim
        self.h_dim = h_dim
        self.fc1 = nn.Linear(x_dim, h_dim, bias = True) # Encoder
        self.fc2 = nn.Linear(h_dim, x_dim, bias = True) # Decoder

        self.sigmoid = nn.Sigmoid()

    def encoder(self, x):
        return self.sigmoid(self.fc1(x))

    def decoder(self, z):
        return self.fc2(z)

    def forward(self, x):
            h1 = self.encoder(x)
            h2 = self.decoder(h1)
            return h1, h2
    
    def loss(self, x, recons_x, h):
        """Compute the Contractive AutoEncoder Loss
        Evalutes the CAE loss, which is composed as the summation of a Mean
        Squared Error and the weighted l2-norm of the Jacobian of the hidden
        units with respect to the inputs.
        See reference below for an in-depth discussion:
          #1: http://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder
        Args:
            `W` (FloatTensor): (N_hidden x N), where N_hidden and N are the
              dimensions of the hidden units and input respectively.
            `x` (Variable): the input to the network, with dims (N_batch x N)
            recons_x (Variable): the reconstruction of the input, with dims
              N_batch x N.
            `h` (Variable): the hidden units of the network, with dims
              batch_size x N_hidden
            `lam` (float): the weight given to the jacobian regulariser term
        Returns:
            Variable: the (scalar) CAE loss
        """

        mse_loss = nn.MSELoss(size_average = False)
        mse = mse_loss(recons_x, x)

        W = self.state_dict()['fc1.weight']
        # Since: W is shape of N_hidden x N. So, we do not need to transpose it as
        # opposed to #1
        dh = h * (1 - h) # Hadamard product produces size N_batch x N_hidden
        # Sum through the input dimension to improve efficiency, as suggested in #1
        w_sum = torch.sum(Variable(W)**2, dim=1)
        # unsqueeze to avoid issues with torch.mv
        w_sum = w_sum.unsqueeze(1) # shape N_hidden x 1
        contractive_loss = torch.sum(torch.mm(dh**2, w_sum), 0)
        return mse + contractive_loss.mul_(self.lamb)

    def fit(self, X, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False):
        train_dset = TensorDataset(X, torch.ones(len(X)))
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        opt = opt(self.parameters(), **opt_kwargs)
        
        for epoch in range(epochs):
            train_loss = 0
            self.train()
            for idx, (Xbatch, _) in enumerate(train_loader):
                Xbatch = Variable(Xbatch)
                opt.zero_grad()

                hidden_representation, recons_x = self.forward(Xbatch)

                # Get the weights
                # model.state_dict().keys()
                # change the key by seeing the keys manually.
                # (In future I will try to make it automatic)
                l = self.loss(Xbatch, recons_x, hidden_representation)

                l.backward()
                train_loss += l.data[0]
                opt.step()

                if idx % 10 == 0 and verbose:
                    print('Train epoch: {} [{}/{}({:.0f}%)]\t Loss: {:.6f}'.format(
                          epoch, idx*len(Xbatch), len(train_loader.dataset),
                          100*idx/len(train_loader),
                          l.data[0]/len(Xbatch)))
            if verbose:
                print('====> Epoch: {} Average loss: {:.4f}'.format(
                     epoch, train_loss / len(train_loader.dataset)))

# Model

In [98]:
class MyStrategicModel(torch.nn.Module):
    def __init__(self, x_dim, cae, funcs, train_slope, eval_slope, strategic=False):
        torch.manual_seed(0)
        np.random.seed(0)
        super(MyStrategicModel, self).__init__()
        self.x_dim = x_dim
        self.h_dim = cae.h_dim
        self.train_slope, self.eval_slope = train_slope, eval_slope
        self.w = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64, requires_grad=True)))
        self.b = torch.nn.parameter.Parameter(torch.rand(1, dtype=torch.float64, requires_grad=True))
        self.strategic = strategic
        self.ccp = CCP(self.x_dim, self.h_dim, funcs)
        self.delta = DELTA(self.x_dim, self.h_dim, funcs)
        self.cae = cae

    def forward(self, X, evaluation=False):
        if self.strategic:
            B_SPANS = self.get_spans(X)
            
            if evaluation:
                XT = self.ccp.optimize_X(X, self.w, self.b, B_SPANS, self.eval_slope)
                X_opt = XT
            else:
                XT = self.ccp.optimize_X(X, self.w, self.b, B_SPANS, self.train_slope)
                F_DER = self.get_f_ders(XT, self.train_slope)
                X_opt = self.delta.optimize_X(X, self.w, self.b, F_DER, B_SPANS) # Xopt should be equal to XT but we do it again for the gradients
            output = self.score(X_opt)
        else:
            output = self.score(X)        
        return output
    
    
    def get_spans(self, X):
        def func(x):
            return self.cae(x)[0]
        
        eps = 0.01
        B_SPANS = []
        for x in X:
            J = jacobian(func, x)
            U, S, _ = torch.svd(J.T)
            B_span = U
#             B_span = U[:, S>eps]
#             if B_span.size()[1] < self.h_dim:
#                 pad = torch.zeros((x_dim, self.h_dim-B_span.size()[1]))
#                 B_span = torch.cat((B_span, pad), 1)
            B_SPANS.append(B_span)
            
        return torch.stack(B_SPANS)
        
    
    def optimize_X(self, X, evaluation=False):
        slope = self.eval_slope if evaluation else self.train_slope
        return self.ccp.optimize_X(X, self.w, self.b, slope)
    
    def score(self, x):
        return x@self.w + self.b
    
    def get_f_ders(self, XT, slope):
        return torch.stack([0.5*slope*((slope*self.score(xt) + 1)/torch.sqrt((slope*self.score(xt) + 1)**2 + 1))*self.w for xt in XT])

    def calc_accuracy(self, Y, Y_pred):
        Y_pred = torch.sign(Y_pred)
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num        
        return acc
    
    def evaluate(self, X, Y):      
        return self.calc_accuracy(Y, self.forward(X, evaluation=True))
    
    def loss(self, Y, Y_pred):
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))
    
    def save_model(self, train_errors, val_errors, train_losses, val_losses, info, path, comment=None):
        if comment is not None:
            path += "_____" + comment
            
        filename = path + "/model.pt"
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
        torch.save(self.state_dict(), filename)
        
        with open(path + "/info.txt", "w") as f:
            f.write(info)
    
    def load_model(self, filename):
        self.load_state_dict(torch.load(filename))
        self.eval()
    
    def fit(self, X, Y, Xval, Yval, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False, callback=None, comment=None):
        train_dset = TensorDataset(X, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        opt = opt(self.parameters(), **opt_kwargs)

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []
        
        best_val_error = 1
        consecutive_no_improvement = 0
        now = datetime.now()
        path = "C:/Users/sagil/Desktop/nir_project/models/manifold/" + now.strftime("%d-%m-%Y_%H-%M-%S")

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, Ybatch in train_loader:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch)
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                with torch.no_grad():
                    e = self.calc_accuracy(Ybatch, Ybatch_pred)
                    train_errors[-1].append(1-e)
                if verbose:
                    print("batch %03d / %03d | loss: %3.5f | err: %3.5f" %
                          (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                batch += 1
                if callback is not None:
                    callback()

            with torch.no_grad():
                Yval_pred = self.forward(Xval, evaluation=True)
                val_loss = self.loss(Yval, Yval_pred).item()
                val_losses.append(val_loss)
                val_error = 1-self.calc_accuracy(Yval, Yval_pred)
                val_errors.append(val_error)
                if val_error < best_val_error:
                    consecutive_no_improvement = 0
                    best_val_error = val_error
                    if self.strategic:
                        info = "training time in seconds: {}\nepoch: {}\nbatch size: {}\ntrain slope: {}\neval slope: {}\nlearning rate: {}\nvalidation loss: {}\nvalidation error: {}\n".format(
                        time.time()-total_time, epoch, batch_size, self.train_slope, self.eval_slope, opt_kwargs["lr"], val_loss, val_error)
                        self.save_model(train_errors, val_errors, train_losses, val_losses, info, path, comment)
                        print("model saved!")
                else:
                    consecutive_no_improvement += 1
                    if consecutive_no_improvement >= 4:
                        break
                
            t2 = time.time()
            if verbose:
                print("----- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))
        print("training time: {} seconds".format(time.time()-total_time)) 
        return train_errors, val_errors, train_losses, val_losses

# Data generation

In [99]:
X, Y = load_spam_data()
X, Y, Xval, Yval = split_data(X, Y, 0.25)

print("percent of positive samples: {}%".format(100 * len(Y[Y == 1]) / len(Y)))

percent of positive samples: 49.820991143772375%


# Train

In [101]:
EPOCHS = 10
BATCH_SIZE = 128
LAMBDA = 0.001

x_dim = len(X[0])
h_dim = x_dim - 3
cae = CAE(x_dim, h_dim, LAMBDA)
cae.fit(X, opt=torch.optim.Adam, opt_kwargs={"lr": (1e-2)}, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

for param in cae.parameters():
    param.requires_grad = False

# MAKE SURE THIS DISABLES TUNING OF THE CAE

====> Epoch: 0 Average loss: 12.3829
====> Epoch: 1 Average loss: 7.3771
====> Epoch: 2 Average loss: 5.2693
====> Epoch: 3 Average loss: 4.0380
====> Epoch: 4 Average loss: 3.2523
====> Epoch: 5 Average loss: 2.7254
====> Epoch: 6 Average loss: 2.3476
====> Epoch: 7 Average loss: 2.0843
====> Epoch: 8 Average loss: 1.8680
====> Epoch: 9 Average loss: 1.6992


In [102]:
EPOCHS = 5
BATCH_SIZE = 128

# non-strategic classification
print("---------- training non-strategically----------")
non_strategic_model = MyStrategicModel(x_dim, cae, funcs, TRAIN_SLOPE, EVAL_SLOPE, strategic=False)

fit_res_non_strategic = non_strategic_model.fit(X, Y, Xval, Yval,
                                opt=torch.optim.Adam, opt_kwargs={"lr": (1e-1)},
                                batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

# strategic classification
print("---------- training strategically----------")
strategic_model = MyStrategicModel(x_dim, cae, funcs, TRAIN_SLOPE, EVAL_SLOPE, strategic=True)

fit_res_strategic = strategic_model.fit(X, Y, Xval, Yval,
                                opt=torch.optim.Adam, opt_kwargs={"lr": (1e-1)},
                                batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

---------- training non-strategically----------
batch 001 / 042 | loss: 1.03230 | err: 0.46875
batch 002 / 042 | loss: 0.85961 | err: 0.37891
batch 003 / 042 | loss: 0.76384 | err: 0.34375
batch 004 / 042 | loss: 0.67284 | err: 0.30078
batch 005 / 042 | loss: 0.63510 | err: 0.27656
batch 006 / 042 | loss: 0.59659 | err: 0.25651
batch 007 / 042 | loss: 0.59714 | err: 0.25446
batch 008 / 042 | loss: 0.57212 | err: 0.24414
batch 009 / 042 | loss: 0.56225 | err: 0.23698
batch 010 / 042 | loss: 0.55123 | err: 0.23203
batch 011 / 042 | loss: 0.54602 | err: 0.23011
batch 012 / 042 | loss: 0.52964 | err: 0.22461
batch 013 / 042 | loss: 0.52774 | err: 0.22536
batch 014 / 042 | loss: 0.52693 | err: 0.22154
batch 015 / 042 | loss: 0.52067 | err: 0.21875
batch 016 / 042 | loss: 0.50931 | err: 0.21338
batch 017 / 042 | loss: 0.51080 | err: 0.21232
batch 018 / 042 | loss: 0.50639 | err: 0.21094
batch 019 / 042 | loss: 0.50278 | err: 0.20929
batch 020 / 042 | loss: 0.50064 | err: 0.20703
batch 021 / 

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.




batch 027 / 042 | loss: 0.46949 | err: 0.19589
batch 028 / 042 | loss: 0.46523 | err: 0.19448
batch 029 / 042 | loss: 0.46776 | err: 0.19343
batch 030 / 042 | loss: 0.46293 | err: 0.19167
batch 031 / 042 | loss: 0.46136 | err: 0.19128
batch 032 / 042 | loss: 0.46288 | err: 0.19238
batch 033 / 042 | loss: 0.45872 | err: 0.19081
batch 034 / 042 | loss: 0.45732 | err: 0.18980
batch 035 / 042 | loss: 0.45512 | err: 0.18929
batch 036 / 042 | loss: 0.45332 | err: 0.18945
batch 037 / 042 | loss: 0.45138 | err: 0.18834
batch 038 / 042 | loss: 0.45025 | err: 0.18709
batch 039 / 042 | loss: 0.44795 | err: 0.18590
batch 040 / 042 | loss: 0.44713 | err: 0.18633
batch 041 / 042 | loss: 0.44904 | err: 0.18655
batch 042 / 042 | loss: 0.44494 | err: 0.18453
----- epoch 001 / 005 | time: 000 sec | loss: 0.47668 | err: 0.18259
batch 001 / 042 | loss: 0.46118 | err: 0.17188
batch 002 / 042 | loss: 0.44573 | err: 0.17969
batch 003 / 042 | loss: 0.44804 | err: 0.19010
batch 004 / 042 | loss: 0.43151 | err

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

batch 001 / 042 | loss: 1.06769 | err: 0.50781
batch 002 / 042 | loss: 0.93576 | err: 0.47266
batch 003 / 042 | loss: 0.89602 | err: 0.44792


  "Solution may be inaccurate. Try another solver, "


batch 004 / 042 | loss: 0.83870 | err: 0.42188
batch 005 / 042 | loss: 0.84645 | err: 0.42812
batch 006 / 042 | loss: 0.83164 | err: 0.42969
batch 007 / 042 | loss: 0.82121 | err: 0.42299
batch 008 / 042 | loss: 0.81060 | err: 0.39648
batch 009 / 042 | loss: 0.80301 | err: 0.38542
batch 010 / 042 | loss: 0.79418 | err: 0.38203
batch 011 / 042 | loss: 0.78576 | err: 0.38210
batch 012 / 042 | loss: 0.77927 | err: 0.38411
batch 013 / 042 | loss: 0.77702 | err: 0.38702
batch 014 / 042 | loss: 0.76552 | err: 0.38170
batch 015 / 042 | loss: 0.75475 | err: 0.37292
batch 016 / 042 | loss: 0.74487 | err: 0.36328
batch 017 / 042 | loss: 0.74032 | err: 0.35754
batch 018 / 042 | loss: 0.73210 | err: 0.35286
batch 019 / 042 | loss: 0.72622 | err: 0.35156
batch 020 / 042 | loss: 0.72029 | err: 0.34961
batch 021 / 042 | loss: 0.71192 | err: 0.34524
batch 022 / 042 | loss: 0.70397 | err: 0.34091
batch 023 / 042 | loss: 0.69593 | err: 0.33662
batch 024 / 042 | loss: 0.68887 | err: 0.33301
batch 025 / 0

TypeError: unsupported operand type(s) for -: 'float' and 'NoneType'

# Test results