# Import Libraries

In [17]:
import tensorly as tl
from tensorly.decomposition import parafac
from tensorly.decomposition import tucker
from tensorly.decomposition import tensor_train
from tensorly.decomposition import tensor_train
from tensorly import tt_to_tensor
from tensorly.decomposition import matrix_product_state
from __future__ import print_function, division
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms, utils
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import time
import os
import copy

# Report library versions and GPU availability so runs can be reproduced.
for label, value in (
    ("PyTorch Version:", torch.__version__),
    ("Torchvision Version:", torchvision.__version__),
    ("GPU is available?", torch.cuda.is_available()),
):
    print(label, value)

PyTorch Version: 1.13.1
Torchvision Version: 0.14.1
GPU is available? True


# Process Data

In [18]:
# Floating-point dtype and compute device (first GPU when CUDA is available, CPU otherwise).
dtype = torch.float
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Convert images to tensors scaled to [0, 1]; Normalize with mean 0 / std 1
# subtracts 0 and divides by 1, so it leaves the values unchanged.
ts = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0,), (1,)),
    ]
)

# Both splits are stored under the same root and share the same transform.
mnist_trainset = datasets.MNIST(root='../data', train=True, download=True, transform=ts)
mnist_testset = datasets.MNIST(root='../data', train=False, download=True, transform=ts)

In [19]:
from torch.utils.data import Subset

# Seed NumPy's global generator so the random subsets are reproducible.
seed_value = 10
np.random.seed(seed_value)

# Subset sizes: 10% of the training split and 9.99% of the test split.
train_subset_size = int(0.1 * len(mnist_trainset))
test_subset_size = int(0.0999 * len(mnist_testset))

# Shuffle the training indices first and the test indices second; this order
# determines how the seeded random stream is consumed.
train_indices = list(range(len(mnist_trainset)))
np.random.shuffle(train_indices)
test_indices = list(range(len(mnist_testset)))
np.random.shuffle(test_indices)

# Keep the leading slice of each shuffled index list.
train_subset_indices = train_indices[:train_subset_size]
test_subset_indices = test_indices[:test_subset_size]

# NOTE: the dataset names are rebound to their subsets, so re-running this cell
# without re-running the loading cell subsamples the already-reduced data.
mnist_trainset = Subset(mnist_trainset, train_subset_indices)
mnist_testset = Subset(mnist_testset, test_subset_indices)

In [21]:
def _flatten_dataset(dataset, n_features, n_classes, device):
    """Stack a dataset of (image, label) pairs into dense tensors.

    Args:
        dataset: indexable collection whose items are (image_tensor, label).
        n_features: number of elements in one flattened image.
        n_classes: number of label classes (width of the one-hot encoding).
        device: torch device on which the returned tensors live.

    Returns:
        (x, y, y_one_hot) where
        x is (n_features, n_samples), one flattened image per column;
        y is (n_samples,) with dtype long;
        y_one_hot is (n_classes, n_samples), one one-hot label per column.
    """
    n_samples = len(dataset)
    x = torch.empty((n_samples, n_features), device=device)
    y = torch.empty(n_samples, dtype=torch.long)
    for i in range(n_samples):
        # Index the dataset once per sample: each access re-runs the transform,
        # so fetching image and label separately would double the work.
        image, label = dataset[i]
        x[i, :] = torch.reshape(image, (-1,))
        y[i] = label
    # scatter_ writes a 1 at column y[i] of row i, giving the one-hot encoding.
    one_hot = torch.zeros(n_samples, n_classes).scatter_(1, torch.reshape(y, (n_samples, 1)), 1)
    return torch.t(x), y.to(device=device), torch.t(one_hot).to(device=device)


# Image dimensions as reported by the first training sample.
x_d0, x_d1, x_d2 = mnist_trainset[0][0].size()
N = x_d3 = len(mnist_trainset)
K = 10  # number of classes

# Training set: features as columns, integer labels, one-hot labels as columns.
x_train, y_train, y_one_hot = _flatten_dataset(mnist_trainset, x_d0*x_d1*x_d2, K, device)

# Test set, built the same way.
N_test = x_d3_test = len(mnist_testset)
x_test, y_test, y_test_one_hot = _flatten_dataset(mnist_testset, x_d0*x_d1*x_d2, K, device)

# Reshape and Reorder Input

In [22]:
#### Reshape X to X-bar
# Each column of x_train / x_test is one image flattened from a tensor of size
# (x_d0, x_d1, x_d2). Restore that layout first, then move the leading axis
# behind the two spatial axes to obtain (x_d1, x_d2, x_d0, n).
# Reshaping straight to (x_d1, x_d2, x_d0, -1) is only correct when x_d0 == 1,
# because it would interleave the leading axis with the spatial ones otherwise.
x_trainTensor = torch.reshape(x_train, (x_d0, x_d1, x_d2, -1)).permute(1, 2, 0, 3).contiguous()
x_testTensor  = torch.reshape(x_test, (x_d0, x_d1, x_d2, -1)).permute(1, 2, 0, 3).contiguous()
print(x_trainTensor.shape)
print(x_testTensor.shape)

torch.Size([28, 28, 1, 6000])
torch.Size([28, 28, 1, 999])


In [23]:
def inputX_CNN(x_Tensor, filter_size, stride):
    """Extract all sliding square patches from a batch of images (im2col).

    Args:
        x_Tensor: tensor of size (H, W, C, n).
        filter_size: side length of the square patch.
        stride: step between consecutive patches along both spatial axes.

    Returns:
        Tensor of size (Hprime * Wprime, filter_size * filter_size * C, n) in
        the default floating dtype, on the same device as ``x_Tensor``, where
        Hprime = (H - filter_size) // stride + 1 and Wprime likewise for W.
        Row ``hh * Wprime + ww`` holds the patch whose top-left corner is at
        (hh * stride, ww * stride), flattened in (row, column, channel) order.

    Raises:
        ValueError: if ``stride`` is not positive or the filter does not fit
            inside the image.
    """
    H, W, C, n = x_Tensor.size()
    if stride <= 0:
        raise ValueError("stride must be positive")
    if filter_size <= 0 or filter_size > H or filter_size > W:
        raise ValueError("filter_size must be positive and fit inside the image")

    Hprime = (H - filter_size) // stride + 1
    Wprime = (W - filter_size) // stride + 1

    # unfold appends the window axis last, so after both calls the layout is
    # (Hprime, Wprime, C, n, patch_row, patch_col).
    patches = x_Tensor.unfold(0, filter_size, stride).unfold(1, filter_size, stride)
    # Reorder to (Hprime, Wprime, patch_row, patch_col, C, n) so that flattening
    # the middle three axes yields (row, column, channel) order.
    patches = patches.permute(0, 1, 4, 5, 2, 3)

    Xtranform = torch.zeros(
        (Hprime * Wprime, filter_size * filter_size * C, n), device=x_Tensor.device
    )
    # Rows are indexed by hh * Wprime + ww (row-major over patch positions);
    # using Hprime as the multiplier is only right for square outputs.
    Xtranform.view(Hprime, Wprime, filter_size, filter_size, C, n).copy_(patches)
    return Xtranform

In [24]:
# Convolution geometry shared by the training and test patch extraction.
filter_size, stride = 3, 2

# Patch tensors for both splits, produced by the same unfolding routine.
x_trainTS, x_testTS = (
    inputX_CNN(images, filter_size, stride)
    for images in (x_trainTensor, x_testTensor)
)

# Main Algorithm

In [25]:
def updateWb_CNN(U, V, W, W_tensor_rec, alpha, rho, tau):
    """Closed-form update of the convolution weights with a TT-proximity term.

    Solves
        (rho * V^T V + (alpha + tau) * I) W* = rho * V^T U' + alpha * W + tau * W_rec
    where U' is ``U`` transposed and reshaped to have as many rows as ``V``,
    and W_rec is ``W_tensor_rec`` reshaped to the shape of ``W``.

    Args:
        U: 2-D tensor; its transpose is reshaped to (V.size(0), -1).
        V: 2-D tensor whose columns index the rows of ``W``.
        W: current weight matrix, size (V.size(1), -1).
        W_tensor_rec: array-like reconstruction of ``W`` (any shape with the
            same number of elements); converted to a tensor like ``W``.
        alpha, rho, tau: scalar weights of the three quadratic terms.

    Returns:
        Updated weight matrix with the same shape as ``W``.
    """
    W_tensor2matrix = (
        torch.as_tensor(W_tensor_rec, device=W.device).to(dtype=W.dtype).reshape(W.shape)
    )
    n_rows, n_cols = V.size()
    I = torch.eye(n_cols, device=V.device, dtype=V.dtype)
    # The row count of U' must equal V's row count for V^T U' to be defined,
    # so it is taken from V rather than from notebook-level globals.
    U_prime = torch.t(U).reshape(n_rows, -1)
    lhs = rho * torch.mm(torch.t(V), V) + (alpha + tau) * I
    rhs = rho * torch.mm(torch.t(V), U_prime) + alpha * W + tau * W_tensor2matrix
    # Solve the linear system directly instead of forming the inverse.
    Wstar = torch.linalg.solve(lhs, rhs)
    return Wstar

In [26]:
def updateWb_CNNorg(U, V, W, alpha, rho):
    """Closed-form update of the convolution weights (no TT-proximity term).

    Solves
        (rho * V^T V + alpha * I) W* = rho * V^T U' + alpha * W
    where U' is ``U`` transposed and reshaped to have as many rows as ``V``.

    Args:
        U: 2-D tensor; its transpose is reshaped to (V.size(0), -1).
        V: 2-D tensor whose columns index the rows of ``W``.
        W: current weight matrix, size (V.size(1), -1).
        alpha, rho: scalar weights of the proximal and coupling terms.

    Returns:
        Updated weight matrix with the same shape as ``W``.
    """
    n_rows, n_cols = V.size()
    I = torch.eye(n_cols, device=V.device, dtype=V.dtype)
    # The row count of U' must equal V's row count for V^T U' to be defined,
    # so it is taken from V rather than from notebook-level globals.
    U_prime = torch.t(U).reshape(n_rows, -1)
    lhs = rho * torch.mm(torch.t(V), V) + alpha * I
    rhs = rho * torch.mm(torch.t(V), U_prime) + alpha * W
    # Solve the linear system directly instead of forming the inverse.
    Wstar = torch.linalg.solve(lhs, rhs)
    return Wstar

In [27]:
def updateV(U1, U2, W, b, rho, gamma):
    """Closed-form update of a hidden layer's post-activation variable V.

    Solves
        (rho * W^T W + gamma * I) V* = rho * W^T (U2 - b) + gamma * relu(U1)

    Args:
        U1: pre-activation of the current layer, size (W.size(1), n_samples).
        U2: pre-activation of the next layer, size (W.size(0), n_samples).
        W: weight matrix of the next layer.
        b: bias of the next layer; subtracted from every column of ``U2``.
        rho, gamma: scalar weights of the coupling and activation terms.

    Returns:
        Tensor of size (W.size(1), n_samples).
    """
    _, d = W.size()
    I = torch.eye(d, device=W.device, dtype=W.dtype)
    Wt = torch.t(W)
    lhs = rho * torch.mm(Wt, W) + gamma * I
    # Broadcasting subtracts the column bias from every sample column.
    rhs = rho * torch.mm(Wt, U2 - b) + gamma * torch.relu(U1)
    # Solve the linear system directly instead of forming the inverse.
    Vstar = torch.linalg.solve(lhs, rhs)
    return Vstar

In [28]:
def updateWb_org(U, V, W, b, alpha, rho):
    """Closed-form update of a dense layer's weights (no TT-proximity term).

    Computes
        W* = (alpha * W + rho * (U - b) V^T) (alpha * I + rho * V V^T)^{-1}

    Args:
        U: pre-activation of the layer, size (W.size(0), n_samples).
        V: input of the layer, size (W.size(1), n_samples).
        W: current weight matrix.
        b: current bias; subtracted from every column of ``U``.
        alpha, rho: scalar weights of the proximal and coupling terms.

    Returns:
        (Wstar, bstar): the updated weights (same shape as ``W``) and a zero
        bias with the same shape as ``b``.
    """
    d, N = V.size()
    I = torch.eye(d, device=V.device, dtype=V.dtype)
    gram = alpha * I + rho * torch.mm(V, torch.t(V))
    rhs = alpha * W + rho * torch.mm(U - b, torch.t(V))
    # W* = rhs @ gram^{-1}. gram is symmetric, so this equals
    # solve(gram, rhs^T)^T, which avoids forming the inverse explicitly.
    Wstar = torch.t(torch.linalg.solve(gram, torch.t(rhs))).contiguous()
    # NOTE(review): the bias update was written as
    #   0 * (alpha*b + rho*sum(U - W* V, dim=1)) / (rho*N + alpha),
    # i.e. the closed-form value multiplied by zero, so the bias is held at
    # zero. That result is returned directly without evaluating the discarded
    # expression; confirm that freezing the bias is intended.
    bstar = torch.zeros_like(b)
    return Wstar, bstar

In [29]:
def updateWb(U, V, W, b, W_tensor_rec, alpha, rho, tau):
    """Closed-form update of a dense layer's weights with a TT-proximity term.

    Computes
        W* = (alpha * W + tau * W_rec + rho * (U - b) V^T)
             ((alpha + tau) * I + rho * V V^T)^{-1}
    where W_rec is ``W_tensor_rec`` reshaped to the shape of ``W``.

    Args:
        U: pre-activation of the layer, size (W.size(0), n_samples).
        V: input of the layer, size (W.size(1), n_samples).
        W: current weight matrix.
        b: current bias; subtracted from every column of ``U``.
        W_tensor_rec: array-like reconstruction of ``W`` (any shape with the
            same number of elements); converted to a tensor like ``W``.
        alpha, rho, tau: scalar weights of the three quadratic terms.

    Returns:
        (Wstar, bstar): the updated weights (same shape as ``W``) and a zero
        bias with the same shape as ``b``.
    """
    W_tensor2matrix = (
        torch.as_tensor(W_tensor_rec, device=W.device).to(dtype=W.dtype).reshape(W.shape)
    )
    d, N = V.size()
    I = torch.eye(d, device=V.device, dtype=V.dtype)
    gram = (alpha + tau) * I + rho * torch.mm(V, torch.t(V))
    rhs = alpha * W + tau * W_tensor2matrix + rho * torch.mm(U - b, torch.t(V))
    # W* = rhs @ gram^{-1}. gram is symmetric, so this equals
    # solve(gram, rhs^T)^T, which avoids forming the inverse explicitly.
    Wstar = torch.t(torch.linalg.solve(gram, torch.t(rhs))).contiguous()
    # NOTE(review): the bias update was written as
    #   0 * (alpha*b + rho*sum(U - W* V, dim=1)) / (rho*N + alpha),
    # i.e. the closed-form value multiplied by zero, so the bias is held at
    # zero. That result is returned directly without evaluating the discarded
    # expression; confirm that freezing the bias is intended.
    bstar = torch.zeros_like(b)
    return Wstar, bstar

In [30]:
# U update  E(1) in global convergent
def relu_prox(a, b, gamma, d, N):
    """Element-wise piecewise update used for the pre-activation variables.

    With s = a + gamma*b and c = gamma - sqrt(gamma*(gamma+1)), each output
    entry is chosen as follows (later rules override earlier ones):
      1. min(b, 0) where s < 0, otherwise 0;
      2. s / (1 + gamma) where (s >= 0 and b >= 0) or (a*c <= gamma*b and b < 0);
      3. b where -a <= gamma*b <= a*c.

    Args:
        a, b: tensors of size (d, N).
        gamma: positive scalar weight.
        d, N: number of rows and columns of ``a`` and ``b``.

    Returns:
        Tensor of size (d, N) on the same device as ``b``.
    """
    # One shared zero tensor instead of several separate allocations.
    zeros = torch.zeros(d, N, device=b.device, dtype=b.dtype)

    # Quantities reused by the three selection rules.
    s = a + gamma * b
    gb = gamma * b
    threshold = a * (gamma - np.sqrt(gamma * (gamma + 1)))

    val = torch.where(s < 0, torch.min(b, zeros), zeros)
    val = torch.where(
        ((s >= 0) & (b >= 0)) | ((threshold <= gb) & (b < 0)),
        s / (1 + gamma),
        val,
    )
    val = torch.where((-a <= gb) & (gb <= threshold), b, val)
    return val

In [None]:

# ---- Hyperparameters ----
niter, rank = 2000, 220                   # outer iterations; TT rank used inside the iterations
tau, gamma, alpha, rho = 0.2, 0.5, 1, 0.5  # weights of the quadratic terms in the objective

print ("rank=",rank, "tau=",tau, "gamma=",gamma, "rho=",rho, "alpha",alpha)

# ---- Per-iteration logs (filled in by the training loop) ----
loss1, loss2, accuracy_train, accuracy_test, time1 = (np.empty(niter) for _ in range(5))

# One slab per repetition; rows: loss1, loss2, train accuracy, test accuracy, time.
results = torch.zeros(1, 5, niter)

# ---- Convolution geometry ----
S = 32  # number of filters (2^5)
H, W, C, n = x_trainTensor.size()   # n is the same thing as N
# Number of patch positions along each spatial axis, kept as integer tensors.
Hprime = (torch.floor(torch.tensor((H - filter_size) / stride)) + 1).to(torch.int)
Wprime = (torch.floor(torch.tensor((W - filter_size) / stride)) + 1).to(torch.int)



for Out_iter in range(1):
    # TT rank cap for the first decomposition of W2 / W3; `rank` is used for
    # the re-decomposition performed inside every iteration.
    rank_initial = 700

    # Seed the CPU generator and, when present, every CUDA generator.
    seed = 10 + 10*Out_iter
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Layer widths: d1 is the flattened convolution output
    # (Hprime * Wprime patch positions times S filters).
    d1 =  Hprime * Wprime * S
    d2 =  1024
    d3 =  1024
    d4 =  10

    # Tensorised shapes of W2 (d2 x d1) and W3 (d3 x d2) used for the
    # tensor-train decomposition, and the matching rank tuples (r0..r10 with
    # r0 = r10 = 1, one more entry than the number of tensor modes).
    W2_tt_shape = (2*Hprime, 2*Wprime, 4, 4, 4, 4, 4, 2, 2, 2)
    W3_tt_shape = (4, 4, 4, 4, 4, 4, 4, 4, 4, 4)   # ten modes of size 4
    tt_ranks_initial = (1,) + (rank_initial,)*9 + (1,)
    tt_ranks = (1,) + (rank,)*9 + (1,)

    # ---- Layer 1: convolution kernel (not TT-compressed) ----
    W1 = 0.01*torch.randn(filter_size * filter_size * C, S, device=device)
    b1 = 0*torch.ones(d1, 1, device=device) # 0 is stable

    # ---- Layer 2: dense, TT-compressed ----
    W2 = nn.init.kaiming_normal_(torch.empty(d2, d1, device=device),a=0, mode='fan_in', nonlinearity='leaky_relu')
    W2_torch_tensor = W2.reshape(W2_tt_shape)
    W2_tl_tensor = tl.tensor(W2_torch_tensor.cpu().numpy())
    # tensor_train returns the TT cores; tt_to_tensor re-assembles the full tensor.
    # http://tensorly.org/stable/modules/generated/tensorly.decomposition.tensor_train.html
    factors2 = tensor_train(W2_tl_tensor, tt_ranks_initial)
    W2_tl_tensor_rec = tt_to_tensor(factors2)
    b2 = 0*torch.ones(d2, 1, device=device)

    # ---- Layer 3: dense, TT-compressed ----
    W3 = nn.init.kaiming_normal_(torch.empty(d3, d2, device=device),a=0, mode='fan_in', nonlinearity='leaky_relu')
    W3_torch_tensor = W3.reshape(W3_tt_shape)
    W3_tl_tensor = tl.tensor(W3_torch_tensor.cpu().numpy())
    factors3 = tensor_train(W3_tl_tensor, tt_ranks_initial)
    W3_tl_tensor_rec = tt_to_tensor(factors3)
    b3 = 0*torch.ones(d3, 1, device=device)

    # ---- Layer 4: dense output layer (not compressed) ----
    W4 = nn.init.kaiming_normal_(torch.empty(d4, d3, device=device),a=0, mode='fan_in', nonlinearity='leaky_relu')
    b4 = 0*torch.ones(d4, 1, device=device)

    # Training patches rearranged so that each row is one flattened patch
    # (rows grouped by sample); this is X-bar-bar'.
    x_trainTS1 = torch.reshape(x_trainTS, (-1,n))
    x_trainTS1 = torch.t(x_trainTS1)
    x_trainTS1 = torch.reshape(x_trainTS1, (-1, filter_size * filter_size * C))

    # Initialise the auxiliary variables with one forward pass.
    U1prime = torch.matmul(x_trainTS1, W1)
    U1prime = torch.reshape(U1prime, (n,-1))
    U1 = torch.t(U1prime)

    V1 = nn.ReLU()(U1)
    U2 = torch.addmm(b2.repeat(1, N), W2, V1)
    V2 = nn.ReLU()(U2)
    U3 = torch.addmm(b3.repeat(1, N), W3, V2)
    V3 = nn.ReLU()(U3)
    U4 = torch.addmm(b4.repeat(1, N), W4, V3)
    V4 = U4

    # Same rearrangement for the test patches (X-bar-bar' for test data).
    x_testTS1 = torch.reshape(x_testTS, (-1, N_test))
    x_testTS1 = torch.t(x_testTS1)
    x_testTS1 = torch.reshape(x_testTS1, (-1, filter_size * filter_size * C))


    # Iterations
    print('Train on', N, 'samples, validate on', N_test, 'samples')
    for k in range(niter):
        start = time.time()

        # ----- last layer -----
        # update V4
        V4 = (y_one_hot + gamma*U4 + alpha*V4)/(1 + gamma + alpha)

        # update U4
        U4 = (gamma*V4 + rho*(torch.mm(W4,V3) + b4.repeat(1,N)))/(gamma + rho)

        # update W4 and b4
        W4, b4 = updateWb_org(U4,V3,W4,b4,alpha,rho)


        # ----- 3rd layer -----
        # update V3
        V3 = updateV(U3,U4,W4,b4,rho,gamma)

        # update U3
        U3 = relu_prox(V3,(rho*torch.addmm(b3.repeat(1,N), W3, V2) + alpha*U3)/(rho + alpha),(rho + alpha)/gamma,d3,N)

        # update W3 and b3
        W3, b3 = updateWb(U3,V2,W3,b3,W3_tl_tensor_rec, alpha,rho,tau)

        # G update (TTD): re-decompose W3 at the working rank and rebuild it
        W3_torch_tensor = W3.reshape(W3_tt_shape)
        W3_tl_tensor = tl.tensor(W3_torch_tensor.cpu().numpy())
        factors3 = tensor_train(W3_tl_tensor, tt_ranks)
        W3_tl_tensor_rec = tt_to_tensor(factors3)


        # ----- 2nd layer -----
        # update V2
        V2 = updateV(U2,U3,W3,b3,rho,gamma)

        # update U2
        U2 = relu_prox(V2,(rho*torch.addmm(b2.repeat(1,N), W2, V1) + alpha*U2)/(rho + alpha),(rho + alpha)/gamma,d2,N)

        # update W2 and b2
        W2, b2 = updateWb(U2,V1,W2,b2,W2_tl_tensor_rec, alpha,rho,tau)

        # G update (TTD): re-decompose W2 at the working rank and rebuild it
        W2_torch_tensor = W2.reshape(W2_tt_shape)
        W2_tl_tensor = tl.tensor(W2_torch_tensor.cpu().numpy())
        factors2 = tensor_train(W2_tl_tensor, tt_ranks)
        W2_tl_tensor_rec = tt_to_tensor(factors2)


        # ----- 1st layer -----
        # update V1
        V1 = updateV(U1,U2,W2,b2,rho,gamma)

        # convolution output with the current (pre-update) W1
        XprimeW = torch.reshape(torch.matmul(x_trainTS1, W1), (n,-1))
        XprimeWtranspose = torch.t(XprimeW)
        # update U1
        U1 = relu_prox(V1,(rho*XprimeWtranspose + alpha*U1)/(rho + alpha),(rho + alpha)/gamma,d1,N)

        # update W1 (the convolution layer has no bias update)
        W1 = updateWb_CNNorg(U1,x_trainTS1,W1,alpha,rho)


        # ----- predictions -----
        # The compressed layers are evaluated with their TT reconstructions.
        # Move each reconstruction to the device once and reuse it for the
        # training pass, the test pass and the loss.
        W2_rec_matrix = torch.as_tensor(W2_tl_tensor_rec,device=device).reshape((d2, d1)).float()
        W3_rec_matrix = torch.as_tensor(W3_tl_tensor_rec,device=device).reshape((d3, d2)).float()

        # prediction for training data (recomputed with the updated W1)
        XprimeW = torch.reshape(torch.matmul(x_trainTS1, W1), (n,-1))
        XprimeWtranspose = torch.t(XprimeW)
        a1_train = nn.ReLU()(XprimeWtranspose)
        a2_train = nn.ReLU()(torch.addmm(b2.repeat(1, N), W2_rec_matrix, a1_train))
        a3_train = nn.ReLU()(torch.addmm(b3.repeat(1, N), W3_rec_matrix, a2_train))
        # columns are samples, so the class is the arg-max over rows (dim=0)
        pred = torch.argmax(torch.addmm(b4.repeat(1, N), W4, a3_train), dim=0)

        # prediction for test data
        XprimeWtest = torch.reshape(torch.matmul(x_testTS1, W1), (N_test,-1))
        XprimeWtesttranspose = torch.t(XprimeWtest)
        a1_test = nn.ReLU()(XprimeWtesttranspose)
        a2_test = nn.ReLU()(torch.addmm(b2.repeat(1, N_test), W2_rec_matrix, a1_test))
        a3_test = nn.ReLU()(torch.addmm(b3.repeat(1, N_test), W3_rec_matrix, a2_test))
        pred_test = torch.argmax(torch.addmm(b4.repeat(1, N_test), W4, a3_test), dim=0)


        # ----- losses -----
        # empirical (squared) loss
        loss1[k] = gamma/2*torch.pow(torch.dist(V4,y_one_hot,2),2).cpu().numpy()

        # Total objective (Eq (5) in paper): empirical loss + layer-coupling
        # terms (rho) + activation terms (gamma) + TT-proximity terms (tau).
        # Parenthesised so no line depends on a trailing backslash.
        loss2[k] = (
            loss1[k]
            + rho/2*torch.pow(torch.dist(XprimeWtranspose,U1,2),2).cpu().numpy()
            + rho/2*torch.pow(torch.dist(torch.addmm(b2.repeat(1,N), W2, V1),U2,2),2).cpu().numpy()
            + rho/2*torch.pow(torch.dist(torch.addmm(b3.repeat(1,N), W3, V2),U3,2),2).cpu().numpy()
            + rho/2*torch.pow(torch.dist(torch.addmm(b4.repeat(1,N), W4, V3),U4,2),2).cpu().numpy()
            + gamma/2*torch.pow(torch.dist(V1,nn.ReLU()(U1),2),2).cpu().numpy()
            + gamma/2*torch.pow(torch.dist(V2,nn.ReLU()(U2),2),2).cpu().numpy()
            + gamma/2*torch.pow(torch.dist(V3,nn.ReLU()(U3),2),2).cpu().numpy()
            + gamma/2*torch.pow(torch.dist(V4,U4,2),2).cpu().numpy()
            + tau/2*torch.pow(torch.dist(W2,W2_rec_matrix,2),2).cpu().numpy()
            + tau/2*torch.pow(torch.dist(W3,W3_rec_matrix,2),2).cpu().numpy()
        )

        # compute training accuracy
        correct_train = pred == y_train
        accuracy_train[k] = np.mean(correct_train.cpu().numpy())

        # compute validation accuracy
        correct_test = pred_test == y_test
        accuracy_test[k] = np.mean(correct_test.cpu().numpy())

        # compute training time
        stop = time.time()
        duration = stop - start
        time1[k] = duration

        # print results
        print('Repeatition', Out_iter + 1, 'Epoch', k + 1, '/', niter, '\n',
              '-', 'time:', time1[k], '-', 'sq_loss:', loss1[k], '-', 'tot_loss:', loss2[k],
              '-', 'acc:', accuracy_train[k], '-', 'val_acc:', accuracy_test[k])

    # ----- compression ratios (CR) -----
    # Number of parameters stored in the TT cores of each compressed layer.
    factors2_shape=[f.shape for f in factors2]
    Sum_of_variables_factors2=sum(x*y*z for x,y,z in factors2_shape)
    factors3_shape=[f.shape for f in factors3]
    Sum_of_variables_factors3=sum(x*y*z for x,y,z in factors3_shape)
    total_variabels=Sum_of_variables_factors2+Sum_of_variables_factors3

    # Ratios of TT parameters to dense parameters. d1 is a tensor, so every
    # denominator that involves it is converted to a Python number with .item().
    layer2_CR = Sum_of_variables_factors2/(d1*d2).item()
    layer3_CR = Sum_of_variables_factors3/(d2*d3)
    Compressedlayers_CR = total_variabels/(d1*d2+d2*d3).item()
    Compressedlayers_CR2 = (total_variabels+d3*d4)/(d1*d2+d2*d3+d3*d4).item()

    # Store the logs of this repetition.
    results[Out_iter,0,:] = torch.tensor(loss1)
    results[Out_iter,1,:] = torch.tensor(loss2)
    results[Out_iter,2,:] = torch.tensor(accuracy_train)
    results[Out_iter,3,:] = torch.tensor(accuracy_test)
    results[Out_iter,4,:] = torch.tensor(time1)
    CR=(layer2_CR,layer3_CR,Compressedlayers_CR,Compressedlayers_CR2)


rank= 220 tau= 0.2 gamma= 0.5 rho= 0.5 alpha 1
Train on 6000 samples, validate on 999 samples
Repeatition 1 Epoch 1 / 2000 
 - time: 5.950917482376099 - sq_loss: 542.9500732421875 - tot_loss: 863.1941393403337 - acc: 0.29 - val_acc: 0.2822822822822823
Repeatition 1 Epoch 2 / 2000 
 - time: 5.642564058303833 - sq_loss: 241.3111572265625 - tot_loss: 493.68705853717404 - acc: 0.421 - val_acc: 0.42042042042042044
Repeatition 1 Epoch 3 / 2000 
 - time: 5.632015228271484 - sq_loss: 120.87895202636719 - tot_loss: 296.38874887079 - acc: 0.5043333333333333 - val_acc: 0.5055055055055055
Repeatition 1 Epoch 4 / 2000 
 - time: 5.6204938888549805 - sq_loss: 63.263763427734375 - tot_loss: 182.79723730385302 - acc: 0.5723333333333334 - val_acc: 0.5785785785785785
Repeatition 1 Epoch 5 / 2000 
 - time: 5.608172655105591 - sq_loss: 33.651947021484375 - tot_loss: 116.10604887530206 - acc: 0.6215 - val_acc: 0.6256256256256256
Repeatition 1 Epoch 6 / 2000 
 - time: 5.63688588142395 - sq_loss: 18.027166366

Repeatition 1 Epoch 49 / 2000 
 - time: 5.580448627471924 - sq_loss: 0.0001926186087075621 - tot_loss: 0.6112024812202436 - acc: 0.905 - val_acc: 0.9039039039039038
Repeatition 1 Epoch 50 / 2000 
 - time: 5.595702171325684 - sq_loss: 0.00018325838027521968 - tot_loss: 0.5944153199438006 - acc: 0.9065 - val_acc: 0.9039039039039038
Repeatition 1 Epoch 51 / 2000 
 - time: 5.606915235519409 - sq_loss: 0.000174726388650015 - tot_loss: 0.5784801504167263 - acc: 0.9066666666666666 - val_acc: 0.9039039039039038
Repeatition 1 Epoch 52 / 2000 
 - time: 5.935134410858154 - sq_loss: 0.0001669601770117879 - tot_loss: 0.5633365927496925 - acc: 0.9071666666666667 - val_acc: 0.9049049049049049
Repeatition 1 Epoch 53 / 2000 
 - time: 5.648139715194702 - sq_loss: 0.00015984858328010887 - tot_loss: 0.5489295928826323 - acc: 0.9076666666666666 - val_acc: 0.9069069069069069
Repeatition 1 Epoch 54 / 2000 
 - time: 5.59767484664917 - sq_loss: 0.0001533022295916453 - tot_loss: 0.535210158539121 - acc: 0.90783