## Import libraries

In [1]:
# !pip install tensorly

import tensorly as tl
from tensorly.decomposition import parafac
from tensorly.decomposition import tucker
from tensorly.decomposition import tensor_train
from math import ceil
from tensorly import tt_to_tensor
from tensorly.decomposition import matrix_product_state
import torch.nn.init as init

In [2]:
"""
5 runs of 50 epochs, seed = 10, 20, 30, 40, 50;
validation accuracies: 0.9492, 0.9457, 0.9463, 0.9439, 0.9455
"""
#from __future__ import print_function, division
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms, utils
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import time
import os
import copy
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import balanced_accuracy_score

print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)
print("GPU is available?", torch.cuda.is_available())

dtype = torch.float64
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

PyTorch Version: 1.13.1
Torchvision Version: 0.14.1
GPU is available? True


## train data processing

In [3]:
df_train_1 = pd.read_csv('/home/c/cl237/Datasets/Flare_LSTM_dataset/M/normalized_training.csv')
df_train_2 = pd.read_csv('/home/c/cl237/Datasets/Flare_LSTM_dataset/M/normalized_validation.csv')
# concat train and validation into train. we don't need validation set
df_train = df_train_1.append(df_train_2).reset_index(drop=True)

# undersampling for "negative" samples
negative_df = df_train[df_train['label'] == 'Negative']
positive_df = df_train[df_train['label'] == 'Positive']

# Get number of "positive" samples
num_positive = len(positive_df)

# take the same number of "negative" samples as there are "positive" samples
balanced_negative_df = negative_df.sample(n=num_positive, random_state=10086)

df_train_balanced = pd.concat([positive_df, balanced_negative_df])
print(df_train_balanced['label'].value_counts())

Positive    4057
Negative    4057
Name: label, dtype: int64


In [4]:
pd_X_train = df_train_balanced.iloc[:, 5:]
pd_y_train = df_train_balanced.iloc[:, 0]

N = len(pd_X_train)
K = 2

pd_X_train = pd_X_train.values
X_train = torch.tensor(pd_X_train, dtype=dtype, device=device)
X_train = torch.t(X_train)

# scaler = MinMaxScaler()
# x = pd_X_train.values
# x_scaled = scaler.fit_transform(x)
# X_train = torch.tensor(x_scaled, dtype=dtype, device=device)
# X_train = torch.t(X_train)

# Initialize the LabelEncoder
encoder = LabelEncoder()
# Fit and transform the y values
y = encoder.fit_transform(pd_y_train.values)
y_train=torch.tensor(y, dtype=torch.long, device=device)
y_train = torch.flatten(y_train)

y_one_hot = torch.zeros(N, K, device=device).scatter_(1, y_train.unsqueeze(1), 1)
y_one_hot = torch.t(y_one_hot).to(device=device)

print(list(encoder.classes_))

['Negative', 'Positive']


## test data processing

In [5]:
df_test = pd.read_csv('/home/c/cl237/Datasets/Flare_LSTM_dataset/M/normalized_testing.csv')

# Undersampling for "negative" samples
#negative_df = df_test[df_test['label'] == 'Negative']
#positive_df = df_test[df_test['label'] == 'Positive']

# Get number of "positive" samples
#num_positive = len(positive_df)

# Take the same number of "negative" samples as there are "positive" samples
#balanced_negative_df = negative_df.sample(n=num_positive, random_state=10086)

#df_test_balanced = pd.concat([positive_df, balanced_negative_df])
#print(df_test_balanced['label'].value_counts())

In [6]:
pd_X_test = df_test.iloc[:, 5:]
pd_y_test = df_test.iloc[:, 0]

N_test = len(pd_X_test)
K = 2

pd_X_test = pd_X_test.values
X_test = torch.tensor(pd_X_test, dtype=dtype, device=device)
X_test = torch.t(X_test)
# x = pd_X_test.values
# x_scaled = scaler.transform(x)  # only transform x, don't fit the scaler again
# X_test = torch.tensor(x_scaled, dtype=dtype, device=device)
# X_test = torch.t(X_test)


# Initialize the LabelEncoder
encoder = LabelEncoder()
# Fit and transform the y values
y = encoder.fit_transform(pd_y_test.values)
y_test=torch.tensor(y, dtype=torch.long, device=device)
y_test = torch.flatten(y_test)

y_test_one_hot = torch.zeros(N_test, K, device=device).scatter_(1, y_test.unsqueeze(1), 1)
y_test_one_hot = torch.t(y_test_one_hot).to(device=device)

In [7]:
print(df_test['label'].value_counts())

Negative    43411
Positive     1278
Name: label, dtype: int64


## Main algorithm

### Define functions for updating blocks

In [8]:
# def updateMask(W, sparsity):
#     torch.dist(V1,nn.ReLU()(U1),2),2).cpu().numpy()
#     Mask
#     Wsparse
#     return Mask, Wsparse

# Wsquare = torch.square(W3)
# Threshold = torch.quantile(torch.reshape(Wsquare, (-1,)), 0.5, interpolation='linear')
# Wsparse = W3
# Wsparse[Wsquare < Threshold] =  0

# Wsparse

In [9]:
def updateV(U1,U2,W,b,rho,gamma):
    _, d = W.size()
    I = torch.eye(d, device=device)
    U1 = nn.ReLU()(U1)
    _, col_U2 = U2.size()
    Vstar = torch.mm(torch.inverse(rho*(torch.mm(torch.t(W),W))+gamma*I), rho*torch.mm(torch.t(W),U2-b.repeat(1,col_U2))+gamma*U1)
    return Vstar

In [10]:
def updateWb_org(U, V, W, b, alpha, rho):
    d,N = V.size()
    I = torch.eye(d, device=device)
    _, col_U = U.size()
    Wstar = torch.mm(alpha*W+rho*torch.mm(U-b.repeat(1,col_U),torch.t(V)),torch.inverse(alpha*I+rho*(torch.mm(V,torch.t(V)))))
    bstar = 0*(alpha*b+rho*torch.sum(U-torch.mm(Wstar,V), dim=1).reshape(b.size()))/(rho*N+alpha)
    return Wstar, bstar

In [11]:
def updateWb(U, V, W, b, W_tensor_rec, alpha, rho,tau):
    W_tensor_rec = torch.as_tensor(W_tensor_rec,device=device).float()
    W_tensor2matrix = W_tensor_rec.reshape(W.shape)
    d,N = V.size()
    I = torch.eye(d, device=device)
    _, col_U = U.size()
    Wstar = torch.mm(alpha*W+tau*W_tensor2matrix+rho*torch.mm(U-b.repeat(1,col_U),torch.t(V)),torch.inverse((alpha+tau)*I+rho*(torch.mm(V,torch.t(V)))))
    bstar = 0*(alpha*b+rho*torch.sum(U-torch.mm(Wstar,V), dim=1).reshape(b.size()))/(rho*N+alpha)
    return Wstar, bstar

In [12]:
def updateWbsparse(U, V, W, b,  W_tensor2matrix, alpha, rho,tau):
    d,N = V.size()
    I = torch.eye(d, device=device)
    _, col_U = U.size()
    Wstar = torch.mm(alpha*W+tau*W_tensor2matrix+rho*torch.mm(U-b.repeat(1,col_U),torch.t(V)),torch.inverse((alpha+tau)*I+rho*(torch.mm(V,torch.t(V)))))
    bstar = 0*(alpha*b+rho*torch.sum(U-torch.mm(Wstar,V), dim=1).reshape(b.size()))/(rho*N+alpha)
    return Wstar, bstar

### Define the proximal operator of the ReLU activation function

In [13]:
def relu_prox(a, b, gamma, d, N):
    val = torch.empty(d,N, device=device)
    x = (a+gamma*b)/(1+gamma)
    y = torch.min(b,torch.zeros(d,N, device=device))

    val = torch.where(a+gamma*b < 0, y, torch.zeros(d,N, device=device))
    val = torch.where(((a+gamma*b >= 0) & (b >=0)) | ((a*(gamma-np.sqrt(gamma*(gamma+1))) <= gamma*b) & (b < 0)), x, val)
    val = torch.where((-a <= gamma*b) & (gamma*b <= a*(gamma-np.sqrt(gamma*(gamma+1)))), b, val)
    return val

### Effective Sparsity

In [14]:
def process_weights(W1, W2, W3):
    # Clone the tensors to keep the originals unchanged
    W1, W2, W3 = W1.clone(), W2.clone(), W3.clone()

    while True:
        # Store a copy of the current weights
        old_W1, old_W2, old_W3 = W1.clone(), W2.clone(), W3.clone()

        # Check all rows of W1
        # If all values in a row of W1 are 0, set corresponding column in W2 to 0
        zero_rows_W1 = torch.all(W1 == 0, dim=1)
        W2[:, zero_rows_W1] = 0

        # Check all columns of W2
        # If all values in a column of W2 are 0, set corresponding row in W1 to 0
        zero_cols_W2 = torch.all(W2 == 0, dim=0)
        W1[zero_cols_W2, :] = 0

        # Check all rows of W2
        # If all values in a row of W2 are 0, set corresponding column in W3 to 0
        zero_rows_W2 = torch.all(W2 == 0, dim=1)
        W3[:, zero_rows_W2] = 0

        # Check if matrices are unchanged
        if torch.equal(W1, old_W1) and torch.equal(W2, old_W2) and torch.equal(W3, old_W3):
            break

    return W1, W2, W3

### Parameter initialization

In [16]:
#df = pd.DataFrame()
#df.to_csv('/home/c/cl237/TenBCD/Sparse/LeNet300_100/Flare(BCD method)/differernt gamma rho/result.csv')

niter = 1000
sparsity = 0.9

tau = 200
alpha = 1
rho = 1
gamma = 100

        

# Do something with these parameters
print(sparsity, tau, gamma, rho, alpha)

loss1 = np.empty(niter)
loss2 = np.empty(niter)
accuracy_train = np.empty(niter)
accuracy_test = np.empty(niter)
time1 = np.empty(niter)
bacc_train = np.empty(niter)
bacc_test = np.empty(niter)
true_sparsity = np.empty(niter)
effective_sparsity = np.empty(niter)

results = torch.zeros(1, 9, niter)

for Out_iter in range(1):
    rank_initial = 700
    seed = 10 + 10*Out_iter
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.manual_seed(seed)

    d0 = 40
    d1 =  300
    d2 =  100
    d3 = K # Layers: input + 2 hidden + output

    W1 = init.uniform_(torch.empty(d1, d0, device=device, dtype=dtype), a=-0.01, b=0.01)
    b1 = 0*torch.ones(d1, 1, dtype=torch.float64,device=device)
    W1square = torch.square(W1)
    Threshold = torch.quantile(torch.reshape(W1square, (-1,)), sparsity, interpolation='linear')
    W1sparse = W1
    W1sparse[W1square < Threshold] =  0

    W2 = init.uniform_(torch.empty(d2, d1, device=device, dtype=dtype), a=-0.01, b=0.01)
    b2 = 0*torch.ones(d2, 1, dtype=torch.float64,device=device)
    W2square = torch.square(W2)
    Threshold = torch.quantile(torch.reshape(W2square, (-1,)), sparsity, interpolation='linear')
    W2sparse = W2
    W2sparse[W2square < Threshold] =  0


    W3 = init.uniform_(torch.empty(d3, d2, device=device, dtype=dtype), a=-0.01, b=0.01)
    b3 = 0*torch.ones(d3, 1, dtype=torch.float64,device=device)
    # W3square = torch.square(W3)
    # Threshold = torch.quantile(torch.reshape(W3square, (-1,)), sparsity, interpolation='linear')
    # W3sparse = W3
    # W3sparse[W3square < Threshold] =  0


    U1 = torch.addmm(b1.repeat(1, N), W1, X_train)
    V1 = nn.ReLU()(U1)
    U2 = torch.addmm(b2.repeat(1, N), W2, V1)
    V2 = nn.ReLU()(U2)
    U3 = torch.addmm(b3.repeat(1, N), W3, V2)
    V3 = U3
    # U4 = torch.addmm(b4.repeat(1, N), W4, V3)
    # V4 = U4


    print('Train on', N, 'samples, validate on', N_test, 'samples')

    for k in range(niter):

        start = time.time()

        # update V4
        V3 = (y_one_hot + gamma*U3 + alpha*V3)/(1 + gamma + alpha)

        # update U4
        U3 = (gamma*V3 + rho*(torch.mm(W3,V2) + b3.repeat(1,N)))/(gamma + rho)

        # update W4 and b4
        W3, b3 = updateWb_org(U3,V2,W3,b3, alpha,rho)
        # W3square = torch.square(W3)
        # Threshold = torch.quantile(torch.reshape(W3square, (-1,)), sparsity, interpolation='linear')
        # W3sparse = W3
        # W3sparse[W3square < Threshold] =  0
        # update V2
        V2 = updateV(U2,U3,W3,b3,rho,gamma)

        # update U2
        U2 = relu_prox(V2,(rho*torch.addmm(b2.repeat(1,N), W2, V1) + alpha*U2)/(rho + alpha),(rho + alpha)/gamma,d2,N)

        # update W2 and b2

        W2, b2 = updateWbsparse(U2,V1,W2,b2,W2sparse, alpha,rho,tau)
        W2square = torch.square(W2)
        Threshold = torch.quantile(torch.reshape(W2square, (-1,)), sparsity, interpolation='linear')
        W2sparse = W2
        W2sparse[W2square < Threshold] =  0

        # update V1
        V1 = updateV(U1,U2,W2,b2,rho,gamma)

        # update U1
        U1 = relu_prox(V1,(rho*torch.addmm(b1.repeat(1,N), W1, X_train) + alpha*U1)/(rho + alpha),(rho + alpha)/gamma,d1,N)

        # update W1 and b1
        W1, b1 = updateWbsparse(U1,X_train,W1,b1,W1sparse, alpha,rho,tau)
        W1square = torch.square(W1)
        Threshold = torch.quantile(torch.reshape(W1square, (-1,)), sparsity, interpolation='linear')
        W1sparse = W1
        W1sparse[W1square < Threshold] =  0

        a1_train = nn.ReLU()(torch.addmm(b1.repeat(1, N), W1sparse, X_train)).double()
        a2_train = nn.ReLU()(torch.addmm(b2.repeat(1, N),  W2sparse, a1_train)).double()
        pred = torch.argmax(torch.addmm(b3.repeat(1, N), W3, a2_train), dim=0)

        a1_test = nn.ReLU()(torch.addmm(b1.repeat(1, N_test), W1sparse, X_test)).double()
        a2_test = nn.ReLU()(torch.addmm(b2.repeat(1, N_test),  W2sparse, a1_test)).double()
        pred_test = torch.argmax(torch.addmm(b3.repeat(1, N_test), W3, a2_test), dim=0)


        loss1[k] = gamma/2*torch.pow(torch.dist(V3,y_one_hot,2),2).cpu().numpy()
        loss2[k] = loss1[k] + rho/2*torch.pow(torch.dist(torch.addmm(b1.repeat(1,N), W1sparse, X_train),U1,2),2).cpu().numpy() \
        +rho/2*torch.pow(torch.dist(torch.addmm(b2.repeat(1,N),  W2sparse, V1),U2,2),2).cpu().numpy() \
        +rho/2*torch.pow(torch.dist(torch.addmm(b3.repeat(1,N), W3, V2),U3,2),2).cpu().numpy() \
        + gamma/2*torch.pow(torch.dist(V1,nn.ReLU()(U1),2),2).cpu().numpy() \
        + gamma/2*torch.pow(torch.dist(V2,nn.ReLU()(U2),2),2).cpu().numpy() \
        + gamma/2*torch.pow(torch.dist(V3,U3,2),2).cpu().numpy() \
        + tau/2*torch.pow(torch.dist(W1,W1sparse,2),2).cpu().numpy()\
        + tau/2*torch.pow(torch.dist(W2,W2sparse,2),2).cpu().numpy()\
        # + tau/2*torch.pow(torch.dist(W3,W3sparse,2),2).cpu().numpy()

        # compute training accuracy
        correct_train = pred == y_train
        accuracy_train[k] = np.mean(correct_train.cpu().numpy())

        # compute validation accuracy
        correct_test = pred_test == y_test
        accuracy_test[k] = np.mean(correct_test.cpu().numpy())


        pred_train_np = pred.cpu().numpy()
        y_train_np = y_train.cpu().numpy()
        pred_test_np = pred_test.cpu().numpy()
        y_test_np = y_test.cpu().numpy()

        bacc_train[k] = balanced_accuracy_score(y_train_np, pred_train_np)
        bacc_test[k] = balanced_accuracy_score(y_test_np, pred_test_np)

        # compute training time
        stop = time.time()
        duration = stop - start
        time1[k] = duration

        # sparsity
        num_zeros_W1 = torch.sum(W1 == 0).item()
        num_zeros_W2 = torch.sum(W2 == 0).item()
        num_zeros_W3 = torch.sum(W3 == 0).item()
        total_zeros_old = num_zeros_W1 + num_zeros_W2 + num_zeros_W3
        total_weights = d0*d1+d1*d2+d2*d3
        true_sparsity[k] = total_zeros_old / total_weights

        new_W1, new_W2, new_W3 = process_weights(W1, W2, W3)
        num_zeros_W1_new = torch.sum(new_W1 == 0).item()
        num_zeros_W2_new = torch.sum(new_W2 == 0).item()
        num_zeros_W3_new = torch.sum(new_W3 == 0).item()
        total_zeros = num_zeros_W1_new + num_zeros_W2_new + num_zeros_W3_new
        total_weights = d0*d1+d1*d2+d2*d3
        effective_sparsity[k] = total_zeros / total_weights


        # print results
        print('Epoch', k + 1, '/', niter, '\n',
              '-', 'time:', time1[k], '-', 'sq_loss:', loss1[k], '-', 'tot_loss:', loss2[k],
              '-', 'acc:', accuracy_train[k], '-', 'val_acc:', accuracy_test[k],
              '-', 'bacc_train:', bacc_train[k], '-', 'bacc_test:', bacc_test[k],'-', 'true_sparsity:', true_sparsity[k],
             '-', 'effective_sparsity:', effective_sparsity[k])


    results[Out_iter,0,:] = torch.tensor(loss1)
    results[Out_iter,1,:] = torch.tensor(loss2)
    results[Out_iter,2,:] = torch.tensor(accuracy_train)
    results[Out_iter,3,:] = torch.tensor(accuracy_test)
    results[Out_iter,4,:] = torch.tensor(time1)
    results[Out_iter,5,:] = torch.tensor(bacc_train)
    results[Out_iter,6,:] = torch.tensor(bacc_test)
    results[Out_iter,7,:] = torch.tensor(true_sparsity)
    results[Out_iter,8,:] = torch.tensor(effective_sparsity)

#             df=pd.read_csv('/home/c/cl237/TenBCD/Sparse/LeNet300_100/Flare(BCD method)/differernt gamma rho/result.csv')
#             new_row = {
#                         'tau': tau, 
#                         'gamma': gamma, 
#                         'rho': rho, 
#                         'alpha': alpha,
#                         'loss1': loss1[niter-1], 
#                         'loss2': loss2[niter-1], 
#                         'accuracy_train': accuracy_train[niter-1],
#                         'accuracy_test': accuracy_test[niter-1], 
#                         'max_accuracy_train': max(accuracy_train),
#                         'max_accuracy_test': max(accuracy_test),
#                         'time': time1[niter-1], 
#                         'BACC_train': bacc_train[niter-1],
#                         'BACC_test': bacc_test[niter-1],
#                         'max_BACC_train': max(bacc_train),
#                         'max_BACC_test': max(bacc_test),
#                         'Sparsity': sparsity,
#                         'seed' : seed
#                     }

#             df=df.append(new_row,ignore_index=True)
#             df.to_csv('/home/c/cl237/TenBCD/Sparse/LeNet300_100/Flare(BCD method)/differernt gamma rho/result.csv',index=False)

filename="UniformScaled_" + "niter_"+ str(niter) + "Sparsity_" + str(sparsity) +"tau_" + str(tau) + "gamma_" + str(gamma) + \
                "rho_" + str(rho) + "alpha_" + str(alpha)+ ".mat"
from scipy.io import savemat
%cd /home/c/cl237/TenBCD/Sparse/LeNet300_100/Flare(BCD method)/different init/
savemat (filename, {'results': torch.Tensor.numpy(results)})

0.9 200 100 1 1
Train on 8114 samples, validate on 44689 samples
Epoch 1 / 1000 
 - time: 0.7295775413513184 - sq_loss: 397755.41356882214 - tot_loss: 397755.7515115086 - acc: 0.5 - val_acc: 0.028597641477768578 - bacc_train: 0.5 - bacc_test: 0.5 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 2 / 1000 
 - time: 0.02849102020263672 - sq_loss: 390070.2274626472 - tot_loss: 390071.484676654 - acc: 0.5 - val_acc: 0.028597641477768578 - bacc_train: 0.5 - bacc_test: 0.5 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 3 / 1000 
 - time: 0.028199195861816406 - sq_loss: 382602.5768030275 - tot_loss: 382605.2118149946 - acc: 0.5 - val_acc: 0.028597641477768578 - bacc_train: 0.5 - bacc_test: 0.5 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 4 / 1000 
 - time: 0.028158187866210938 - sq_loss: 375341.317144702 - tot_loss: 375345.65084919165 - acc: 0.5001232437761893 - val_acc: 0.02879903331916

Epoch 32 / 1000 
 - time: 0.027875185012817383 - sq_loss: 219213.70031182579 - tot_loss: 219214.38288269076 - acc: 0.80564456494947 - val_acc: 0.8003983083085323 - bacc_train: 0.8056445649494701 - bacc_test: 0.8630863988844263 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 33 / 1000 
 - time: 0.027258634567260742 - sq_loss: 214988.85840292714 - tot_loss: 214989.54562858184 - acc: 0.8066305151589844 - val_acc: 0.8034639396719551 - bacc_train: 0.8066305151589845 - bacc_test: 0.8642846214706044 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 34 / 1000 
 - time: 0.02697467803955078 - sq_loss: 210845.1381384029 - tot_loss: 210845.82973352718 - acc: 0.8078629529208775 - val_acc: 0.806507194164112 - bacc_train: 0.8078629529208774 - bacc_test: 0.8654713262387179 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 35 / 1000 
 - time: 0.02682185173034668 - sq_loss: 206781.00948456273 - tot_loss:

Epoch 64 / 1000 
 - time: 0.02756643295288086 - sq_loss: 117552.66796953283 - tot_loss: 117553.39848207217 - acc: 0.8171062361350752 - val_acc: 0.850007831904943 - bacc_train: 0.8171062361350752 - bacc_test: 0.8791284393169065 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 65 / 1000 
 - time: 0.027240753173828125 - sq_loss: 115284.73511910863 - tot_loss: 115285.46527848694 - acc: 0.8168597485826966 - val_acc: 0.8509029067555774 - bacc_train: 0.8168597485826966 - bacc_test: 0.8792094335508236 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 66 / 1000 
 - time: 0.028280973434448242 - sq_loss: 113060.52923447457 - tot_loss: 113061.25911241569 - acc: 0.8177224550160217 - val_acc: 0.8516637203786167 - bacc_train: 0.8177224550160217 - bacc_test: 0.8792213208763535 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 67 / 1000 
 - time: 0.03740835189819336 - sq_loss: 110879.20920696137 - tot_lo

Epoch 95 / 1000 
 - time: 0.031017780303955078 - sq_loss: 64256.51574837666 - tot_loss: 64257.24914700711 - acc: 0.8228986936159723 - val_acc: 0.866589093512945 - bacc_train: 0.8228986936159723 - bacc_test: 0.8823470836614289 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 96 / 1000 
 - time: 0.027227401733398438 - sq_loss: 63016.552532469526 - tot_loss: 63017.287241350255 - acc: 0.8228986936159723 - val_acc: 0.8668352390968694 - bacc_train: 0.8228986936159723 - bacc_test: 0.882473779660139 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 97 / 1000 
 - time: 0.04511094093322754 - sq_loss: 61800.513653084556 - tot_loss: 61801.24969280961 - acc: 0.8227754498397831 - val_acc: 0.867059007809528 - bacc_train: 0.8227754498397831 - bacc_test: 0.8822092393521197 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 98 / 1000 
 - time: 0.02687358856201172 - sq_loss: 60607.93799827645 - tot_loss: 60

Epoch 126 / 1000 
 - time: 0.026944398880004883 - sq_loss: 35121.49855946515 - tot_loss: 35122.43003307446 - acc: 0.8280749322159231 - val_acc: 0.8769272080377721 - bacc_train: 0.8280749322159231 - bacc_test: 0.8793145088566252 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 127 / 1000 
 - time: 0.026819229125976562 - sq_loss: 34443.740837280675 - tot_loss: 34444.67099885735 - acc: 0.8286911510968696 - val_acc: 0.8769495849090381 - bacc_train: 0.8286911510968696 - bacc_test: 0.8797057451633545 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 128 / 1000 
 - time: 0.026836156845092773 - sq_loss: 33779.06279091486 - tot_loss: 33780.04627991588 - acc: 0.8294306137540054 - val_acc: 0.8722951956857392 - bacc_train: 0.8294306137540055 - bacc_test: 0.878828912960588 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 129 / 1000 
 - time: 0.02695488929748535 - sq_loss: 33127.21210613857 - tot_los

Epoch 158 / 1000 
 - time: 0.026883840560913086 - sq_loss: 18826.773569123015 - tot_loss: 18827.666050070402 - acc: 0.8322652206063593 - val_acc: 0.8779565441160017 - bacc_train: 0.8322652206063594 - bacc_test: 0.8832617948855769 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 159 / 1000 
 - time: 0.026866436004638672 - sq_loss: 18463.4977658571 - tot_loss: 18464.39097296095 - acc: 0.8321419768301701 - val_acc: 0.8779565441160017 - bacc_train: 0.8321419768301701 - bacc_test: 0.8832617948855769 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 160 / 1000 
 - time: 0.026889801025390625 - sq_loss: 18107.23320498578 - tot_loss: 18108.126640811868 - acc: 0.8318954892777914 - val_acc: 0.8780460516010652 - bacc_train: 0.8318954892777914 - bacc_test: 0.8833078661578351 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 161 / 1000 
 - time: 0.026798009872436523 - sq_loss: 17757.84453908007 - tot_

Epoch 190 / 1000 
 - time: 0.027692794799804688 - sq_loss: 10092.899091648087 - tot_loss: 10093.970074123396 - acc: 0.832511708158738 - val_acc: 0.8821857727852491 - bacc_train: 0.832511708158738 - bacc_test: 0.8854386624997761 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 191 / 1000 
 - time: 0.026860952377319336 - sq_loss: 9898.184306677267 - tot_loss: 9899.243353755955 - acc: 0.8322652206063593 - val_acc: 0.8820962653001857 - bacc_train: 0.8322652206063594 - bacc_test: 0.8853925912275178 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 192 / 1000 
 - time: 0.026866674423217773 - sq_loss: 9707.227437518946 - tot_loss: 9708.278497221672 - acc: 0.8323884643825487 - val_acc: 0.882051511557654 - bacc_train: 0.8323884643825487 - bacc_test: 0.8853695555913887 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 193 / 1000 
 - time: 0.026875972747802734 - sq_loss: 9519.955942513941 - tot_los

Epoch 222 / 1000 
 - time: 0.02730274200439453 - sq_loss: 5411.504921792199 - tot_loss: 5412.620713785158 - acc: 0.8334976583682524 - val_acc: 0.8808655373805635 - bacc_train: 0.8334976583682524 - bacc_test: 0.885898266699962 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 223 / 1000 
 - time: 0.02679443359375 - sq_loss: 5307.133837296382 - tot_loss: 5308.262584980419 - acc: 0.8336209021444417 - val_acc: 0.8808655373805635 - bacc_train: 0.8336209021444416 - bacc_test: 0.885898266699962 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 224 / 1000 
 - time: 0.0268557071685791 - sq_loss: 5204.776812250402 - tot_loss: 5205.9218575386785 - acc: 0.8337441459206311 - val_acc: 0.8808879142518293 - bacc_train: 0.833744145920631 - bacc_test: 0.8859097845180266 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 225 / 1000 
 - time: 0.026821136474609375 - sq_loss: 5104.394940552747 - tot_loss: 5105.

Epoch 254 / 1000 
 - time: 0.027948379516601562 - sq_loss: 2902.0812293298573 - tot_loss: 2903.3214668449195 - acc: 0.8342371210253883 - val_acc: 0.8806865224104365 - bacc_train: 0.8342371210253883 - bacc_test: 0.8865655611327751 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 255 / 1000 
 - time: 0.02683877944946289 - sq_loss: 2846.1307131034673 - tot_loss: 2847.3880038688503 - acc: 0.8342371210253883 - val_acc: 0.8806641455391707 - bacc_train: 0.8342371210253883 - bacc_test: 0.8865540433147105 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 256 / 1000 
 - time: 0.026850461959838867 - sq_loss: 2791.259685424641 - tot_loss: 2792.540072109524 - acc: 0.8343603648015775 - val_acc: 0.8806417686679049 - bacc_train: 0.8343603648015775 - bacc_test: 0.8869222439853107 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 257 / 1000 
 - time: 0.026796579360961914 - sq_loss: 2737.4472950199734 - to

Epoch 286 / 1000 
 - time: 0.02679276466369629 - sq_loss: 1556.7738837175361 - tot_loss: 1558.0214449223051 - acc: 0.8362090214444171 - val_acc: 0.8801942312425877 - bacc_train: 0.8362090214444171 - bacc_test: 0.8882107615786786 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 287 / 1000 
 - time: 0.02685713768005371 - sq_loss: 1526.7762425529233 - tot_loss: 1528.0099865603395 - acc: 0.8364555089967957 - val_acc: 0.8801942312425877 - bacc_train: 0.8364555089967957 - bacc_test: 0.8882107615786786 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 288 / 1000 
 - time: 0.02718949317932129 - sq_loss: 1497.3571993592107 - tot_loss: 1498.5786173318456 - acc: 0.8364555089967957 - val_acc: 0.8801718543713218 - bacc_train: 0.8364555089967957 - bacc_test: 0.8881992437606141 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 289 / 1000 
 - time: 0.026880264282226562 - sq_loss: 1468.5055875375863 - to

Epoch 318 / 1000 
 - time: 0.026927471160888672 - sq_loss: 835.4315101656983 - tot_loss: 836.6174226297085 - acc: 0.8376879467586887 - val_acc: 0.8795453019758778 - bacc_train: 0.8376879467586886 - bacc_test: 0.8893956188094657 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 319 / 1000 
 - time: 0.026843547821044922 - sq_loss: 819.3450285249403 - tot_loss: 820.5462527475066 - acc: 0.837811190534878 - val_acc: 0.879500548233346 - bacc_train: 0.837811190534878 - bacc_test: 0.8893725831733366 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 320 / 1000 
 - time: 0.026897907257080078 - sq_loss: 803.5687083158374 - tot_loss: 804.7870877680027 - acc: 0.837811190534878 - val_acc: 0.8794781713620802 - bacc_train: 0.837811190534878 - bacc_test: 0.889361065355272 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 321 / 1000 
 - time: 0.026853084564208984 - sq_loss: 788.0965649858862 - tot_loss: 78

Epoch 350 / 1000 
 - time: 0.027060508728027344 - sq_loss: 448.56264808987623 - tot_loss: 449.54420458492825 - acc: 0.838180921863446 - val_acc: 0.8785830965114457 - bacc_train: 0.838180921863446 - bacc_test: 0.8900395080986844 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 351 / 1000 
 - time: 0.026790380477905273 - sq_loss: 439.9337687190604 - tot_loss: 440.90982437709016 - acc: 0.838180921863446 - val_acc: 0.8785383427689141 - bacc_train: 0.838180921863446 - bacc_test: 0.8900164724625552 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 352 / 1000 
 - time: 0.026831865310668945 - sq_loss: 431.4711787406065 - tot_loss: 432.441638963724 - acc: 0.8383041656396352 - val_acc: 0.8785383427689141 - bacc_train: 0.8383041656396352 - bacc_test: 0.89039619095122 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 353 / 1000 
 - time: 0.026942729949951172 - sq_loss: 423.17166392649864 - tot_loss:

Epoch 382 / 1000 
 - time: 0.026842832565307617 - sq_loss: 241.01164184721236 - tot_loss: 241.84585136930275 - acc: 0.838180921863446 - val_acc: 0.8784264584125847 - bacc_train: 0.838180921863446 - bacc_test: 0.8899588833722325 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 383 / 1000 
 - time: 0.02685093879699707 - sq_loss: 236.38131263209607 - tot_loss: 237.21140241538706 - acc: 0.838180921863446 - val_acc: 0.8784935890263823 - bacc_train: 0.838180921863446 - bacc_test: 0.8899934368264262 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 384 / 1000 
 - time: 0.026871204376220703 - sq_loss: 231.84015267302348 - tot_loss: 232.66660415613643 - acc: 0.838180921863446 - val_acc: 0.8785607196401799 - bacc_train: 0.838180921863446 - bacc_test: 0.8900279902806199 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 385 / 1000 
 - time: 0.026840686798095703 - sq_loss: 227.38644267525038 - tot_lo

Epoch 414 / 1000 
 - time: 0.02681732177734375 - sq_loss: 129.61560648225532 - tot_loss: 130.37218145114718 - acc: 0.8391668720729604 - val_acc: 0.8785607196401799 - bacc_train: 0.8391668720729604 - bacc_test: 0.8900279902806199 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 415 / 1000 
 - time: 0.02686762809753418 - sq_loss: 127.12970221605397 - tot_loss: 127.88395242533635 - acc: 0.8391668720729604 - val_acc: 0.8786054733827117 - bacc_train: 0.8391668720729604 - bacc_test: 0.8900510259167489 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 416 / 1000 
 - time: 0.026849985122680664 - sq_loss: 124.6916258125393 - tot_loss: 125.44370804478132 - acc: 0.8391668720729604 - val_acc: 0.8785830965114457 - bacc_train: 0.8391668720729604 - bacc_test: 0.8900395080986844 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 417 / 1000 
 - time: 0.0268402099609375 - sq_loss: 122.30045598031762 - tot_

Epoch 446 / 1000 
 - time: 0.02684187889099121 - sq_loss: 69.79374320515653 - tot_loss: 70.48271144006289 - acc: 0.8401528222824748 - val_acc: 0.8780460516010652 - bacc_train: 0.8401528222824748 - bacc_test: 0.8901427989538 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 447 / 1000 
 - time: 0.027148008346557617 - sq_loss: 68.45824510870786 - tot_loss: 69.14524481301402 - acc: 0.8401528222824748 - val_acc: 0.8780908053435968 - bacc_train: 0.8401528222824748 - bacc_test: 0.890165834589929 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 448 / 1000 
 - time: 0.02683854103088379 - sq_loss: 67.14840993001435 - tot_loss: 67.83326742558998 - acc: 0.8401528222824748 - val_acc: 0.8781355590861286 - bacc_train: 0.8401528222824748 - bacc_test: 0.8901888702260582 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 449 / 1000 
 - time: 0.02683734893798828 - sq_loss: 65.8637439041845 - tot_loss: 66.5

Epoch 478 / 1000 
 - time: 0.026856660842895508 - sq_loss: 37.64423816138695 - tot_loss: 38.28109123885951 - acc: 0.840645797387232 - val_acc: 0.8781803128286603 - bacc_train: 0.8406457973872319 - bacc_test: 0.8909713428395167 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 479 / 1000 
 - time: 0.02684950828552246 - sq_loss: 36.926149982989955 - tot_loss: 37.56153727574613 - acc: 0.840645797387232 - val_acc: 0.8782026896999262 - bacc_train: 0.8406457973872319 - bacc_test: 0.8909828606575813 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 480 / 1000 
 - time: 0.027058839797973633 - sq_loss: 36.22183926607471 - tot_loss: 36.85582255697192 - acc: 0.840645797387232 - val_acc: 0.8781579359573944 - bacc_train: 0.8406457973872319 - bacc_test: 0.8909598250214521 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 481 / 1000 
 - time: 0.026841163635253906 - sq_loss: 35.53104088596896 - tot_loss:

Epoch 510 / 1000 
 - time: 0.02699899673461914 - sq_loss: 20.349674898020446 - tot_loss: 20.950956078311048 - acc: 0.8417549913729356 - val_acc: 0.8780908053435968 - bacc_train: 0.8417549913729356 - bacc_test: 0.8909252715672584 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 511 / 1000 
 - time: 0.02682781219482422 - sq_loss: 19.963124367624836 - tot_loss: 20.56377217525842 - acc: 0.8416317475967464 - val_acc: 0.8781131822148627 - bacc_train: 0.8416317475967463 - bacc_test: 0.8909367893853231 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 512 / 1000 
 - time: 0.026819944381713867 - sq_loss: 19.583974462204537 - tot_loss: 20.184048251948184 - acc: 0.8416317475967464 - val_acc: 0.878225066571192 - bacc_train: 0.8416317475967463 - bacc_test: 0.8909943784756458 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 513 / 1000 
 - time: 0.026815414428710938 - sq_loss: 19.212083143314164 - tot

Epoch 542 / 1000 
 - time: 0.02696681022644043 - sq_loss: 11.034114334911301 - tot_loss: 11.605822394797306 - acc: 0.841878235149125 - val_acc: 0.8764125399986574 - bacc_train: 0.841878235149125 - bacc_test: 0.8904411537010823 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 543 / 1000 
 - time: 0.026843547821044922 - sq_loss: 10.825716572399115 - tot_loss: 11.394406343926825 - acc: 0.8420014789253143 - val_acc: 0.876323032513594 - bacc_train: 0.8420014789253143 - bacc_test: 0.890395082428824 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 544 / 1000 
 - time: 0.027100563049316406 - sq_loss: 10.621298007163348 - tot_loss: 11.187735045239137 - acc: 0.8420014789253143 - val_acc: 0.8762782787710622 - bacc_train: 0.8420014789253143 - bacc_test: 0.8907517652813597 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 545 / 1000 
 - time: 0.026782751083374023 - sq_loss: 10.42078234590383 - tot_l

Epoch 574 / 1000 
 - time: 0.027240991592407227 - sq_loss: 6.007830250308783 - tot_loss: 6.54668138851339 - acc: 0.8420014789253143 - val_acc: 0.875830741345745 - bacc_train: 0.8420014789253143 - bacc_test: 0.8909011274087335 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 575 / 1000 
 - time: 0.026828765869140625 - sq_loss: 5.89525515159608 - tot_loss: 6.433403962725831 - acc: 0.8420014789253143 - val_acc: 0.8758754950882768 - bacc_train: 0.8420014789253143 - bacc_test: 0.8913038815335274 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 576 / 1000 
 - time: 0.026897430419921875 - sq_loss: 5.784821459159035 - tot_loss: 6.322347237148668 - acc: 0.8422479664776928 - val_acc: 0.8758754950882768 - bacc_train: 0.842247966477693 - bacc_test: 0.8913038815335274 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 577 / 1000 
 - time: 0.026813983917236328 - sq_loss: 5.676488180731484 - tot_loss: 

Epoch 606 / 1000 
 - time: 0.026837825775146484 - sq_loss: 3.2896782459416873 - tot_loss: 3.808139441949828 - acc: 0.841878235149125 - val_acc: 0.8757859876032134 - bacc_train: 0.841878235149125 - bacc_test: 0.8912578102612692 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 607 / 1000 
 - time: 0.02687382698059082 - sq_loss: 3.228704482821296 - tot_loss: 3.7465772386289213 - acc: 0.841878235149125 - val_acc: 0.8757412338606816 - bacc_train: 0.841878235149125 - bacc_test: 0.8912347746251401 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 608 / 1000 
 - time: 0.026920795440673828 - sq_loss: 3.1688849698744392 - tot_loss: 3.686177432065572 - acc: 0.841878235149125 - val_acc: 0.8757412338606816 - bacc_train: 0.841878235149125 - bacc_test: 0.8912347746251401 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 609 / 1000 
 - time: 0.026823759078979492 - sq_loss: 3.1101976971837564 - tot_loss:

Epoch 638 / 1000 
 - time: 0.026862621307373047 - sq_loss: 1.815385018424014 - tot_loss: 2.3189249572401787 - acc: 0.8432339166872073 - val_acc: 0.8754503345342254 - bacc_train: 0.8432339166872073 - bacc_test: 0.8922241984562951 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 639 / 1000 
 - time: 0.027077674865722656 - sq_loss: 1.782247015863987 - tot_loss: 2.285357908141613 - acc: 0.8432339166872073 - val_acc: 0.8753832039204279 - bacc_train: 0.8432339166872073 - bacc_test: 0.8921896450021015 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 640 / 1000 
 - time: 0.026897907257080078 - sq_loss: 1.7497323299764618 - tot_loss: 2.25240800249971 - acc: 0.8433571604633966 - val_acc: 0.8754727114054913 - bacc_train: 0.8433571604633966 - bacc_test: 0.8922357162743597 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 641 / 1000 
 - time: 0.026858806610107422 - sq_loss: 1.7178291057981045 - tot_

Epoch 670 / 1000 
 - time: 0.0268709659576416 - sq_loss: 1.0126541244385037 - tot_loss: 1.505736570436211 - acc: 0.8434804042395859 - val_acc: 0.875360827049162 - bacc_train: 0.8434804042395859 - bacc_test: 0.8925578456727017 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 671 / 1000 
 - time: 0.026982545852661133 - sq_loss: 0.9945634761590364 - tot_loss: 1.4873845347741181 - acc: 0.8434804042395859 - val_acc: 0.875360827049162 - bacc_train: 0.8434804042395859 - bacc_test: 0.8925578456727017 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 672 / 1000 
 - time: 0.026842594146728516 - sq_loss: 0.9768102615597534 - tot_loss: 1.4693318029241333 - acc: 0.8437268917919645 - val_acc: 0.875360827049162 - bacc_train: 0.8437268917919645 - bacc_test: 0.8925578456727017 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 673 / 1000 
 - time: 0.02689337730407715 - sq_loss: 0.959387982191587 - tot_los

Epoch 702 / 1000 
 - time: 0.026998043060302734 - sq_loss: 0.573367511371847 - tot_loss: 1.073048508668904 - acc: 0.8449593295538576 - val_acc: 0.8750923045939717 - bacc_train: 0.8449593295538576 - bacc_test: 0.8924196318559272 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 703 / 1000 
 - time: 0.02717113494873047 - sq_loss: 0.5634337130973447 - tot_loss: 1.0643283824870382 - acc: 0.8449593295538576 - val_acc: 0.8750475508514399 - bacc_train: 0.8449593295538576 - bacc_test: 0.892396596219798 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 704 / 1000 
 - time: 0.027070999145507812 - sq_loss: 0.5536831914674955 - tot_loss: 1.0558399550125726 - acc: 0.8449593295538576 - val_acc: 0.8750923045939717 - bacc_train: 0.8449593295538576 - bacc_test: 0.8924196318559272 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 705 / 1000 
 - time: 0.02693462371826172 - sq_loss: 0.5441124711728325 - tot_

Epoch 734 / 1000 
 - time: 0.027541399002075195 - sq_loss: 0.3313941450009358 - tot_loss: 0.8727680865865569 - acc: 0.8454523046586148 - val_acc: 0.8750699277227059 - bacc_train: 0.8454523046586148 - bacc_test: 0.8927878325265273 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 735 / 1000 
 - time: 0.02682185173034668 - sq_loss: 0.3258979558921324 - tot_loss: 0.8648388170638561 - acc: 0.8454523046586148 - val_acc: 0.8750251739801741 - bacc_train: 0.8454523046586148 - bacc_test: 0.8927647968903982 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 736 / 1000 
 - time: 0.0268709659576416 - sq_loss: 0.3205017169134869 - tot_loss: 0.857937338337469 - acc: 0.8454523046586148 - val_acc: 0.8750699277227059 - bacc_train: 0.8454523046586147 - bacc_test: 0.8927878325265273 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 737 / 1000 
 - time: 0.027091264724731445 - sq_loss: 0.31520356456772985 - to

Epoch 764 / 1000 
 - time: 0.026910781860351562 - sq_loss: 0.20328417098052876 - tot_loss: 0.795370824489286 - acc: 0.8452058171062361 - val_acc: 0.8743762447134642 - bacc_train: 0.8452058171062361 - bacc_test: 0.8924307801665263 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 765 / 1000 
 - time: 0.026826858520507812 - sq_loss: 0.20010086694754042 - tot_loss: 0.7943074555596806 - acc: 0.8452058171062361 - val_acc: 0.8743538678421983 - bacc_train: 0.8452058171062361 - bacc_test: 0.8924192623484618 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 766 / 1000 
 - time: 0.026825904846191406 - sq_loss: 0.19697442877491458 - tot_loss: 0.7933007067200716 - acc: 0.8452058171062361 - val_acc: 0.8742867372284008 - bacc_train: 0.8452058171062361 - bacc_test: 0.8923847088942682 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 767 / 1000 
 - time: 0.02712559700012207 - sq_loss: 0.1939037957544137 

Epoch 795 / 1000 
 - time: 0.02842116355895996 - sq_loss: 0.12688288047602553 - tot_loss: 0.7187194323253506 - acc: 0.8460685235395613 - val_acc: 0.8778446597596724 - bacc_train: 0.8460685235395613 - bacc_test: 0.8934566049892015 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 796 / 1000 
 - time: 0.0268862247467041 - sq_loss: 0.12505183193304453 - tot_loss: 0.6940106600042368 - acc: 0.8460685235395613 - val_acc: 0.8778446597596724 - bacc_train: 0.8460685235395613 - bacc_test: 0.8934566049892015 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 797 / 1000 
 - time: 0.02686285972595215 - sq_loss: 0.12325277597424586 - tot_loss: 0.6845091479407762 - acc: 0.8460685235395613 - val_acc: 0.8778446597596724 - bacc_train: 0.8460685235395613 - bacc_test: 0.8934566049892015 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 798 / 1000 
 - time: 0.026812314987182617 - sq_loss: 0.1214851172134368 - 

Epoch 822 / 1000 
 - time: 0.04982709884643555 - sq_loss: 0.08713710377081606 - tot_loss: 0.6010358007044437 - acc: 0.8461917673157505 - val_acc: 0.8772181073642283 - bacc_train: 0.8461917673157505 - bacc_test: 0.8927543875947296 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 823 / 1000 
 - time: 0.04906153678894043 - sq_loss: 0.08599296344791553 - tot_loss: 0.5987497537098566 - acc: 0.8460685235395613 - val_acc: 0.8772181073642283 - bacc_train: 0.8460685235395613 - bacc_test: 0.8927543875947296 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 824 / 1000 
 - time: 0.05046892166137695 - sq_loss: 0.0848683180020466 - tot_loss: 0.596473269730182 - acc: 0.8460685235395613 - val_acc: 0.8771957304929625 - bacc_train: 0.8460685235395613 - bacc_test: 0.8927428697766651 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 825 / 1000 
 - time: 0.04351806640625 - sq_loss: 0.08376280073775431 - tot_

Epoch 853 / 1000 
 - time: 0.028272390365600586 - sq_loss: 0.05933403030964427 - tot_loss: 0.5372746997683387 - acc: 0.8460685235395613 - val_acc: 0.8762559018997964 - bacc_train: 0.8460685235395613 - bacc_test: 0.892259121417954 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 854 / 1000 
 - time: 0.026906251907348633 - sq_loss: 0.058656194520733264 - tot_loss: 0.5356029705399598 - acc: 0.8460685235395613 - val_acc: 0.876166394414733 - bacc_train: 0.8460685235395613 - bacc_test: 0.892213050145696 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 855 / 1000 
 - time: 0.02677011489868164 - sq_loss: 0.05798949047930693 - tot_loss: 0.5340178479147547 - acc: 0.8460685235395613 - val_acc: 0.876166394414733 - bacc_train: 0.8460685235395613 - bacc_test: 0.892213050145696 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 856 / 1000 
 - time: 0.026883602142333984 - sq_loss: 0.05733373817973867 - 

Epoch 885 / 1000 
 - time: 0.026912927627563477 - sq_loss: 0.042304209415786866 - tot_loss: 0.49199197527773914 - acc: 0.8469312299728864 - val_acc: 0.8758083644744792 - bacc_train: 0.8469312299728864 - bacc_test: 0.8905098911020043 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 886 / 1000 
 - time: 0.026871919631958008 - sq_loss: 0.04190053544663094 - tot_loss: 0.4909162882479341 - acc: 0.8469312299728864 - val_acc: 0.8758083644744792 - bacc_train: 0.8469312299728864 - bacc_test: 0.8905098911020043 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 887 / 1000 
 - time: 0.026920080184936523 - sq_loss: 0.04150318933736531 - tot_loss: 0.4898355213803008 - acc: 0.8469312299728864 - val_acc: 0.8757412338606816 - bacc_train: 0.8469312299728864 - bacc_test: 0.8900956191591459 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 888 / 1000 
 - time: 0.026861906051635742 - sq_loss: 0.0411120537954

Epoch 917 / 1000 
 - time: 0.02768087387084961 - sq_loss: 0.03204436045084767 - tot_loss: 0.4618641537532178 - acc: 0.8468079861966971 - val_acc: 0.8750475508514399 - bacc_train: 0.8468079861966971 - bacc_test: 0.8893588483104803 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 918 / 1000 
 - time: 0.02690410614013672 - sq_loss: 0.03179737609315106 - tot_loss: 0.4611262528288432 - acc: 0.8468079861966971 - val_acc: 0.8750475508514399 - bacc_train: 0.8468079861966971 - bacc_test: 0.8893588483104803 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 919 / 1000 
 - time: 0.026895523071289062 - sq_loss: 0.03155403348678239 - tot_loss: 0.46037086779084446 - acc: 0.8468079861966971 - val_acc: 0.8750027971089083 - bacc_train: 0.8468079861966971 - bacc_test: 0.8893358126743511 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 920 / 1000 
 - time: 0.026903152465820312 - sq_loss: 0.0313142742162155

Epoch 949 / 1000 
 - time: 0.0269930362701416 - sq_loss: 0.025680122543535033 - tot_loss: 0.4407775200683795 - acc: 0.8475474488538329 - val_acc: 0.8749580433663765 - bacc_train: 0.8475474488538328 - bacc_test: 0.8896924955268868 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 950 / 1000 
 - time: 0.026824235916137695 - sq_loss: 0.02552412442424744 - tot_loss: 0.4402686228684497 - acc: 0.8475474488538329 - val_acc: 0.8749356664951107 - bacc_train: 0.8475474488538328 - bacc_test: 0.8896809777088223 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 951 / 1000 
 - time: 0.02683544158935547 - sq_loss: 0.025370261792775444 - tot_loss: 0.4397781085202706 - acc: 0.8475474488538329 - val_acc: 0.8748909127525789 - bacc_train: 0.8475474488538328 - bacc_test: 0.8896579420726931 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 952 / 1000 
 - time: 0.026883602142333984 - sq_loss: 0.0252185060423737

Epoch 981 / 1000 
 - time: 0.026950836181640625 - sq_loss: 0.021596492765947252 - tot_loss: 0.42807928375877813 - acc: 0.8475474488538329 - val_acc: 0.8744433753272618 - bacc_train: 0.8475474488538328 - bacc_test: 0.8890478672227375 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 982 / 1000 
 - time: 0.026906967163085938 - sq_loss: 0.02149436092476948 - tot_loss: 0.42773457565749934 - acc: 0.8475474488538329 - val_acc: 0.8744209984559959 - bacc_train: 0.8475474488538328 - bacc_test: 0.8890363494046729 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 983 / 1000 
 - time: 0.02681708335876465 - sq_loss: 0.021393508001515456 - tot_loss: 0.42742591334912694 - acc: 0.8477939364062115 - val_acc: 0.87439862158473 - bacc_train: 0.8477939364062115 - bacc_test: 0.8890248315866085 - true_sparsity: 0.8957345971563981 - effective_sparsity: 0.8979620853080569
Epoch 984 / 1000 
 - time: 0.0269467830657959 - sq_loss: 0.021293923660904

### Visualization of training results

In [None]:
plt.figure()
plt.plot(np.arange(1,niter+1), loss2)
plt.yscale('log',base=2)
plt.title('training loss')

plt.figure()
plt.plot(np.arange(1,niter+1), accuracy_train)
plt.plot(np.arange(1,niter+1), accuracy_test)
plt.title('accuracy')