In [1]:
import sys
sys.path.append("../../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from ContrastiveModels import ContrastiveCorInfoMaxHopfieldSparse
from visualization import *
# matplotlib.use('Agg')

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), 
                                            torchvision.transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), 
                                            std=(3*0.2023, 3*0.1994, 3*0.2010))])

cifar_dset_train = torchvision.datasets.CIFAR100('../../data', train=True, transform=transform, target_transform=None, download=True)
train_loader = torch.utils.data.DataLoader(cifar_dset_train, batch_size=20, shuffle=True, num_workers=0)

cifar_dset_test = torchvision.datasets.CIFAR100('../../data', train=False, transform=transform, target_transform=None, download=True)
test_loader = torch.utils.data.DataLoader(cifar_dset_test, batch_size=20, shuffle=False, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
activation = hard_sigmoid
architecture = architecture = [int(32*32*3), 2000, 1000, 100]

STlambda_lr_list = [5*1e-6, 5*1e-6, 0.01]
sparse_layers = [1, 2, 3]

beta = 1
lambda_ = 0.99999
epsilon = 0.15
lr_start =  {'ff' : np.array([0.16, 0.13, 0.08]), 'fb': np.array([np.nan, 0.06, 0.04])}


neural_lr_start = 0.03
neural_lr_stop = 0.001
neural_lr_rule = "constant"
neural_lr_decay_multiplier = 0.01
neural_dynamic_iterations_nudged = 5
neural_dynamic_iterations_free = 20
hopfield_g = 0.1
use_random_sign_beta = True
use_three_phase = False
weight_decay = False

model = ContrastiveCorInfoMaxHopfieldSparse(architecture = architecture, lambda_ = lambda_, 
                                            epsilon = epsilon, activation = activation, 
                                            sparse_layers = sparse_layers)

In [5]:
_ = evaluateContrastiveCorInfoMaxHopfieldSparseV2(model, test_loader, hopfield_g, neural_lr_start, 
                                                neural_lr_stop, STlambda_lr_list, neural_lr_rule, 
                                                neural_lr_decay_multiplier, 
                                                neural_dynamic_iterations_free, 
                                                device, printing = True, check_sparsity = True)

Test accuracy :	 0.0133
Sparsity for layers:  [0.5216525 0.5409814 0.629342 ]


In [6]:
# _ = evaluateContrastiveCorInfoMaxHopfieldSparse_topk( model, train_loader, hopfield_g,
#                                                       neural_lr_start, neural_lr_stop, STlambda_lr, 
#                                                       neural_lr_rule, 
#                                                       neural_lr_decay_multiplier, 
#                                                       neural_dynamic_iterations_free, device)

In [7]:
trn_acc_list = []
tst_acc_list = []
tst_sparsity_list = []
n_epochs = 30

for epoch_ in range(n_epochs):
    if epoch_ < 15:
        lr = {'ff' : lr_start['ff'] * (0.95)**epoch_, 'fb' : lr_start['fb'] * (0.95)**epoch_}
    else:
        lr = {'ff' : lr_start['ff'] * (0.9)**epoch_, 'fb' : lr_start['fb'] * (0.9)**epoch_}
    for idx, (x, y) in tqdm(enumerate(train_loader)):
        x, y = x.to(device), y.to(device)
        x = x.view(x.size(0),-1).T
        y_one_hot = F.one_hot(y, 100).to(device).T
        take_debug_logs_ = (idx % 500 == 0)
        if use_random_sign_beta:
            rnd_sgn = 2*np.random.randint(2) - 1
            beta = rnd_sgn*beta
            
        neurons = model.batch_step_hopfield( x, y_one_hot, hopfield_g, 
                                             lr, neural_lr_start, neural_lr_stop, STlambda_lr_list, neural_lr_rule, 
                                             neural_lr_decay_multiplier, neural_dynamic_iterations_free,
                                             neural_dynamic_iterations_nudged, beta, 
                                             use_three_phase, take_debug_logs_, weight_decay)
    
    trn_acc = evaluateContrastiveCorInfoMaxHopfieldSparseV2(model, test_loader, hopfield_g, neural_lr_start, 
                                                            neural_lr_stop, STlambda_lr_list, neural_lr_rule, 
                                                            neural_lr_decay_multiplier, 
                                                            neural_dynamic_iterations_free, 
                                                            device, printing = False, check_sparsity = False)
    tst_acc, tst_sparsity = evaluateContrastiveCorInfoMaxHopfieldSparseV2(  model, test_loader, 
                                                                            hopfield_g, neural_lr_start, 
                                                                            neural_lr_stop, STlambda_lr_list, 
                                                                            neural_lr_rule, 
                                                                            neural_lr_decay_multiplier, 
                                                                            neural_dynamic_iterations_free, 
                                                                            device, printing = False, 
                                                                            check_sparsity = True)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    tst_sparsity_list.append(tst_sparsity)
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))
    print("Layer sparsity : ", tst_sparsity_list[-1])
    print("Free Information ratio: {}".format(np.array(model.layerwise_forward_corinfo_list_free)[-1] / np.array(model.layerwise_backward_corinfo_list_free)[-1]))
    print("Nudged Information ratio: {}".format(np.array(model.layerwise_forward_corinfo_list_nudged)[-1] / np.array(model.layerwise_backward_corinfo_list_nudged)[-1]))

2500it [02:35, 16.12it/s]
0it [00:00, ?it/s]

Epoch : 1, Train Accuracy : 0.0249, Test Accuracy : 0.0249
Layer sparsity :  [0.5298092 0.5350262 0.924759 ]
Free Information ratio: [0.61978313 0.01732457]
Nudged Information ratio: [0.61977913 0.01732444]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 2, Train Accuracy : 0.0377, Test Accuracy : 0.0377
Layer sparsity :  [0.5405542  0.52263135 0.912247  ]
Free Information ratio: [0.58961045 0.01874532]
Nudged Information ratio: [0.58960564 0.01874508]


2500it [02:35, 16.08it/s]
0it [00:00, ?it/s]

Epoch : 3, Train Accuracy : 0.0527, Test Accuracy : 0.0527
Layer sparsity :  [0.5476212  0.50640345 0.90343493]
Free Information ratio: [0.67289763 0.02635359]
Nudged Information ratio: [0.67289218 0.02635323]


2500it [02:37, 15.85it/s]
0it [00:00, ?it/s]

Epoch : 4, Train Accuracy : 0.0644, Test Accuracy : 0.0644
Layer sparsity :  [0.54707986 0.4890207  0.9142609 ]
Free Information ratio: [0.73130916 0.02843024]
Nudged Information ratio: [0.73130353 0.02842985]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 5, Train Accuracy : 0.0724, Test Accuracy : 0.0724
Layer sparsity :  [0.5716174  0.48066744 0.90668595]
Free Information ratio: [0.91564656 0.03201453]
Nudged Information ratio: [0.9156375  0.03201397]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 6, Train Accuracy : 0.082, Test Accuracy : 0.082
Layer sparsity :  [0.5717618  0.46739292 0.90332794]
Free Information ratio: [1.0307707  0.03368557]
Nudged Information ratio: [1.03076116 0.033685  ]


2500it [02:35, 16.05it/s]
0it [00:00, ?it/s]

Epoch : 7, Train Accuracy : 0.0863, Test Accuracy : 0.0863
Layer sparsity :  [0.5740826  0.4562911  0.90869296]
Free Information ratio: [1.20303756 0.03278962]
Nudged Information ratio: [1.20302776 0.0327891 ]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 8, Train Accuracy : 0.095, Test Accuracy : 0.095
Layer sparsity :  [0.5688054  0.44582894 0.9069039 ]
Free Information ratio: [1.16830594 0.03444556]
Nudged Information ratio: [1.16829746 0.03444503]


2500it [02:35, 16.08it/s]
0it [00:00, ?it/s]

Epoch : 9, Train Accuracy : 0.0975, Test Accuracy : 0.0975
Layer sparsity :  [0.5732144  0.43489024 0.912771  ]
Free Information ratio: [1.09977332 0.03931457]
Nudged Information ratio: [1.09976393 0.03931386]


2500it [02:35, 16.08it/s]
0it [00:00, ?it/s]

Epoch : 10, Train Accuracy : 0.1012, Test Accuracy : 0.1012
Layer sparsity :  [0.58573776 0.4292769  0.907555  ]
Free Information ratio: [1.22470623 0.03249265]
Nudged Information ratio: [1.22470034 0.03249226]


2500it [02:35, 16.08it/s]
0it [00:00, ?it/s]

Epoch : 11, Train Accuracy : 0.1052, Test Accuracy : 0.1052
Layer sparsity :  [0.5675791 0.4174559 0.911196 ]
Free Information ratio: [0.99825506 0.03959445]
Nudged Information ratio: [0.99824799 0.03959383]


2500it [02:35, 16.08it/s]
0it [00:00, ?it/s]

Epoch : 12, Train Accuracy : 0.1121, Test Accuracy : 0.1121
Layer sparsity :  [0.5572179  0.41632533 0.91554797]
Free Information ratio: [0.86606361 0.04418336]
Nudged Information ratio: [0.86605528 0.04418249]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 13, Train Accuracy : 0.1189, Test Accuracy : 0.1189
Layer sparsity :  [0.5623707  0.40807042 0.91851395]
Free Information ratio: [0.8098898  0.04405276]
Nudged Information ratio: [0.80988213 0.04405191]


2500it [02:35, 16.08it/s]
0it [00:00, ?it/s]

Epoch : 14, Train Accuracy : 0.1228, Test Accuracy : 0.1228
Layer sparsity :  [0.5551405 0.4027608 0.9179311]
Free Information ratio: [0.76776186 0.04808859]
Nudged Information ratio: [0.76775291 0.04808751]


2500it [02:35, 16.07it/s]
0it [00:00, ?it/s]

Epoch : 15, Train Accuracy : 0.1252, Test Accuracy : 0.1252
Layer sparsity :  [0.5417823  0.4003716  0.91564894]
Free Information ratio: [0.68844316 0.04795342]
Nudged Information ratio: [0.68843492 0.04795233]


2500it [02:35, 16.07it/s]
0it [00:00, ?it/s]

Epoch : 16, Train Accuracy : 0.13, Test Accuracy : 0.13
Layer sparsity :  [0.5485954  0.39652044 0.91776407]
Free Information ratio: [0.78142888 0.0504871 ]
Nudged Information ratio: [0.7814208  0.05048606]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 17, Train Accuracy : 0.1293, Test Accuracy : 0.1293
Layer sparsity :  [0.54501647 0.39605463 0.91503096]
Free Information ratio: [0.73196634 0.05179776]
Nudged Information ratio: [0.73195594 0.05179635]


2500it [02:35, 16.10it/s]
0it [00:00, ?it/s]

Epoch : 18, Train Accuracy : 0.131, Test Accuracy : 0.131
Layer sparsity :  [0.5474185  0.3942358  0.91590995]
Free Information ratio: [0.77431356 0.0528175 ]
Nudged Information ratio: [0.77430352 0.05281615]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 19, Train Accuracy : 0.133, Test Accuracy : 0.133
Layer sparsity :  [0.54760516 0.3929374  0.9149649 ]
Free Information ratio: [0.71699459 0.05298506]
Nudged Information ratio: [0.71698492 0.05298369]


2500it [02:35, 16.09it/s]
0it [00:00, ?it/s]

Epoch : 20, Train Accuracy : 0.1338, Test Accuracy : 0.1338
Layer sparsity :  [0.546864   0.39115036 0.9163619 ]
Free Information ratio: [0.73408651 0.05117445]
Nudged Information ratio: [0.73407631 0.05117301]


2500it [02:35, 16.11it/s]
0it [00:00, ?it/s]

Epoch : 21, Train Accuracy : 0.1334, Test Accuracy : 0.1334
Layer sparsity :  [0.5475542  0.38987952 0.91501   ]
Free Information ratio: [0.75161587 0.05157254]
Nudged Information ratio: [0.75160559 0.05157107]


2500it [02:35, 16.10it/s]
0it [00:00, ?it/s]

Epoch : 22, Train Accuracy : 0.134, Test Accuracy : 0.134
Layer sparsity :  [0.5462882  0.38918114 0.91679806]
Free Information ratio: [0.70627992 0.0530677 ]
Nudged Information ratio: [0.70626934 0.05306612]


2500it [02:35, 16.10it/s]
0it [00:00, ?it/s]

Epoch : 23, Train Accuracy : 0.1333, Test Accuracy : 0.1333
Layer sparsity :  [0.5446193  0.38889152 0.917225  ]
Free Information ratio: [0.72947848 0.05041824]
Nudged Information ratio: [0.72946825 0.05041672]


2500it [02:35, 16.10it/s]
0it [00:00, ?it/s]

Epoch : 24, Train Accuracy : 0.135, Test Accuracy : 0.135
Layer sparsity :  [0.5436369  0.3884232  0.91681796]
Free Information ratio: [0.74593364 0.054707  ]
Nudged Information ratio: [0.74592314 0.0547054 ]


2500it [02:35, 16.11it/s]
0it [00:00, ?it/s]

Epoch : 25, Train Accuracy : 0.1356, Test Accuracy : 0.1356
Layer sparsity :  [0.54471606 0.38728553 0.91619396]
Free Information ratio: [0.72805156 0.05240976]
Nudged Information ratio: [0.72804042 0.05240805]


2500it [02:35, 16.10it/s]
0it [00:00, ?it/s]

Epoch : 26, Train Accuracy : 0.136, Test Accuracy : 0.136
Layer sparsity :  [0.54377055 0.38698593 0.9173569 ]
Free Information ratio: [0.74401037 0.05129936]
Nudged Information ratio: [0.74399963 0.05129769]


2500it [02:35, 16.10it/s]
0it [00:00, ?it/s]

Epoch : 27, Train Accuracy : 0.1362, Test Accuracy : 0.1362
Layer sparsity :  [0.542729   0.3865765  0.91614795]
Free Information ratio: [0.69883757 0.05366377]
Nudged Information ratio: [0.69882583 0.05366188]


2500it [02:35, 16.08it/s]
0it [00:00, ?it/s]

Epoch : 28, Train Accuracy : 0.136, Test Accuracy : 0.136
Layer sparsity :  [0.54175955 0.38642374 0.916866  ]
Free Information ratio: [0.72223494 0.05123036]
Nudged Information ratio: [0.72222365 0.05122854]


2500it [02:35, 16.11it/s]
0it [00:00, ?it/s]

Epoch : 29, Train Accuracy : 0.1358, Test Accuracy : 0.1358
Layer sparsity :  [0.54041064 0.38601163 0.91631895]
Free Information ratio: [0.66750552 0.05092161]
Nudged Information ratio: [0.66749304 0.0509195 ]


2500it [02:35, 16.08it/s]


Epoch : 30, Train Accuracy : 0.1359, Test Accuracy : 0.1359
Layer sparsity :  [0.54084885 0.38538194 0.916151  ]
Free Information ratio: [0.64916693 0.05329695]
Nudged Information ratio: [0.64915424 0.05329471]


In [8]:
def columnwise_sparsity(x, threshold = 0.01):
    return (x < threshold).sum(0) / x.shape[0]

In [9]:
x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)

x = x.view(x.size(0),-1).T
y_one_hot = F.one_hot(y, 10).to(device).T
neurons = model.init_neurons(x.size(1), device = model.device)

neurons = model.run_neural_dynamics_hopfield(x, 0, neurons, hopfield_g, neural_lr_start, 
                                   neural_lr_stop, STlambda_lr_list, neural_lr_rule, 
                                   neural_lr_decay_multiplier, neural_dynamic_iterations_free, 
                                   0, False)

../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:276: operator(): block: [0,0,0], thread: [0,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:276: operator(): block: [0,0,0], thread: [1,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:276: operator(): block: [0,0,0], thread: [2,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:276: operator(): block: [0,0,0], thread: [4,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:276: operator(): block: [0,0,0], thread: [5,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:276: operator(): block: [0,0,0], 

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
columnwise_sparsity(neurons[0][0]).mean()

In [None]:
model.save_model_weights(pickle_name = "CorInfoTrial")

In [None]:
model2 = ContrastiveCorInfoMaxHopfieldSparse(architecture = architecture, lambda_ = lambda_, 
                                            epsilon = epsilon, activation = activation)


In [None]:
model2.load_model_weights("CorInfoTrial")

In [None]:
tst_acc = evaluateContrastiveCorInfoMaxHopfieldSparse(  model, test_loader, hopfield_g, neural_lr_start, 
                                                        neural_lr_stop, STlambda_lr_list, neural_lr_rule, 
                                                        neural_lr_decay_multiplier, 
                                                        neural_dynamic_iterations_free, 
                                                        device, printing = True)

In [None]:
tst_acc = evaluateContrastiveCorInfoMaxHopfieldSparse(  model2, test_loader, hopfield_g, neural_lr_start, 
                                                        neural_lr_stop, STlambda_lr_list, neural_lr_rule, 
                                                        neural_lr_decay_multiplier, 
                                                        neural_dynamic_iterations_free, 
                                                        device, printing = True)

In [None]:
plt.plot(model.forward_backward_angles)

In [None]:
plot_convergence_plot(trn_acc_list, xlabel = 'Number of Epochs', ylabel = 'Accuracy %',
                      title = 'Contrastive CorInfoMax Train Accuracy w.r.t. Epochs', 
                      figsize = (12,8), fontsize = 25, linewidth = 3)

In [None]:
plot_convergence_plot(tst_acc_list, xlabel = 'Number of Epochs', ylabel = 'Accuracy %',
                      title = 'Contrastive CorInfoMax Test Accuracy w.r.t. Epochs', 
                      figsize = (12,8), fontsize = 25, linewidth = 3)

In [None]:
# from IPython.display import Math, display
# ########### LATEX Style Display Matrix ###############
# def display_matrix(array):
#     """Display given numpy array with Latex format in Jupyter Notebook.
#     Args:
#         array (numpy array): Array to be displayed
#     """
#     data = ""
#     for line in array:
#         if len(line) == 1:
#             data += " %.3f &" % line + r" \\\n"
#             continue
#         for element in line:
#             data += " %.3f &" % element
#         data += r" \\" + "\n"
#     display(Math("\\begin{bmatrix} \n%s\\end{bmatrix}" % data))

In [None]:
# display_matrix(model.B[0]['weight'][:10,:10])

In [None]:
# display_matrix(torch.linalg.inv(model.Rh1)[:10,:10])

In [None]:
# torch.norm(model.B[0]['weight'] - torch.linalg.inv(model.Rh1))

In [None]:
# display_matrix(torch.linalg.inv(model.Rh2)[:10,:10])

In [None]:
# In this section, we present the CorInfoMax network structure and corresponding neuronal dynamics for different selections of $\displaystyle \Pcal^{(k)}$. In particular, we can choose different presumed domain $\displaystyle \Pcal^{(k)}$ for each layer-$k$. To illustrate, consider $\displaystyle \Pcal^{(k)} = \mathcal{B}_{1,+}=\{\rvr: \|\rvr\|_1 \leq 1, \vzero \leq \rvr \}$ that is the intersection of the $L1$ norm ball and the nonnegative orthant. To derive the network dynamics corresponding to $\rvr^{(k)}[t]$, we consider the following constrained optimization similar to (\ref{eq:objsysdynamics}),

# \begin{eqnarray}
#     \underset{\rvr^{(k)}[t]}{\text{maximize }} & \Bigg( \Bigg.\frac{1}{2}(\log \det (\hat{\rmR}_{\rvr^{(k)}}[t]+ \epsilon_{k-1} \mI)+\log \det (\hat{\rmR}_{\rvr^{(k)}}[t]+ \epsilon_{k} \mI))\nonumber\\  &-\frac{1}{2\epsilon_{k-1}}\left\|\overset{\rightarrow}{\rve}^{(k)}[t]\right\|_2^2-\frac{1}{2\epsilon_k}\left\|\overset{\leftarrow}{\rve}^{(k)}[t]\right\|_2^2\Bigg. \Bigg)\label{eq:objsysdynamicsSparse}\\
#     \text{subject to} &  \|\rvr^{(k)}[t]\|_1 \le 1,\nonumber\\
#     &   \mathbf{0}\le \rvr^{(k)}[t],\nonumber
# \end{eqnarray}

# We can write down the Lagrangian min-max problem corresponding to this optimization as 
# \begin{eqnarray}
# \underset{{q}_k[t] \ge 0}{\text{minimize }} 
#  \underset{\rvr^{(k)}[t] \ge \vzero}{\text{maximize }}  L(\rvr^{(k)}[t], {q}_k[t])=O(\rvr^{(k)}[t])+{q}_k[t](\|\rvr^{(k)}[t]\|_1 - 1)
# \end{eqnarray}
# where $O(\rvr^{(k)}[t])$ is the objective in (\ref{eq:objsysdynamicsSparse}). Following the proximal gradient update for $\displaystyle \rvr^{(k)}[t]$ with the gradient expression (\ref{eq:gradrk}), we can write the output dynamics for layer-$k$ as follows, 

# \begin{align}
#     &\tau_{\rvu}\frac{d \rvu^{(k)}[t;s]}{ds}=-g_{lk}\rvu^{(k)}[t;s]+g_{A,k}(\rvv^{(k)}_A[t;s]-\rvu^{(k)}[t;s])+g_{B,k}(\rvv^{(k)}_B[t;s]-\rvu^{(k)}[t;s]), \nonumber%\label{eq:hiddynamicsSparse1}
#     \\
#   &\rvr^{(k)}[t;s]= \text{ReLU}(\rvu^{(k)}[t;s] - q^{(k)}[t;s]) \nonumber, %\label{eq:hiddynamicsSparse2}
#   \end{align}
  
# % \begin{eqnarray}
# %     \tau_{\rvu}\frac{d \rvu^{(k)}[t;s]}{ds}&=&-g_{lk}\rvu^{(k)}[t;s]+\frac{1}{\epsilon_k}\mM^{(k)}[t]\vr^{(k)}[t;s]-\frac{1}{\epsilon_{k-1}}\overset{\rightarrow}{\rve}_u^{(k)}[t;s]-\frac{1}{\epsilon_{k}}\overset{\leftarrow}{\rve}_u^{(k)}[t;s], \nonumber\\
# %    \overset{\rightarrow}{\rve}_u^{(k)}[t;s]&=&\rvu^{(k)}[t;s]-\mW^{(k-1)}_{ff}[t]\rvr^{(k-1)}[t;s], \nonumber\\
# % \overset{\leftarrow}{\rve}_u^{(k)}[t;s]&=&\rvu^{(k)}[t;s]-\mW^{(k)}_{fb}[t]\rvr^{(k+1)}[t;s],\nonumber\\
# % \rvr^{(k)}[t;s]&=& \text{ReLU}(\rvu^{(k)}[t;s] - q^{(k)}[t;s]) \nonumber,
# % \end{eqnarray} 

# where we utilized the intermediate variable $\displaystyle \rvu^{(k)}$, and $\text{ReLU}$ is the element-wise rectified linear unit.
# The update corresponding to the Lagrangian variable $q_1[t;s]$ can be written based on the dual minimization,

# \begin{align}
#     \frac{d a_k[t;s]}{ds} = -a_k[t;s] + \sum_{j = 1}^{N_k} \rvr_j^{(k)}[t;s] - 1 + q^{(k)}[t;s], \quad q^{(k)}[t;s] = \text{ReLU}(a_k[t;s]) \nonumber
# \end{align}

# The Lagrangian variable $\displaystyle q^{(k)}$ in the above formulation corresponds to an additional inhibition inter-neuron that takes input from the whole neurons of layer-$k$ and produces an inhibition signal.