In [1]:
import sys
sys.path.append("../../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from ContrastiveModels import ContrastiveCorInfoMaxHopfieldSparse
from visualization import *
# matplotlib.use('Agg')

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), 
                                            torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,))])

mnist_dset_train = torchvision.datasets.MNIST('../../data', train=True, transform=transform, target_transform=None, download=True)
train_loader = torch.utils.data.DataLoader(mnist_dset_train, batch_size=20, shuffle=True, num_workers=0)

mnist_dset_test = torchvision.datasets.MNIST('../../data', train=False, transform=transform, target_transform=None, download=True)
test_loader = torch.utils.data.DataLoader(mnist_dset_test, batch_size=20, shuffle=False, num_workers=0)

In [4]:
# # Best Hyperparameters so far
# activation = hard_sigmoid
# architecture = [784, 500, 10]

# beta = 1
# lambda_ = 0.99999
# epsilon = 0.15
# one_over_epsilon = 1 / epsilon
# lr_start = {'ff' : np.array([1, 0.75]), 'fb': np.array([0.15, 0.12])}

# neural_lr_start = 0.05
# neural_lr_stop = 0.001
# neural_lr_rule = "constant"
# neural_lr_decay_multiplier = 0.01
# neural_dynamic_iterations_nudged = 5
# neural_dynamic_iterations_free = 30
# hopfield_g = 0.5
# use_random_sign_beta = True
# use_three_phase = False
# weight_decay = False

activation = hard_sigmoid
architecture = [784, 500, 10]

beta = 1
lambda_ = 0.99999
epsilon = 0.15
one_over_epsilon = 1 / epsilon
lr_start = {'ff' : np.array([0.5, 0.25]), 'fb': np.array([0.15, 0.1])}

neural_lr_start = 0.1
neural_lr_stop = 0.001
STlambda_lr = 0.001
neural_lr_rule = "divide_by_slow_loop_index"
neural_lr_decay_multiplier = 0.01
neural_dynamic_iterations_nudged = 10
neural_dynamic_iterations_free = 30
hopfield_g = 0.25
use_random_sign_beta = True
use_three_phase = False
weight_decay = False


model = ContrastiveCorInfoMaxHopfieldSparse(architecture = architecture, lambda_ = lambda_, 
                                            epsilon = epsilon, activation = activation)

In [5]:
_ = evaluateContrastiveCorInfoMaxHopfieldSparse(model, train_loader, hopfield_g,
                                          neural_lr_start, neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                          neural_lr_decay_multiplier, neural_dynamic_iterations_free, device)

Train accuracy :	 0.08503333333333334


In [6]:
_ = evaluateContrastiveCorInfoMaxHopfieldSparse_topk( model, train_loader, hopfield_g,
                                                      neural_lr_start, neural_lr_stop, STlambda_lr, 
                                                      neural_lr_rule, 
                                                      neural_lr_decay_multiplier, 
                                                      neural_dynamic_iterations_free, device)

Train accuracy :	 [0.08503333]


In [7]:
trn_acc_list = []
tst_acc_list = []

n_epochs = 30

for epoch_ in range(n_epochs):
    lr = {'ff' : lr_start['ff'] * (0.95)**epoch_, 'fb' : lr_start['fb'] * (0.95)**epoch_}
    for idx, (x, y) in tqdm(enumerate(train_loader)):
        x, y = x.to(device), y.to(device)
        x = x.view(x.size(0),-1).T
        y_one_hot = F.one_hot(y, 10).to(device).T
        take_debug_logs_ = (idx % 500 == 0)
        if use_random_sign_beta:
            rnd_sgn = 2*np.random.randint(2) - 1
            beta = rnd_sgn*beta
            
        neurons = model.batch_step_hopfield( x, y_one_hot, hopfield_g, 
                                             lr, neural_lr_start, neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                             neural_lr_decay_multiplier, neural_dynamic_iterations_free,
                                             neural_dynamic_iterations_nudged, beta, 
                                             use_three_phase, take_debug_logs_, weight_decay)
    
    trn_acc = evaluateContrastiveCorInfoMaxHopfieldSparse(  model, train_loader, hopfield_g, neural_lr_start, 
                                                            neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                                            neural_lr_decay_multiplier, 
                                                            neural_dynamic_iterations_free, 
                                                            device, printing = False)
    tst_acc = evaluateContrastiveCorInfoMaxHopfieldSparse(  model, test_loader, hopfield_g, neural_lr_start, 
                                                            neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                                            neural_lr_decay_multiplier, 
                                                            neural_dynamic_iterations_free, 
                                                            device, printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))
    print("Free Information ratio: {}".format(np.array(model.layerwise_forward_corinfo_list_free)[-1] / np.array(model.layerwise_backward_corinfo_list_free)[-1]))
    print("Nudged Information ratio: {}".format(np.array(model.layerwise_forward_corinfo_list_nudged)[-1] / np.array(model.layerwise_backward_corinfo_list_nudged)[-1]))

3000it [02:47, 17.95it/s]
0it [00:00, ?it/s]

Epoch : 1, Train Accuracy : 0.8909166666666667, Test Accuracy : 0.8981
Free Information ratio: [0.07272815]
Nudged Information ratio: [0.07272758]


3000it [02:40, 18.68it/s]
0it [00:00, ?it/s]

Epoch : 2, Train Accuracy : 0.9481666666666667, Test Accuracy : 0.9471
Free Information ratio: [0.0827662]
Nudged Information ratio: [0.08276522]


3000it [03:01, 16.55it/s]
0it [00:00, ?it/s]

Epoch : 3, Train Accuracy : 0.9616, Test Accuracy : 0.9575
Free Information ratio: [0.08089834]
Nudged Information ratio: [0.08089676]


3000it [02:24, 20.76it/s]
0it [00:00, ?it/s]

Epoch : 4, Train Accuracy : 0.967, Test Accuracy : 0.9624
Free Information ratio: [0.07931589]
Nudged Information ratio: [0.07931417]


3000it [02:44, 18.20it/s]
0it [00:00, ?it/s]

Epoch : 5, Train Accuracy : 0.9700166666666666, Test Accuracy : 0.9644
Free Information ratio: [0.06716541]
Nudged Information ratio: [0.06716334]


3000it [02:49, 17.70it/s]
0it [00:00, ?it/s]

Epoch : 6, Train Accuracy : 0.9744833333333334, Test Accuracy : 0.9692
Free Information ratio: [0.06529249]
Nudged Information ratio: [0.06529031]


3000it [02:42, 18.44it/s]
0it [00:00, ?it/s]

Epoch : 7, Train Accuracy : 0.9767333333333333, Test Accuracy : 0.9709
Free Information ratio: [0.05807787]
Nudged Information ratio: [0.05807549]


3000it [03:03, 16.34it/s]
0it [00:00, ?it/s]

Epoch : 8, Train Accuracy : 0.9773833333333334, Test Accuracy : 0.9714
Free Information ratio: [0.05644403]
Nudged Information ratio: [0.05644168]


3000it [02:31, 19.75it/s]
0it [00:00, ?it/s]

Epoch : 9, Train Accuracy : 0.9782333333333333, Test Accuracy : 0.971
Free Information ratio: [0.06421381]
Nudged Information ratio: [0.0642115]


3000it [03:08, 15.93it/s]
0it [00:00, ?it/s]

Epoch : 10, Train Accuracy : 0.9794833333333334, Test Accuracy : 0.9725
Free Information ratio: [0.0579295]
Nudged Information ratio: [0.05792695]


3000it [02:24, 20.75it/s]
0it [00:00, ?it/s]

Epoch : 11, Train Accuracy : 0.9808833333333333, Test Accuracy : 0.9741
Free Information ratio: [0.0430798]
Nudged Information ratio: [0.04307673]


3000it [03:08, 15.95it/s]
0it [00:00, ?it/s]

Epoch : 12, Train Accuracy : 0.9795833333333334, Test Accuracy : 0.971
Free Information ratio: [0.04320855]
Nudged Information ratio: [0.04320554]


3000it [02:24, 20.74it/s]
0it [00:00, ?it/s]

Epoch : 13, Train Accuracy : 0.9815833333333334, Test Accuracy : 0.9732
Free Information ratio: [0.04316537]
Nudged Information ratio: [0.04316242]


3000it [03:08, 15.91it/s]
0it [00:00, ?it/s]

Epoch : 14, Train Accuracy : 0.98155, Test Accuracy : 0.9729
Free Information ratio: [0.03674217]
Nudged Information ratio: [0.03673954]


3000it [02:24, 20.75it/s]
0it [00:00, ?it/s]

Epoch : 15, Train Accuracy : 0.9820166666666666, Test Accuracy : 0.972
Free Information ratio: [0.03948595]
Nudged Information ratio: [0.03948346]


3000it [03:03, 16.31it/s]
0it [00:00, ?it/s]

Epoch : 16, Train Accuracy : 0.9820833333333333, Test Accuracy : 0.9729
Free Information ratio: [0.03793857]
Nudged Information ratio: [0.03793596]


3000it [02:32, 19.69it/s]
0it [00:00, ?it/s]

Epoch : 17, Train Accuracy : 0.9817833333333333, Test Accuracy : 0.9713
Free Information ratio: [0.03596206]
Nudged Information ratio: [0.03595957]


3000it [03:06, 16.12it/s]
0it [00:00, ?it/s]

Epoch : 18, Train Accuracy : 0.9816, Test Accuracy : 0.9703
Free Information ratio: [0.03212943]
Nudged Information ratio: [0.03212662]


3000it [02:43, 18.33it/s]
0it [00:00, ?it/s]

Epoch : 19, Train Accuracy : 0.9811666666666666, Test Accuracy : 0.9701
Free Information ratio: [0.02837979]
Nudged Information ratio: [0.02837744]


3000it [02:55, 17.08it/s]
0it [00:00, ?it/s]

Epoch : 20, Train Accuracy : 0.9810166666666666, Test Accuracy : 0.9711
Free Information ratio: [0.02959819]
Nudged Information ratio: [0.02959573]


3000it [02:54, 17.22it/s]
0it [00:00, ?it/s]

Epoch : 21, Train Accuracy : 0.9802666666666666, Test Accuracy : 0.9698
Free Information ratio: [0.02998589]
Nudged Information ratio: [0.02998344]


3000it [02:44, 18.22it/s]
0it [00:00, ?it/s]

Epoch : 22, Train Accuracy : 0.9807333333333333, Test Accuracy : 0.9707
Free Information ratio: [0.02492137]
Nudged Information ratio: [0.02491908]


3000it [03:04, 16.24it/s]
0it [00:00, ?it/s]

Epoch : 23, Train Accuracy : 0.9802, Test Accuracy : 0.97
Free Information ratio: [0.02311184]
Nudged Information ratio: [0.02310976]


3000it [02:34, 19.45it/s]
0it [00:00, ?it/s]

Epoch : 24, Train Accuracy : 0.9799, Test Accuracy : 0.9699
Free Information ratio: [0.02121543]
Nudged Information ratio: [0.02121329]


3000it [03:08, 15.90it/s]
0it [00:00, ?it/s]

Epoch : 25, Train Accuracy : 0.9797666666666667, Test Accuracy : 0.9686
Free Information ratio: [0.0212251]
Nudged Information ratio: [0.021223]


3000it [02:24, 20.71it/s]
0it [00:00, ?it/s]

Epoch : 26, Train Accuracy : 0.97945, Test Accuracy : 0.9694
Free Information ratio: [0.01636603]
Nudged Information ratio: [0.01636404]


3000it [03:07, 16.00it/s]
0it [00:00, ?it/s]

Epoch : 27, Train Accuracy : 0.9796, Test Accuracy : 0.9697
Free Information ratio: [0.01507299]
Nudged Information ratio: [0.01507103]


3000it [02:25, 20.67it/s]
0it [00:00, ?it/s]

Epoch : 28, Train Accuracy : 0.97895, Test Accuracy : 0.9696
Free Information ratio: [0.01747392]
Nudged Information ratio: [0.01747176]


3000it [03:08, 15.89it/s]
0it [00:00, ?it/s]

Epoch : 29, Train Accuracy : 0.9786833333333333, Test Accuracy : 0.9686
Free Information ratio: [0.01603772]
Nudged Information ratio: [0.01603576]


3000it [02:25, 20.65it/s]


Epoch : 30, Train Accuracy : 0.9786666666666667, Test Accuracy : 0.9695
Free Information ratio: [0.01928197]
Nudged Information ratio: [0.01927981]


In [8]:
model.save_model_weights(pickle_name = "CorInfoTrial")

In [9]:
model2 = ContrastiveCorInfoMaxHopfieldSparse(architecture = architecture, lambda_ = lambda_, 
                                            epsilon = epsilon, activation = activation)


In [10]:
model2.load_model_weights("CorInfoTrial")

In [11]:
tst_acc = evaluateContrastiveCorInfoMaxHopfieldSparse(  model, test_loader, hopfield_g, neural_lr_start, 
                                                        neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                                        neural_lr_decay_multiplier, 
                                                        neural_dynamic_iterations_free, 
                                                        device, printing = True)

Test accuracy :	 0.9695


In [12]:
def topk_accuracy(output, target, topk=(1,)):
    """
    Computes the accuracy over the k top predictions for the specified values of k
    Modified from: https://github.com/EPFL-LCN/pub-illing2021-neurips/blob/b66061eddaec9d9f41213c3640d3f0961d13cc7b/vision/CLAPPVision/utils/utils.py
    output shape = (number of classes, batch size)
    target size = (batch size)
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 0, True, True)
        
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        correct_k_list = []
        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            correct_k_list.append(correct_k.item())
            res.append(correct_k.mul(1.0 / batch_size).item())
        return np.array(res), np.array(correct_k_list)

In [13]:
topk = (1,2)
np.zeros(len(topk))

array([0., 0.])

In [14]:
def evaluateContrastiveCorInfoMaxHopfieldSparse_topk(model, loader, hopfield_g, neural_lr_start, neural_lr_stop, STlambda_lr,
                                                     neural_lr_rule, neural_lr_decay_multiplier,
                                                     T, device, topk = (1,), printing = True):
    # Evaluate the Contrastive CorInfoMax Hopfield model on a dataloader with T steps for the dynamics for the classification task
    correct = np.zeros(len(topk))
    phase = 'Train' if loader.dataset.train else 'Test'
    
    for x, y in loader:
        x = x.view(x.size(0),-1).to(device).T
        y = y.to(device)
        
        neurons = model.init_neurons(x.size(1), device = model.device)
        
        # dynamics for T time steps
        neurons, _, _ = model.run_neural_dynamics_hopfield(x, 0, neurons, hopfield_g, neural_lr_start, neural_lr_stop, STlambda_lr, neural_lr_rule, neural_lr_decay_multiplier, T, beta = 0) 
        
        # pred = torch.argmax(neurons[-1], dim=0).squeeze()  # in this case prediction is done directly on the last (output) layer of neurons
        correct += topk_accuracy(neurons[-1], y, topk)[1]

    acc = correct/len(loader.dataset) 
    if printing:
        print(phase+' accuracy :\t', acc)   
    return acc

In [15]:
tst_acc = evaluateContrastiveCorInfoMaxHopfieldSparse_topk( model2, test_loader, hopfield_g, neural_lr_start, 
                                                            neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                                            neural_lr_decay_multiplier, 
                                                            neural_dynamic_iterations_free, 
                                                            device, topk = (1,2), printing = True)

Test accuracy :	 [0.9695 0.9871]


In [16]:
def topk_accuracy(output, target, topk=(1,)):
    """
    Computes the accuracy over the k top predictions for the specified values of k
    Modified from: https://github.com/EPFL-LCN/pub-illing2021-neurips/blob/b66061eddaec9d9f41213c3640d3f0961d13cc7b/vision/CLAPPVision/utils/utils.py
    output shape = (number of classes, batch size)
    target size = (batch size)
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 0, True, True)
        
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul(1.0 / batch_size).item())
        return np.array(res), correct_k.item()

In [17]:
x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = x.view(x.size(0),-1).T
y_one_hot = F.one_hot(y, 10).to(device).T

neurons = model.init_neurons(x.size(1), device = model.device)
T = neural_dynamic_iterations_free
neurons, _, _ = model.run_neural_dynamics_hopfield(x, 0, neurons, hopfield_g, 
                                                   neural_lr_start, neural_lr_stop, 
                                                   STlambda_lr, neural_lr_rule, 
                                                   neural_lr_decay_multiplier, 
                                                   T, beta = 0) 
topk_accuracy_, correct = topk_accuracy(neurons[-1], y, (1,2)) 
topk_accuracy_, correct

(array([1., 1.]), 20.0)

In [18]:
y.shape

torch.Size([20])

In [19]:
correct.sum(0).sum() / 20

AttributeError: 'float' object has no attribute 'sum'

In [None]:
torch.argmax(neurons[-1], dim=0).squeeze()

In [None]:
topk = (1,2)
target = y
output = neurons[-1]

maxk = max(topk)
batch_size = target.size(0)

_, pred = output.topk(maxk, 0, True, True)
# pred = pred.t()

correct = pred.eq(target.view(1, -1).expand_as(pred))

res = []
for k in topk:
    correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
    res.append(correct_k.mul_(100.0 / batch_size).item())
res

In [None]:
pred.shape, output.shape, correct.shape

In [None]:
Wff_save = []
for idx in range(len(model.Wff)):
    weight, bias = torch2numpy(model.Wff[idx]['weight']), torch2numpy(model.Wff[idx]['bias'])
    Wff_save.append({'weight': weight, 'bias': bias})
    
Wfb_save = []
for idx in range(len(model.Wfb)):
    weight = torch2numpy(model.Wfb[idx]['weight'])
    Wfb_save.append({'weight': weight})
    
B_save = []
for idx in range(len(model.B)):
    weight = torch2numpy(model.B[idx]['weight'])
    B_save.append({'weight': weight})
    
model_params = pd.DataFrame(columns = ['Wff', 'Wfb', 'B'])

model_params['Wff'] = Wff_save
model_params['Wfb'] = Wfb_save
model_params['B'] = B_save

model_params.to_pickle("model_save_trial" + ".pkl")

In [None]:
model_params_load

In [None]:
model_params_load = pd.read_pickle("model_save_trial" + ".pkl")

model2 = ContrastiveCorInfoMaxHopfieldSparse(architecture = architecture, lambda_ = lambda_, 
                                            epsilon = epsilon, activation = activation)

for idx in range(len(model2.Wff)):
    model2.Wff[idx]['weight'] = torch.tensor(model_params_load['Wff'].iloc[idx]['weight'], requires_grad = False).to(model2.device)
    model2.Wff[idx]['bias'] = torch.tensor(model_params_load['Wff'].iloc[idx]['bias'], requires_grad = False).to(model2.device)
    
for idx in range(len(model2.Wfb)):
    model2.Wfb[idx]['weight'] = torch.tensor(model_params_load['Wfb'].iloc[idx]['weight'], requires_grad = False).to(model2.device)
       
for idx in range(len(model2.B)):
    model2.B[idx]['weight'] = torch.tensor(model_params_load['B'].iloc[idx]['weight'], requires_grad = False).to(model2.device)
    
    

In [None]:
tst_acc = evaluateContrastiveCorInfoMaxHopfieldSparse(  model, test_loader, hopfield_g, neural_lr_start, 
                                                        neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                                        neural_lr_decay_multiplier, 
                                                        neural_dynamic_iterations_free, 
                                                        device, printing = True)

In [None]:
tst_acc = evaluateContrastiveCorInfoMaxHopfieldSparse(  model2, test_loader, hopfield_g, neural_lr_start, 
                                                        neural_lr_stop, STlambda_lr, neural_lr_rule, 
                                                        neural_lr_decay_multiplier, 
                                                        neural_dynamic_iterations_free, 
                                                        device, printing = True)

In [None]:
model_params = pd.DataFrame(columns = ['Wff', 'Wfb', 'B', 'epsilon', 'lambda_'])
model_params['Wff'] = model.Wff
model_params['Wfb'] = model.Wfb
model_params['B'] = model.B
model_params['epsilon'] = model.epsilon
model_params['lambda_'] = model.lambda_

model_params

In [None]:
plot_convergence_plot(trn_acc_list, xlabel = 'Number of Epochs', ylabel = 'Accuracy %',
                      title = 'Contrastive CorInfoMax Train Accuracy w.r.t. Epochs', 
                      figsize = (12,8), fontsize = 25, linewidth = 3)

In [None]:
plot_convergence_plot(tst_acc_list, xlabel = 'Number of Epochs', ylabel = 'Accuracy %',
                      title = 'Contrastive CorInfoMax Test Accuracy w.r.t. Epochs', 
                      figsize = (12,8), fontsize = 25, linewidth = 3)

In [None]:
# from IPython.display import Math, display
# ########### LATEX Style Display Matrix ###############
# def display_matrix(array):
#     """Display given numpy array with Latex format in Jupyter Notebook.
#     Args:
#         array (numpy array): Array to be displayed
#     """
#     data = ""
#     for line in array:
#         if len(line) == 1:
#             data += " %.3f &" % line + r" \\\n"
#             continue
#         for element in line:
#             data += " %.3f &" % element
#         data += r" \\" + "\n"
#     display(Math("\\begin{bmatrix} \n%s\\end{bmatrix}" % data))

In [None]:
# display_matrix(model.B[0]['weight'][:10,:10])

In [None]:
# display_matrix(torch.linalg.inv(model.Rh1)[:10,:10])

In [None]:
# torch.norm(model.B[0]['weight'] - torch.linalg.inv(model.Rh1))

In [None]:
# display_matrix(torch.linalg.inv(model.Rh2)[:10,:10])