In [1]:
import sys
sys.path.append("../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from models import *
from visualization import *
# matplotlib.use('Agg')

In [2]:
def evaluateCorInfoMaxV3(model, loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                         neural_lr_decay_multiplier,
                         neural_dynamic_iterations, device, printing = True):
    # Evaluate the model on a dataloader with T steps for the dynamics
    #model.eval()
    correct=0
    phase = 'Train' if loader.dataset.train else 'Test'
    
    for x, y in loader:
#         x = x.view(x.size(0),-1).to(device).T
#         x = activation_inverse(2*x.view(x.size(0),-1).T - 1, "sigmoid").to(device)
        x = activation_inverse(x.view(x.size(0),-1).T , "sigmoid").to(device)
        y = y.to(device)
        
        neurons = model.fast_forward(x)
        
        # dynamics for T time steps
        neurons = model.run_neural_dynamics(x, y_one_hot, neurons, neural_lr_start, neural_lr_stop, 
                                            neural_lr_rule,
                                            neural_lr_decay_multiplier, neural_dynamic_iterations, 0, "test")
        pred = torch.argmax(neurons[-1], dim=0).squeeze()  # in this case prediction is done directly on the last (output) layer of neurons
        correct += (y == pred).sum().item()

    acc = correct/len(loader.dataset) 
    if printing:
        print(phase+' accuracy :\t', acc)   
    return acc

def evaluateCorInfoMaxV4(model, loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                         neural_lr_decay_multiplier,
                         neural_dynamic_iterations, device, printing = True):
    # Evaluate the model on a dataloader with T steps for the dynamics
    #model.eval()
    correct=0
    phase = 'Train' if loader.dataset.train else 'Test'
    
    for x, y in loader:
#         x = x.view(x.size(0),-1).to(device).T
#         x = activation_inverse(2*x.view(x.size(0),-1).T - 1, "sigmoid").to(device)
        x = activation_inverse(x.view(x.size(0),-1).T, "sigmoid").to(device)
        y = y.to(device)
        
        neurons = model.fast_forward(x)
        
#         # dynamics for T time steps
#         neurons = model.run_neural_dynamics(x, y_one_hot, neurons, neural_lr_start, neural_lr_stop, 
#                                             neural_lr_rule,
#                                             neural_lr_decay_multiplier, neural_dynamic_iterations, 0, "test")
        pred = torch.argmax(neurons[-1], dim=0).squeeze()  # in this case prediction is done directly on the last (output) layer of neurons
        correct += (y == pred).sum().item()

    acc = correct/len(loader.dataset) 
    if printing:
        print(phase+' accuracy :\t', acc)   
    return acc

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [4]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), 
                                            torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,))])

mnist_dset_train = torchvision.datasets.MNIST('./data', train=True, transform=transform, target_transform=None, download=True)
train_loader = torch.utils.data.DataLoader(mnist_dset_train, batch_size=20, shuffle=True, num_workers=0)

mnist_dset_test = torchvision.datasets.MNIST('./data', train=False, transform=transform, target_transform=None, download=True)
test_loader = torch.utils.data.DataLoader(mnist_dset_test, batch_size=20, shuffle=False, num_workers=0)

In [5]:
activation = hard_sigmoid
activation_type = "sigmoid"
architecture = [784, 500, 10]

x,y = next(iter(train_loader))
x = x.view(x.size(0),-1).to(device).T
y_one_hot = F.one_hot(y, 10).to(device).T

beta = 1
lambda_ = 0.999
epsilon = 0.1#0.5
one_over_epsilon = 1 / epsilon
lr_start = {'ff' : 0.01, 'fb': 0.001, 'lat': 1e-3}
neural_lr_start = 0.02
neural_lr_stop = 1e-3
neural_lr_rule = "divide_by_slow_loop_index"
neural_lr_decay_multiplier = 0.1
neural_dynamic_iterations = 50
output_sparsity = True
STlambda_lr = 0.005
model = CorInfoMaxV2( architecture = architecture, lambda_ = lambda_,
                    epsilon = epsilon, activation_type = activation_type,
                    output_sparsity = output_sparsity, STlambda_lr = STlambda_lr
                  )

In [6]:
evaluateCorInfoMaxV3(model, test_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                         neural_lr_decay_multiplier,
                         neural_dynamic_iterations, device, printing = True)

Test accuracy :	 0.1


0.1

In [7]:
trn_acc_list = []
tst_acc_list = []

n_epochs = 50

for epoch_ in range(n_epochs):
    lr = {'ff' : lr_start['ff'] * (0.99)**epoch_, 'fb' : lr_start['fb'] * (0.99)**epoch_}
    for idx, (x, y) in tqdm(enumerate(train_loader)):
        x, y = x.to(device), y.to(device)
#         x = x.view(x.size(0),-1).T
#         x = activation_inverse(2*x.view(x.size(0),-1).T - 1, "sigmoid")
        x = activation_inverse(x.view(x.size(0),-1).T, "sigmoid")
        y_one_hot = F.one_hot(y, 10).to(device).T

        _ = model.batch_step(  x, y_one_hot, lr, neural_lr_start, neural_lr_stop, neural_lr_rule,
                                    neural_lr_decay_multiplier, neural_dynamic_iterations, beta)

    trn_acc = evaluateCorInfoMaxV4(  model, train_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                     neural_lr_decay_multiplier,
                                     neural_dynamic_iterations, device, printing = False)
    tst_acc = evaluateCorInfoMaxV4(  model, test_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                     neural_lr_decay_multiplier,
                                     neural_dynamic_iterations, device, printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

3000it [02:33, 19.51it/s]
2it [00:00, 18.40it/s]

Epoch : 1, Train Accuracy : 0.4805, Test Accuracy : 0.4842


3000it [02:33, 19.56it/s]
3it [00:00, 20.26it/s]

Epoch : 2, Train Accuracy : 0.5909166666666666, Test Accuracy : 0.5928


3000it [02:33, 19.55it/s]
2it [00:00, 17.98it/s]

Epoch : 3, Train Accuracy : 0.5993166666666667, Test Accuracy : 0.595


3000it [02:33, 19.56it/s]
2it [00:00, 18.89it/s]

Epoch : 4, Train Accuracy : 0.6197833333333334, Test Accuracy : 0.6216


3000it [02:33, 19.56it/s]
2it [00:00, 18.10it/s]

Epoch : 5, Train Accuracy : 0.6461666666666667, Test Accuracy : 0.6501


3000it [02:33, 19.53it/s]
2it [00:00, 17.40it/s]

Epoch : 6, Train Accuracy : 0.6432333333333333, Test Accuracy : 0.6429


3000it [02:33, 19.56it/s]
2it [00:00, 18.24it/s]

Epoch : 7, Train Accuracy : 0.6372333333333333, Test Accuracy : 0.6391


3000it [01:21, 36.59it/s]
4it [00:00, 35.87it/s]

Epoch : 8, Train Accuracy : 0.6000833333333333, Test Accuracy : 0.5947


3000it [01:17, 38.65it/s]
4it [00:00, 35.67it/s]

Epoch : 9, Train Accuracy : 0.62365, Test Accuracy : 0.6222


3000it [01:17, 38.61it/s]
4it [00:00, 36.39it/s]

Epoch : 10, Train Accuracy : 0.5976666666666667, Test Accuracy : 0.594


3000it [01:17, 38.79it/s]
4it [00:00, 35.79it/s]

Epoch : 11, Train Accuracy : 0.62635, Test Accuracy : 0.6271


3000it [01:18, 38.46it/s]
4it [00:00, 35.53it/s]

Epoch : 12, Train Accuracy : 0.59065, Test Accuracy : 0.5865


3000it [01:17, 38.51it/s]
4it [00:00, 36.16it/s]

Epoch : 13, Train Accuracy : 0.6321333333333333, Test Accuracy : 0.6322


3000it [01:17, 38.68it/s]
4it [00:00, 35.48it/s]

Epoch : 14, Train Accuracy : 0.6033333333333334, Test Accuracy : 0.5985


3000it [01:17, 38.59it/s]
4it [00:00, 35.82it/s]

Epoch : 15, Train Accuracy : 0.5715333333333333, Test Accuracy : 0.5651


3000it [01:18, 38.39it/s]
4it [00:00, 36.28it/s]

Epoch : 16, Train Accuracy : 0.6290166666666667, Test Accuracy : 0.6333


3000it [01:17, 38.80it/s]
4it [00:00, 35.66it/s]

Epoch : 17, Train Accuracy : 0.6065666666666667, Test Accuracy : 0.6033


3000it [01:18, 38.46it/s]
4it [00:00, 36.42it/s]

Epoch : 18, Train Accuracy : 0.6156666666666667, Test Accuracy : 0.6171


3000it [01:17, 38.60it/s]
4it [00:00, 36.15it/s]

Epoch : 19, Train Accuracy : 0.5598333333333333, Test Accuracy : 0.5478


3000it [01:20, 37.47it/s]
4it [00:00, 35.56it/s]

Epoch : 20, Train Accuracy : 0.5620166666666667, Test Accuracy : 0.5655


3000it [01:17, 38.50it/s]
4it [00:00, 36.10it/s]

Epoch : 21, Train Accuracy : 0.5372, Test Accuracy : 0.5279


3000it [01:18, 38.40it/s]
4it [00:00, 35.67it/s]

Epoch : 22, Train Accuracy : 0.5388333333333334, Test Accuracy : 0.5286


3000it [01:17, 38.79it/s]
4it [00:00, 35.23it/s]

Epoch : 23, Train Accuracy : 0.5695666666666667, Test Accuracy : 0.5691


3000it [01:17, 38.54it/s]
4it [00:00, 35.77it/s]

Epoch : 24, Train Accuracy : 0.53765, Test Accuracy : 0.5307


3000it [01:17, 38.57it/s]
4it [00:00, 36.10it/s]

Epoch : 25, Train Accuracy : 0.56235, Test Accuracy : 0.5571


3000it [01:17, 38.60it/s]
4it [00:00, 35.98it/s]

Epoch : 26, Train Accuracy : 0.5915833333333333, Test Accuracy : 0.591


3000it [01:17, 38.66it/s]
4it [00:00, 35.89it/s]

Epoch : 27, Train Accuracy : 0.5629666666666666, Test Accuracy : 0.5588


3000it [01:17, 38.61it/s]
4it [00:00, 35.35it/s]

Epoch : 28, Train Accuracy : 0.5804166666666667, Test Accuracy : 0.5816


3000it [01:17, 38.58it/s]
4it [00:00, 36.10it/s]

Epoch : 29, Train Accuracy : 0.50765, Test Accuracy : 0.5022


3000it [01:17, 38.53it/s]
4it [00:00, 35.76it/s]

Epoch : 30, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.88it/s]
4it [00:00, 35.49it/s]

Epoch : 31, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.65it/s]
4it [00:00, 36.21it/s]

Epoch : 32, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.70it/s]
4it [00:00, 35.99it/s]

Epoch : 33, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.57it/s]
4it [00:00, 36.09it/s]

Epoch : 34, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.46it/s]
4it [00:00, 35.92it/s]

Epoch : 35, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.76it/s]
4it [00:00, 35.82it/s]

Epoch : 36, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.49it/s]
4it [00:00, 35.61it/s]

Epoch : 37, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.49it/s]
4it [00:00, 35.56it/s]

Epoch : 38, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.47it/s]
4it [00:00, 35.73it/s]

Epoch : 39, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.37it/s]
4it [00:00, 35.57it/s]

Epoch : 40, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.30it/s]
4it [00:00, 35.72it/s]

Epoch : 41, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.41it/s]
4it [00:00, 35.93it/s]

Epoch : 42, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.23it/s]
4it [00:00, 34.91it/s]

Epoch : 43, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.38it/s]
4it [00:00, 35.44it/s]

Epoch : 44, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.34it/s]
4it [00:00, 36.26it/s]

Epoch : 45, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.56it/s]
4it [00:00, 36.50it/s]

Epoch : 46, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.32it/s]
4it [00:00, 35.58it/s]

Epoch : 47, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.68it/s]
2it [00:00, 17.92it/s]

Epoch : 48, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:18, 38.40it/s]
4it [00:00, 35.46it/s]

Epoch : 49, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


3000it [01:17, 38.52it/s]


Epoch : 50, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


In [8]:
model.Wff

array([{'weight': tensor([[nan, nan, nan,  ..., nan, nan, nan],
               [nan, nan, nan,  ..., nan, nan, nan],
               [nan, nan, nan,  ..., nan, nan, nan],
               ...,
               [nan, nan, nan,  ..., nan, nan, nan],
               [nan, nan, nan,  ..., nan, nan, nan],
               [nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0'), 'bias': tensor([[nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               [nan],
               

In [9]:
trn_acc = evaluateCorInfoMaxV4(  model, train_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                 neural_lr_decay_multiplier,
                                 neural_dynamic_iterations, device, printing = False)
tst_acc = evaluateCorInfoMaxV4(  model, test_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                 neural_lr_decay_multiplier,
                                 neural_dynamic_iterations, device, printing = False)
trn_acc_list.append(trn_acc)
tst_acc_list.append(tst_acc)

print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

Epoch : 50, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


In [10]:
neural_lr_start = 0.1
neural_lr_stop = 1e-3
neural_lr_rule = "divide_by_slow_loop_index"
neural_lr_decay_multiplier = 0.1
neural_dynamic_iterations = 50
output_sparsity = True
STlambda_lr = 0.01

trn_acc = evaluateCorInfoMaxV3(  model, train_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                 neural_lr_decay_multiplier,
                                 neural_dynamic_iterations, device, printing = False)
tst_acc = evaluateCorInfoMaxV3(  model, test_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                 neural_lr_decay_multiplier,
                                 neural_dynamic_iterations, device, printing = False)
trn_acc_list.append(trn_acc)
tst_acc_list.append(tst_acc)
print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

Epoch : 50, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098


In [11]:
print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

Epoch : 50, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098
