In [1]:
import sys
sys.path.append("../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from PC import *
from visualization import *
# matplotlib.use('Agg')

In [2]:
def evaluatePC(model, loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                         neural_lr_decay_multiplier,
                         neural_dynamic_iterations, device, printing = True):
    # Evaluate the model on a dataloader with T steps for the dynamics
    #model.eval()
    correct=0
    phase = 'Train' if loader.dataset.train else 'Test'
    
    for x, y in loader:
        x = activation_inverse(2*x.view(x.size(0),-1).to(device).T - 1, "sigmoid")
#         x = activation_inverse(x.view(x.size(0),-1).T, "sigmoid").to(device)
#         x = x.view(x.size(0),-1).T.to(device)
        y = y.to(device)
        
        neurons = model.fast_forward(x)
        
#         # dynamics for T time steps
#         neurons = model.run_neural_dynamics(x, y_one_hot, neurons, neural_lr_start, neural_lr_stop, 
#                                             neural_lr_rule,
#                                             neural_lr_decay_multiplier, neural_dynamic_iterations, 0, "test")
        pred = torch.argmax(neurons[-1], dim=0).squeeze()  # in this case prediction is done directly on the last (output) layer of neurons
        correct += (y == pred).sum().item()

    acc = correct/len(loader.dataset) 
    if printing:
        print(phase+' accuracy :\t', acc)   
    return acc

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [4]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), 
                                            torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,))])

mnist_dset_train = torchvision.datasets.MNIST('./data', train=True, transform=transform, target_transform=None, download=True)
train_loader = torch.utils.data.DataLoader(mnist_dset_train, batch_size=20, shuffle=True, num_workers=0)

mnist_dset_test = torchvision.datasets.MNIST('./data', train=False, transform=transform, target_transform=None, download=True)
test_loader = torch.utils.data.DataLoader(mnist_dset_test, batch_size=20, shuffle=False, num_workers=0)

In [5]:
activation_type = "sigmoid"
architecture = [784, 500, 500, 10]

x,y = next(iter(train_loader))
x = x.view(x.size(0),-1).to(device).T
y_one_hot = F.one_hot(y, 10).to(device).T

neural_lr_start = 0.1
neural_lr_stop = 0.05
neural_lr_rule = "divide_by_slow_loop_index"
neural_lr_decay_multiplier = 0.01
neural_dynamic_iterations = 50

lr_start = {'ff' : 0.0025}

model = SupervisedPredictiveCoding(architecture, activation_type)

In [6]:
x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = activation_inverse(x.view(x.size(0),-1).T, "sigmoid")
x.shape

torch.Size([784, 20])

In [7]:
trn_acc_list = []
tst_acc_list = []

n_epochs = 50
lr = lr_start
for epoch_ in range(n_epochs):
    lr = {'ff' : lr_start['ff'] * (0.9)**epoch_}
    for idx, (x, y) in tqdm(enumerate(train_loader)):
#         x = x.view(x.size(0),-1).T#.to(device)
        x, y = x.to(device), y.to(device)
        x = activation_inverse(2*x.view(x.size(0),-1).T - 1, "sigmoid")
#         x = activation_inverse(x.view(x.size(0),-1).T, "sigmoid")
        y_one_hot = F.one_hot(y, 10).to(device).T
        y_one_hot = 0.94 * y_one_hot + 0.03 * torch.ones(*y_one_hot.shape, device = device)
        _ = model.batch_step(  x, y_one_hot, lr, neural_lr_start, neural_lr_stop, neural_lr_rule,
                               neural_lr_decay_multiplier, neural_dynamic_iterations,
                               optimizer = "adam")

    trn_acc = evaluatePC(  model, train_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                           neural_lr_decay_multiplier,
                           neural_dynamic_iterations, device, printing = False)
    tst_acc = evaluatePC(  model, test_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                           neural_lr_decay_multiplier,
                           neural_dynamic_iterations, device, printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

3000it [01:38, 30.39it/s]
3it [00:00, 27.91it/s]

Epoch : 1, Train Accuracy : 0.9039, Test Accuracy : 0.9025


3000it [01:38, 30.54it/s]
3it [00:00, 28.16it/s]

Epoch : 2, Train Accuracy : 0.9318666666666666, Test Accuracy : 0.93


3000it [01:38, 30.48it/s]
3it [00:00, 28.03it/s]

Epoch : 3, Train Accuracy : 0.9371, Test Accuracy : 0.935


3000it [01:38, 30.48it/s]
3it [00:00, 27.77it/s]

Epoch : 4, Train Accuracy : 0.9382833333333334, Test Accuracy : 0.9323


3000it [01:38, 30.53it/s]
3it [00:00, 28.16it/s]

Epoch : 5, Train Accuracy : 0.9513833333333334, Test Accuracy : 0.9483


3000it [01:38, 30.34it/s]
3it [00:00, 28.11it/s]

Epoch : 6, Train Accuracy : 0.95385, Test Accuracy : 0.9487


3000it [01:38, 30.37it/s]
3it [00:00, 27.89it/s]

Epoch : 7, Train Accuracy : 0.9593833333333334, Test Accuracy : 0.9551


3000it [01:38, 30.50it/s]
3it [00:00, 27.88it/s]

Epoch : 8, Train Accuracy : 0.9608833333333333, Test Accuracy : 0.9527


3000it [01:38, 30.34it/s]
3it [00:00, 27.95it/s]

Epoch : 9, Train Accuracy : 0.9624333333333334, Test Accuracy : 0.9553


3000it [01:39, 30.20it/s]
3it [00:00, 28.91it/s]

Epoch : 10, Train Accuracy : 0.9635, Test Accuracy : 0.9573


3000it [01:38, 30.61it/s]
3it [00:00, 28.17it/s]

Epoch : 11, Train Accuracy : 0.9637, Test Accuracy : 0.9588


3000it [01:37, 30.63it/s]
3it [00:00, 28.22it/s]

Epoch : 12, Train Accuracy : 0.96535, Test Accuracy : 0.9588


3000it [01:37, 30.65it/s]
3it [00:00, 28.07it/s]

Epoch : 13, Train Accuracy : 0.9624166666666667, Test Accuracy : 0.9541


3000it [01:37, 30.65it/s]
3it [00:00, 28.28it/s]

Epoch : 14, Train Accuracy : 0.9655666666666667, Test Accuracy : 0.9565


3000it [01:37, 30.80it/s]
3it [00:00, 27.87it/s]

Epoch : 15, Train Accuracy : 0.9669833333333333, Test Accuracy : 0.9579


3000it [01:38, 30.53it/s]
3it [00:00, 27.98it/s]

Epoch : 16, Train Accuracy : 0.9692333333333333, Test Accuracy : 0.963


3000it [01:38, 30.56it/s]
3it [00:00, 28.61it/s]

Epoch : 17, Train Accuracy : 0.96865, Test Accuracy : 0.9614


3000it [01:37, 30.67it/s]
3it [00:00, 28.13it/s]

Epoch : 18, Train Accuracy : 0.9647166666666667, Test Accuracy : 0.9584


3000it [01:38, 30.57it/s]
3it [00:00, 28.28it/s]

Epoch : 19, Train Accuracy : 0.97225, Test Accuracy : 0.9656


3000it [01:37, 30.64it/s]
3it [00:00, 28.31it/s]

Epoch : 20, Train Accuracy : 0.9722833333333334, Test Accuracy : 0.9655


3000it [01:38, 30.61it/s]
3it [00:00, 28.65it/s]

Epoch : 21, Train Accuracy : 0.9732833333333333, Test Accuracy : 0.9659


3000it [01:38, 30.59it/s]
3it [00:00, 28.58it/s]

Epoch : 22, Train Accuracy : 0.9718333333333333, Test Accuracy : 0.9628


3000it [01:38, 30.54it/s]
3it [00:00, 28.49it/s]

Epoch : 23, Train Accuracy : 0.9753833333333334, Test Accuracy : 0.9682


3000it [01:38, 30.56it/s]
3it [00:00, 28.21it/s]

Epoch : 24, Train Accuracy : 0.9713666666666667, Test Accuracy : 0.9621


3000it [01:37, 30.64it/s]
3it [00:00, 27.92it/s]

Epoch : 25, Train Accuracy : 0.9761666666666666, Test Accuracy : 0.9681


3000it [01:38, 30.52it/s]
3it [00:00, 28.70it/s]

Epoch : 26, Train Accuracy : 0.9771, Test Accuracy : 0.97


3000it [01:37, 30.64it/s]
3it [00:00, 28.23it/s]

Epoch : 27, Train Accuracy : 0.9763833333333334, Test Accuracy : 0.9685


3000it [01:37, 30.71it/s]
3it [00:00, 28.15it/s]

Epoch : 28, Train Accuracy : 0.9767, Test Accuracy : 0.9691


3000it [01:38, 30.49it/s]
3it [00:00, 27.95it/s]

Epoch : 29, Train Accuracy : 0.9764, Test Accuracy : 0.9682


3000it [01:38, 30.32it/s]
3it [00:00, 27.88it/s]

Epoch : 30, Train Accuracy : 0.9763166666666667, Test Accuracy : 0.9682


3000it [01:38, 30.38it/s]
3it [00:00, 28.64it/s]

Epoch : 31, Train Accuracy : 0.9774166666666667, Test Accuracy : 0.97


3000it [01:38, 30.58it/s]
3it [00:00, 28.17it/s]

Epoch : 32, Train Accuracy : 0.97715, Test Accuracy : 0.9695


3000it [01:38, 30.60it/s]
3it [00:00, 28.77it/s]

Epoch : 33, Train Accuracy : 0.9779666666666667, Test Accuracy : 0.9688


3000it [01:38, 30.56it/s]
3it [00:00, 28.55it/s]

Epoch : 34, Train Accuracy : 0.9784166666666667, Test Accuracy : 0.97


3000it [01:38, 30.55it/s]
3it [00:00, 28.22it/s]

Epoch : 35, Train Accuracy : 0.97825, Test Accuracy : 0.9704


3000it [01:38, 30.51it/s]
3it [00:00, 28.83it/s]

Epoch : 36, Train Accuracy : 0.9794666666666667, Test Accuracy : 0.971


3000it [01:38, 30.61it/s]
3it [00:00, 28.04it/s]

Epoch : 37, Train Accuracy : 0.9785333333333334, Test Accuracy : 0.9697


3000it [01:38, 30.55it/s]
3it [00:00, 28.14it/s]

Epoch : 38, Train Accuracy : 0.9785333333333334, Test Accuracy : 0.9702


3000it [01:38, 30.56it/s]
3it [00:00, 28.08it/s]

Epoch : 39, Train Accuracy : 0.97845, Test Accuracy : 0.9702


3000it [01:37, 30.65it/s]
3it [00:00, 27.94it/s]

Epoch : 40, Train Accuracy : 0.9790166666666666, Test Accuracy : 0.9693


3000it [01:38, 30.55it/s]
3it [00:00, 28.26it/s]

Epoch : 41, Train Accuracy : 0.9789166666666667, Test Accuracy : 0.9704


3000it [01:38, 30.57it/s]
3it [00:00, 28.40it/s]

Epoch : 42, Train Accuracy : 0.9788333333333333, Test Accuracy : 0.9703


3000it [01:38, 30.53it/s]
3it [00:00, 28.71it/s]

Epoch : 43, Train Accuracy : 0.9789166666666667, Test Accuracy : 0.9709


3000it [01:38, 30.52it/s]
3it [00:00, 28.11it/s]

Epoch : 44, Train Accuracy : 0.9791833333333333, Test Accuracy : 0.9709


3000it [01:37, 30.85it/s]
3it [00:00, 28.97it/s]

Epoch : 45, Train Accuracy : 0.9789166666666667, Test Accuracy : 0.9694


3000it [01:37, 30.89it/s]
3it [00:00, 27.86it/s]

Epoch : 46, Train Accuracy : 0.9789, Test Accuracy : 0.9705


3000it [01:37, 30.72it/s]
3it [00:00, 28.60it/s]

Epoch : 47, Train Accuracy : 0.9789, Test Accuracy : 0.9706


3000it [01:37, 30.69it/s]
3it [00:00, 27.76it/s]

Epoch : 48, Train Accuracy : 0.9791, Test Accuracy : 0.9706


3000it [01:37, 30.68it/s]
3it [00:00, 27.96it/s]

Epoch : 49, Train Accuracy : 0.9792333333333333, Test Accuracy : 0.9707


3000it [01:39, 30.01it/s]


Epoch : 50, Train Accuracy : 0.9793666666666667, Test Accuracy : 0.9705


In [8]:
x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = activation_inverse(x.view(x.size(0),-1).T, "sigmoid")
y_one_hot = F.one_hot(y, 10).to(device).T
y_one_hot = 0.94 * y_one_hot + 0.03 * torch.ones(*y_one_hot.shape, device = device)

neurons = model.fast_forward(x)
mode = "train"
if mode == "train":
    neurons[-1] = y_one_hot.to(torch.float)
    
neurons = model.run_neural_dynamics( x, y, neurons, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                            neural_lr_decay_multiplier, neural_dynamic_iterations)

layers = [x] + neurons  # concatenate the input to other layers
layers_after_activation = [list(model.activation_func(layers[jj], model.activation_type)) for jj in range(len(layers))]
error_layers = [(layers[jj+1] - (model.Wff[jj]['weight'] @ layers_after_activation[jj][0] + model.Wff[jj]['bias'])) / model.variances[jj + 1] for jj in range(len(layers) - 1)]


In [9]:
error_layers[2].shape, layers_after_activation[2][0].shape

(torch.Size([10, 20]), torch.Size([500, 20]))

In [10]:
(1/20) * (error_layers[jj] @ layers_after_activation[0][0].T)

NameError: name 'jj' is not defined

In [None]:
jj = 0
torch.mean(outer_prod_broadcasting(error_layers[jj].T, layers_after_activation[jj][0].T), axis = 0)

In [None]:
error_layers[0]

In [None]:
model.Wff[0]["weight"].shape

In [None]:
model.Wff[0]['weight'].shape

In [None]:
model.activation_func(x, model.activation_type)[0].shape

In [None]:
neurons = model.fast_forward(activation_func(x, "sigmoid")[0])
neurons[-1] = y_one_hot.to(torch.float)
neurons = model.run_neural_dynamics(x, y, neurons, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                          neural_lr_decay_multiplier, neural_dynamic_iterations)

In [None]:
tst_acc = evaluatePC(  model, test_loader, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                                 neural_lr_decay_multiplier,
                                 neural_dynamic_iterations, device, printing = True)

In [None]:
Wff = model.Wff
Wff[0]

In [None]:
model.batch_step(x, y_one_hot, lr_start, neural_lr_start, neural_lr_stop, neural_lr_rule, 
                          neural_lr_decay_multiplier, neural_dynamic_iterations)

In [None]:
model.Wff

In [None]:
evalua

In [None]:
neurons = model.fast_forward(activation_func(x, "sigmoid")[0])

In [None]:
layers_after_activation, error_layers, grads = model.calculate_neural_dynamics_grad(x, y, neurons)

In [None]:
grads

In [None]:
len(layers_after_activation), len(error_layers), len(grads)

In [None]:
layers_after_activation[jj + 1][1].shape

In [None]:
jj = 0
error_layers[jj + 1].shape, model.Wff[jj + 1]['weight'].shape

In [None]:
len(layers_after_activation)

In [None]:
jj = 0
Wff[jj]['weight'] @ layers_after_activation[jj][0]

In [None]:
variances

In [None]:
Wff = model.Wff
variances = model.variances
layers = [x] + neurons

[(layers[jj+1] - (Wff[jj]['weight'] @ layers_after_activation[jj][0] + Wff[jj]['bias'])) / variances[jj + 1] for jj in range(len(layers) - 1)]

In [None]:
def activation_func(x, type_ = "linear"):
    if type_ == "linear":
        f_x = x
        fp_x = torch.ones(*x.shape, device = x.device)
    elif type_ == "tanh":
        f_x = torch.tanh(x)
        fp_x = torch.ones(*x.shape, device = x.device) - f_x ** 2
    elif type_ == "sigmoid":
        ones_vec = torch.ones(*x.shape, device = x.device)
        f_x = 1 / (ones_vec + torch.exp(-x))
        fp_x = f_x * (ones_vec - f_x)
    elif type_ == "relu":
        f_x = torch.maximum(x, torch.tensor([0], device = x.device))
        fp_x = 1 * (x > 0)
    elif type_ == "exp":
        f_x = torch.exp(x)
        fp_x = f_x
    else: # Use linear
        f_x = x
        fp_x = torch.ones(*x.shape, device = x.device)
        
    return f_x, fp_x

In [None]:
x = torch.randn(3,1, device = "cuda")
x

In [None]:
x = torch.randn(3,1, device = "cuda")
print(x)
activation_func(x, type_ = "sigmoid")

In [None]:
device = "cuda"

In [None]:
(2 * torch.rand(3, 3, requires_grad = False).to(device) - 1) * (4 * np.sqrt(6 / (3 + 3)))