In [1]:
import sys
sys.path.append("../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from ExplicitModels import *
from visualization import *
# matplotlib.use('Agg')

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),])

mnist_dset_train = torchvision.datasets.MNIST('./data', train=True, transform=transform, target_transform=None, download=True)
train_loader = torch.utils.data.DataLoader(mnist_dset_train, batch_size=20, shuffle=True, num_workers=0)

mnist_dset_test = torchvision.datasets.MNIST('./data', train=False, transform=transform, target_transform=None, download=True)
test_loader = torch.utils.data.DataLoader(mnist_dset_test, batch_size=20, shuffle=False, num_workers=0)

In [8]:
# activation = F.relu
# architecture = [784, 128, 64, 10]
# lambda_ = 0.99999
# epsilon = 0.2
# supervised_lambda_weight = 1e-3
# neural_lr_start = 0.001 
# neural_lr_stop = 0.0005 
# neural_lr_rule = "constant"
# neural_lr_decay_multiplier = 0.005
# neural_dynamic_iterations = 50
activation = F.relu
architecture = [784, 128, 64, 10]
lambda_ = 0.99999
epsilon = 0.01
supervised_lambda_weight = 1e-1
neural_lr_start = 0.001 
neural_lr_stop = 0.0 
neural_lr_rule = "constant"
neural_lr_decay_multiplier = 0.005
neural_dynamic_iterations = 15

model = CorInfoMaxBiDirectionalNudged(architecture, lambda_, epsilon, activation, use_stepLR = True, 
                                      sgd_nesterov = False, optimizer_type = "sgd", 
                                      optim_lr_ff = 0.01, optim_lr_fb = 0.01, stepLR_step_size = 10*3000,)

In [9]:
trn_acc_list = []
tst_acc_list = []
random_sign = False
n_epochs = 50

for epoch_ in range(n_epochs):
    Bcopy = torch.clone(model.B[0]["weight"])
    for idx, (x, y) in tqdm(enumerate(train_loader)):
        x, y = x.to(device), y.to(device)
        x = x.to(device).view(x.size(0),-1).T
        y_one_hot = F.one_hot(y, 10).to(device).T
        #y_one_hot = 0.94 * y_one_hot + 0.03 * torch.ones(*y_one_hot.shape, device = device)
        if random_sign:
            rnd_sgn = 2*np.random.randint(2) - 1
            supervised_lambda_weight = rnd_sgn * supervised_lambda_weight

        model.batch_step(  x, y_one_hot, supervised_lambda_weight,
                           neural_lr_start, neural_lr_stop, neural_lr_rule,
                           neural_lr_decay_multiplier, neural_dynamic_iterations,
                        )

    trn_acc = evaluatePC(model, train_loader, device, False, 
                         printing = False)
    tst_acc = evaluatePC(model, test_loader, device, False, 
                         printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))
    print("B_1 update difference : {}".format(torch.norm(model.B[0]['weight'] - Bcopy)))

3000it [01:37, 30.87it/s]
3it [00:00, 27.99it/s]

Epoch : 1, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098
B_1 update difference : nan


3000it [01:37, 30.71it/s]
3it [00:00, 28.58it/s]

Epoch : 2, Train Accuracy : 0.09871666666666666, Test Accuracy : 0.098
B_1 update difference : nan


1488it [00:48, 30.60it/s]


KeyboardInterrupt: 

In [None]:
model.Wff

In [None]:
plt.imshow(torch2numpy(model.B[0]['weight']))

In [None]:
torch2numpy(model.B[0]['weight'])

In [None]:
model.B.copy()

In [None]:
# activation = F.relu
# architecture = [784, 128, 64, 10]
# lambda_ = 0.9999
# epsilon = 0.01
# supervised_lambda_weight = 1e-3
# neural_lr_start = 0.001 
# neural_lr_stop = 0.0005 
# neural_lr_rule = "constant"
# neural_lr_decay_multiplier = 0.005
# neural_dynamic_iterations = 50

# model = CorInfoMaxBiDirectionalNudged(architecture, lambda_, epsilon, activation, use_stepLR = True, 
#                                       sgd_nesterov = False, optimizer_type = "sgd", 
#                                       optim_lr_ff = 1, optim_lr_fb = 0.1, stepLR_step_size = 10*3000,)

activation = F.relu
architecture = [784, 128, 64, 10]
lambda_ = 0.99999
epsilon = 0.1
supervised_lambda_weight = 1e-3
neural_lr_start = 0.001 
neural_lr_stop = 0.0005 
neural_lr_rule = "constant"
neural_lr_decay_multiplier = 0.005
neural_dynamic_iterations = 50

model = CorInfoMaxBiDirectionalNudged(architecture, lambda_, epsilon, activation, use_stepLR = True, 
                                      sgd_nesterov = False, optimizer_type = "sgd", 
                                      optim_lr_ff = 1, optim_lr_fb = 0.5, stepLR_step_size = 10*3000,)

x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = x.to(device).view(x.size(0),-1).T
y_one_hot = F.one_hot(y, 10).to(device).T

neurons = model.fast_forward(x, no_grad = True)
# for jj in range(len(neurons)):
#     neurons[jj] = neurons[jj].requires_grad_()
    
# layers = [x] + neurons

# layers_copy = model.copy_neurons(layers)
neurons = model.fast_forward(x, no_grad = True)
neurons = model.run_neural_dynamics(x, y_one_hot, neurons, supervised_lambda_weight, 
                          neural_lr_start, neural_lr_stop, lr_rule = neural_lr_rule, 
                          lr_decay_multiplier = neural_lr_decay_multiplier, 
                          neural_dynamic_iterations = neural_dynamic_iterations)

corinfo_cost = model.CorInfo_Cost(x, y, neurons).sum()
corinfo_cost.backward()

(model.Wff[0]['weight'].grad)

In [None]:
plt.imshow(torch2numpy((model.Wff[0]['weight'].grad)))

In [None]:
mbs = x.size(1)
for jj in range(len(neurons)):
    neurons[jj] = neurons[jj].requires_grad_()
corinfo_cost = model.CorInfo_Cost(x, y, neurons)
init_grads = torch.tensor([1 for i in range(mbs)], dtype=torch.float, device=device, requires_grad=True) #Initializing gradients
grads = torch.autograd.grad(corinfo_cost, neurons, grad_outputs=init_grads, create_graph=False) # dPhi/ds

In [None]:
one_over_epsilon = model.one_over_epsilon
gam_ = model.gam_

x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = x.to(device).view(x.size(0),-1).T
y_one_hot = F.one_hot(y, 10).to(device).T

Wff = model.Wff
B = model.B

neurons = model.fast_forward(x, no_grad = True)

layers = [x] + neurons
for jj in range(len(Wff)):
    if jj == 0:
        error = - one_over_epsilon * (layers[jj + 1] - (Wff[jj]['weight'] @ layers[jj] + Wff[jj]['bias'])) 
    else:
        error = - one_over_epsilon * (layers[jj + 1] - (Wff[jj]['weight'] @ model.activation(layers[jj]) + Wff[jj]['bias']))

    lateral_term = gam_ * 0.5 * (layers[jj + 1].T @ B[jj]['weight'] @ layers[jj + 1])
    corinfo_cost = torch.sum(error * error, 0)
    


In [None]:
error.shape
torch.sum(error * error, 0).shape

In [None]:
outer_prod_broadcasting((B[jj]['weight'] @ layers[jj + 1]), layers[jj + 1].T).shape

In [None]:
layers[jj + 1][:,2].T @ B[jj]['weight'] @ layers[jj + 1][:,2]

In [None]:
torch.sum((B[jj]['weight'] @ layers[jj + 1]) * layers[jj + 1], 0)

In [None]:
(B[jj]['weight'] @ layers[jj + 1]).shape, layers[jj + 1].shape

In [None]:
trn_acc_list = []
tst_acc_list = []
random_sign = False
n_epochs = 50

for epoch_ in range(n_epochs):
    for idx, (x, y) in tqdm(enumerate(train_loader)):
        x, y = x.to(device), y.to(device)
        x = x.to(device).view(x.size(0),-1).T
        y_one_hot = F.one_hot(y, 10).to(device).T
        #y_one_hot = 0.94 * y_one_hot + 0.03 * torch.ones(*y_one_hot.shape, device = device)
        if random_sign:
            rnd_sgn = 2*np.random.randint(2) - 1
            supervised_lambda_weight = rnd_sgn * supervised_lambda_weight

        model.batch_step(  x, y_one_hot, supervised_lambda_weight,
                           neural_lr_start, neural_lr_stop, neural_lr_rule,
                           neural_lr_decay_multiplier, neural_dynamic_iterations,
                        )

    trn_acc = evaluatePC(model, train_loader, device, False, 
                         printing = False)
    tst_acc = evaluatePC(model, test_loader, device, False, 
                         printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))