In [1]:
import sys
sys.path.append("../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from models import *
from visualization import *
# matplotlib.use('Agg')

In [2]:
# Pick the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU. The bare name on the last line displays it.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [3]:
# Preprocessing pipeline applied to every MNIST image: convert to a tensor,
# then normalize with mean 0 and std 1.
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,)),
    ]
)

# Training split (downloaded into ./data if missing), served in shuffled
# mini-batches of 20 samples.
mnist_dset_train = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    target_transform=None,
    download=True,
)
train_loader = torch.utils.data.DataLoader(
    dataset=mnist_dset_train,
    batch_size=20,
    shuffle=True,
    num_workers=0,
)

# Test split, same preprocessing and batch size, iterated in fixed order.
mnist_dset_test = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    target_transform=None,
    download=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=mnist_dset_test,
    batch_size=20,
    shuffle=False,
    num_workers=0,
)

In [4]:
# --- Model configuration ---------------------------------------------------
activation = hard_sigmoid
# 784 matches the flattened input built below and 10 matches the one-hot width;
# presumably these are the input / hidden / output layer sizes — confirm in models.py.
architecture = [784, 500, 10]

# --- One sample batch used by the exploratory cells below --------------------
x, y = next(iter(train_loader))
# Flatten each image to a vector, move to the device, and transpose so that
# columns index the samples: (features, batch).
x = x.view(x.size(0), -1).to(device).T
# One-hot targets, transposed the same way: (10, batch).
y_one_hot = F.one_hot(y, 10).to(device).T

# --- Hyperparameters ---------------------------------------------------------
beta = 1
lambda_ = 0.999
epsilon = 0.1  # alternative value left by the author: 0.5
one_over_epsilon = 1 / epsilon
lr_start = {'ff': 0.01, 'fb': 0.001, 'lat': 1e-3}
neural_lr_start = 0.1
neural_lr_stop = 1e-3
neural_lr_rule = "divide_by_slow_loop_index"
neural_lr_decay_multiplier = 0.1
# `neural_dynamic_iterations` is consumed by the run_neural_dynamics cell but
# was never assigned in the notebook, so a fresh "Restart & Run All" raised
# NameError. Define it here with the other settings.
# NOTE(review): 5 mirrors the literal passed in the same positional slot of
# the later batch_step call — confirm this is the intended iteration count.
neural_dynamic_iterations = 5

model = CorInfoMax(architecture=architecture, lambda_=lambda_,
                   epsilon=epsilon, activation=activation)

In [5]:
# Allocate the initial neuron states for one mini-batch and display them.
# The batch size is read from `x` (samples are its columns after the transpose
# in the setup cell) instead of repeating the DataLoader's hard-coded 20, so
# this cell keeps working if the batch size is changed.
neurons = model.init_neurons(x.size(1))
neurons

[tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0'),
 tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [6]:
# Evaluate the neural-dynamics gradients for the sample batch at the current
# neuron states and display them.
# NOTE(review): the trailing 0 is presumably the beta argument — confirm
# against the method signature in models.py.
grads = model.calculate_neural_dynamics_grad(
    x,
    y_one_hot,
    neurons,
    0,
)
grads

[tensor([[-2.4885,  0.4672, -0.0859,  ..., -3.8411, -0.9001, -0.9166],
         [-4.4560,  4.8609, -1.3593,  ..., -0.2791, -5.1989,  1.1242],
         [-3.5220, -0.0646, -3.4889,  ...,  0.7695, -5.6820, -2.0572],
         ...,
         [-1.3956, -0.6573, -1.7025,  ..., -1.2321,  3.5394, -1.5662],
         [ 2.5555, -2.5297, -5.6591,  ...,  2.3994,  0.2526, -3.9241],
         [ 8.9963, -0.5278,  1.8709,  ..., -1.6381,  3.8061,  4.1860]],
        device='cuda:0'),
 tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [7]:
# Start from fresh neuron states sized to the batch held in `x` (samples are
# its columns), rather than repeating the hard-coded batch size of 20.
neurons = model.init_neurons(x.size(1))
# Overwrite the last entry with the float one-hot targets before running the
# dynamics.
neurons[-1] = y_one_hot.to(torch.float)
neurons = model.run_neural_dynamics(
    x, y_one_hot, neurons,
    neural_lr_start, neural_lr_stop, neural_lr_rule,
    neural_lr_decay_multiplier, neural_dynamic_iterations, beta,
)
# Index of the largest entry in each column of the last layer (one per sample).
torch.argmax(neurons[-1], dim=0).squeeze()

tensor([0, 1, 6, 9, 5, 4, 0, 5, 6, 2, 4, 8, 9, 2, 7, 4, 8, 3, 5, 6],
       device='cuda:0')

In [8]:
# Call the model's fast_forward on the sample batch and display the returned
# list of tensors; the result is not stored.
model.fast_forward(x)

[tensor([[-0.2489,  0.0467, -0.0086,  ..., -0.3841, -0.0900, -0.0917],
         [-0.4456,  0.4861, -0.1359,  ..., -0.0279, -0.5199,  0.1124],
         [-0.3522, -0.0065, -0.3489,  ...,  0.0769, -0.5682, -0.2057],
         ...,
         [-0.1396, -0.0657, -0.1703,  ..., -0.1232,  0.3539, -0.1566],
         [ 0.2555, -0.2530, -0.5659,  ...,  0.2399,  0.0253, -0.3924],
         [ 0.8996, -0.0528,  0.1871,  ..., -0.1638,  0.3806,  0.4186]],
        device='cuda:0'),
 tensor([[ 0.2031, -0.2748,  0.9648, -0.0395, -0.0969,  0.4200,  0.1278,  0.1575,
           0.7930,  0.0178,  0.0760, -0.4702,  0.1120,  0.5362,  0.2541,  0.3529,
          -0.0212,  0.0138, -0.0928,  0.7035],
         [-0.0627, -0.2970, -0.3474, -0.2350, -0.4529, -0.2391,  0.1234,  0.0575,
          -0.2118, -0.6514,  0.0062,  0.2661, -0.1857, -0.5614,  0.0467, -0.0579,
           0.0090, -0.5980, -0.0037,  0.0798],
         [-0.0884, -0.2127, -0.4401,  0.4362, -0.0316,  0.2202, -0.6029, -0.4336,
          -0.6775,  0.1473,  

In [10]:
# Sanity check: show the shape of the last entry of `neurons` next to the
# shape of the one-hot targets so they can be compared.
neurons[-1].shape, y_one_hot.shape

(torch.Size([10, 20]), torch.Size([10, 20]))

In [11]:
# Run one batch_step on the sample batch and keep the neuron states it returns.
# NOTE(review): the trailing literals 5 and 1 presumably stand for the number
# of neural-dynamics iterations and beta — confirm against models.py.
neurons = model.batch_step(
    x, y_one_hot,
    neural_lr_start, neural_lr_stop, neural_lr_rule, neural_lr_decay_multiplier,
    5, 1,
)
# Per-sample argmax of the last layer, shown alongside the true labels.
neurons[-1].argmax(dim=0).squeeze(), y

(tensor([0, 1, 6, 9, 5, 4, 0, 5, 6, 2, 4, 8, 9, 2, 7, 4, 8, 3, 5, 6],
        device='cuda:0'),
 tensor([0, 1, 6, 9, 5, 4, 0, 5, 6, 2, 4, 8, 9, 2, 7, 4, 8, 3, 5, 6]))

In [13]:
# Recompute the neuron states from the input alone via fast_forward, then show
# the per-sample argmax of the last layer next to the true labels.
neurons = model.fast_forward(x)
neurons[-1].argmax(dim=0).squeeze(), y

(tensor([9, 5, 0, 5, 9, 7, 9, 5, 0, 7, 9, 1, 5, 0, 9, 9, 5, 5, 6, 9],
        device='cuda:0'),
 tensor([0, 1, 6, 9, 5, 4, 0, 5, 6, 2, 4, 8, 9, 2, 7, 4, 8, 3, 5, 6]))