In [1]:
import sys
sys.path.append("../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from models import *
from visualization import *
# matplotlib.use('Agg')

In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [3]:
# Per-image preprocessing: convert to a tensor, then normalize with
# mean 0.0 and std 1.0.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,)),
])

# Training split, stored under ./data (downloaded if requested files are missing),
# served in shuffled batches of 20.
mnist_dset_train = torchvision.datasets.MNIST(
    './data',
    train=True,
    transform=transform,
    target_transform=None,
    download=True,
)
train_loader = torch.utils.data.DataLoader(
    mnist_dset_train,
    batch_size=20,
    shuffle=True,
    num_workers=0,
)

# Test split: same preprocessing and batch size, but served in a fixed order.
mnist_dset_test = torchvision.datasets.MNIST(
    './data',
    train=False,
    transform=transform,
    target_transform=None,
    download=True,
)
test_loader = torch.utils.data.DataLoader(
    mnist_dset_test,
    batch_size=20,
    shuffle=False,
    num_workers=0,
)

In [4]:
# --- Network definition -------------------------------------------------
architecture = [784, 500, 10]
activation = hard_sigmoid

# --- Hyperparameters ----------------------------------------------------
beta = 1
lambda_ = 0.999
epsilon = 0.1  # alternative value noted by the author: 0.5
one_over_epsilon = 1 / epsilon
lr_start = {'ff': 0.01, 'fb': 0.001, 'lat': 1e-3}
neural_lr_start = 0.1
neural_lr_stop = 1e-3
neural_lr_rule = "divide_by_slow_loop_index"
neural_lr_decay_multiplier = 0.1
neural_dynamic_iterations = 10

# --- One training batch -------------------------------------------------
# Flatten every image to a vector, move it to `device`, and transpose so that
# each sample is a column. The one-hot labels get the same column layout.
# The batch is drawn before the model is built, as in the original cell.
x, y = next(iter(train_loader))
x = x.view(x.shape[0], -1).to(device).T
y_one_hot = F.one_hot(y, num_classes=10).to(device).T

# --- Model --------------------------------------------------------------
model = CorInfoMax(
    architecture=architecture,
    lambda_=lambda_,
    epsilon=epsilon,
    activation=activation,
)

In [5]:
# Allocate the neuron states for the current batch and display them.
# `x` holds one sample per column (it was transposed when the batch was drawn),
# so its second dimension is the batch size; reading it from `x` keeps this cell
# in step with the DataLoader instead of repeating the literal 20.
neurons = model.init_neurons(x.size(1))
neurons

[tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0'),
 tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [6]:
# Evaluate the neural-dynamics gradient for the current batch and neuron states,
# then display the result.
# NOTE(review): the trailing 0 is presumably the nudging strength (beta) — confirm
# against CorInfoMax.calculate_neural_dynamics_grad.
grads = model.calculate_neural_dynamics_grad(x, y_one_hot, neurons, 0)
grads

[tensor([[ 0.7000,  0.0278, -3.0337,  ..., -1.0185, -2.3567, -1.6107],
         [ 7.6684,  7.1523,  2.2729,  ...,  8.1805,  5.5208,  4.8485],
         [ 2.3818,  0.9725, -1.2124,  ..., -1.9394,  0.8576, -3.4499],
         ...,
         [-1.6879, -1.8486, -2.2555,  ...,  0.1105, -1.8685,  3.8641],
         [-1.9641, -2.2766, -0.4007,  ..., -1.8700,  0.7408, -5.1555],
         [ 1.7551,  1.4266,  1.9611,  ...,  3.6627,  1.6223,  2.4620]],
        device='cuda:0'),
 tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [7]:
# Start from freshly initialised neuron states. The batch size is read from `x`
# (one sample per column) rather than hard-coding 20, so the cell keeps working
# if the DataLoader batch size changes.
neurons = model.init_neurons(x.size(1))
# Overwrite the last layer's state with the one-hot labels (cast to float)
# before running the dynamics.
neurons[-1] = y_one_hot.to(torch.float)
neurons = model.run_neural_dynamics(x, y_one_hot, neurons, neural_lr_start, neural_lr_stop, neural_lr_rule,
                                    neural_lr_decay_multiplier, neural_dynamic_iterations, beta)
# Index of the largest entry in each column of the last layer, i.e. one value per sample.
torch.argmax(neurons[-1], dim=0).squeeze()

tensor([8, 6, 6, 8, 9, 8, 8, 1, 8, 0, 6, 9, 8, 8, 0, 0, 8, 8, 6, 1],
       device='cuda:0')

In [8]:
# Run the model's fast_forward pass on the current batch and display the
# list of tensors it returns.
model.fast_forward(x)

[tensor([[ 0.0700,  0.0028, -0.3034,  ..., -0.1018, -0.2357, -0.1611],
         [ 0.7668,  0.7152,  0.2273,  ...,  0.8180,  0.5521,  0.4848],
         [ 0.2382,  0.0973, -0.1212,  ..., -0.1939,  0.0858, -0.3450],
         ...,
         [-0.1688, -0.1849, -0.2256,  ...,  0.0111, -0.1869,  0.3864],
         [-0.1964, -0.2277, -0.0401,  ..., -0.1870,  0.0741, -0.5155],
         [ 0.1755,  0.1427,  0.1961,  ...,  0.3663,  0.1622,  0.2462]],
        device='cuda:0'),
 tensor([[-0.1105,  0.5995, -0.5097, -0.4320,  0.3181, -0.0328, -0.1241,  0.2450,
           0.0405,  0.1832,  0.6010,  0.1143,  0.0193, -0.5576,  0.2244, -0.1121,
           0.1382,  0.0604, -0.4643, -0.1235],
         [ 0.7400, -0.5330, -0.9301, -0.8341, -0.6046,  0.1549, -0.2467, -0.0705,
          -0.2651, -0.2879, -0.9589, -0.1188, -0.3752, -0.1275,  0.5654, -0.4458,
          -0.0269,  0.4367, -1.0961, -0.3917],
         [-0.0339, -0.9653, -0.2359, -0.0989, -0.6299, -0.4312, -0.3142,  0.1167,
          -0.1097, -0.4945, -

In [9]:
# Show the shapes of the last-layer state and of the one-hot targets side by side.
tuple(t.shape for t in (neurons[-1], y_one_hot))

(torch.Size([10, 20]), torch.Size([10, 20]))

In [10]:
# Display the currently selected neural learning-rate rule.
neural_lr_rule

'divide_by_slow_loop_index'

In [19]:
# --- Network definition -------------------------------------------------
architecture = [784, 500, 10]
activation = hard_sigmoid

# --- Hyperparameters ----------------------------------------------------
beta = 1
lambda_ = 0.999
epsilon = 0.5
one_over_epsilon = 1 / epsilon
lr_start = {'ff': 0.01, 'fb': 0.001, 'lat': 1e-3}
neural_lr_start = 0.1
neural_lr_stop = 1e-3
# Alternative rule used earlier in the notebook: "divide_by_slow_loop_index"
neural_lr_rule = "constant"
neural_lr_decay_multiplier = 0.1
neural_dynamic_iterations = 50

# --- One training batch -------------------------------------------------
# Flatten every image to a vector, move it to `device`, and transpose so that
# each sample is a column. The one-hot labels get the same column layout.
# The batch is drawn before the model is built, as in the original cell.
x, y = next(iter(train_loader))
x = x.view(x.shape[0], -1).to(device).T
y_one_hot = F.one_hot(y, num_classes=10).to(device).T

# --- Model --------------------------------------------------------------
model = CorInfoMax(
    architecture=architecture,
    lambda_=lambda_,
    epsilon=epsilon,
    activation=activation,
)

# --- Single batch step in "train" mode ----------------------------------
# NOTE(review): the literals 5 and 1 sit where the earlier run_neural_dynamics
# call in this notebook receives neural_dynamic_iterations and beta. If
# batch_step uses the same ordering, neural_dynamic_iterations = 50 above is
# never used and only 5 iterations run — confirm which value is intended.
neurons = model.batch_step(
    x, y_one_hot, lr_start,
    neural_lr_start, neural_lr_stop, neural_lr_rule,
    neural_lr_decay_multiplier, 5, 1, "train",
)

# Per-sample argmax of the last layer, shown next to the true labels.
neurons[-1].argmax(dim=0).squeeze(), y

(tensor([0, 9, 3, 6, 8, 6, 4, 7, 0, 5, 1, 5, 3, 2, 8, 1, 7, 9, 0, 0],
        device='cuda:0'),
 tensor([0, 9, 3, 6, 8, 6, 4, 7, 0, 5, 1, 5, 3, 2, 8, 1, 7, 9, 0, 0]))

In [12]:
# Prediction from fast_forward alone, shown next to the true labels.
# NOTE(review): this cell's execution count (12) is lower than the preceding
# cell's (19), and the labels in its saved output differ from that cell's, so
# the saved output appears to come from an earlier batch — re-run top to bottom.
neurons = model.fast_forward(x)
neurons[-1].argmax(dim=0).squeeze(), y

(tensor([1, 0, 6, 2, 9, 8, 8, 8, 5, 9, 0, 9, 8, 8, 1, 5, 5, 1, 2, 6],
        device='cuda:0'),
 tensor([4, 3, 1, 2, 2, 4, 9, 5, 0, 0, 2, 9, 9, 2, 6, 0, 4, 8, 7, 1]))