In [1]:
import sys
import os
sys.path.append("../src")

import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F
import argparse
import matplotlib
# matplotlib.use('Agg')
from tqdm import tqdm
import glob
from PIL import Image
import os
from datetime import datetime
import time
import math
import sys
sys.path.append("../src")
from models import *
from torch_utils import *
from visualization import *

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), 
                                            torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,))])

mnist_dset_train = torchvision.datasets.MNIST('./data', train=True, transform=transform, target_transform=None, download=True)
train_loader = torch.utils.data.DataLoader(mnist_dset_train, batch_size=20, shuffle=True, num_workers=0)

mnist_dset_test = torchvision.datasets.MNIST('./data', train=False, transform=transform, target_transform=None, download=True)
test_loader = torch.utils.data.DataLoader(mnist_dset_test, batch_size=20, shuffle=False, num_workers=0)

In [5]:
activation = hard_sigmoid
criterion = torch.nn.MSELoss(reduction='none').to(device)

architecture = [784, 500, 10]

x,y = next(iter(train_loader))
x = x.view(x.size(0),-1).to(device).T
y_one_hot = F.one_hot(y, 10).to(device).T

lambda_h = 0.99
lambda_y = 0.99
epsilon = 0.1
one_over_epsilon = 1 / epsilon
lr = {'ff' : 0.05, 'fb': 0.05, 'lat': 1e-3}
neural_lr = 0.05
beta = 1
model = TwoLayerCorInfoMax(architecture = architecture, lambda_h = lambda_h, lambda_y = lambda_y, 
                           epsilon = epsilon, activation = activation)

In [10]:
neural_dynamic_iterations_free = 20
neural_dynamic_iterations_nudged = 4

x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = x.view(x.size(0),-1).T
y_one_hot = F.one_hot(y, 10).to(device).T

y_label = y_one_hot

h, y_hat = model.init_neurons(x.size(1), device = model.device)

h, y_hat = model.run_neural_dynamics(x, h, y_hat, y_label, neural_lr, 
                                    neural_dynamic_iterations_free, 0, )
neurons1 = [h, y_hat].copy()

h, y_hat = model.run_neural_dynamics(x, h, y_hat, y_label, neural_lr, 
                                    neural_dynamic_iterations_nudged, beta,)
neurons2 = [h, y_hat].copy()

In [17]:
model.B[0]['weight'].shape

torch.Size([500, 500])

In [13]:
neurons2[0].shape

torch.Size([500, 20])

In [52]:
model.B[0]['weight'].shape

torch.Size([500, 500])

In [54]:
neurons1[0][:,0].shape

torch.Size([500])

In [50]:
neurons1[0].shape

torch.Size([500, 20])

In [62]:
torch.norm(torch.mean(model.B[0]['weight'] @ neurons1[0], 1) - z)

tensor(2.8434e-07, device='cuda:0')

torch.Size([500])

In [45]:
z = (1/20) * sum([model.B[0]['weight'] @ neurons1[0][:,jj] for jj in range(20)])
torch.outer(z, z)

tensor([[ 0.0271,  0.0223, -0.0113,  ...,  0.0309,  0.0240,  0.0027],
        [ 0.0223,  0.0183, -0.0093,  ...,  0.0254,  0.0197,  0.0022],
        [-0.0113, -0.0093,  0.0047,  ..., -0.0128, -0.0100, -0.0011],
        ...,
        [ 0.0309,  0.0254, -0.0128,  ...,  0.0352,  0.0273,  0.0031],
        [ 0.0240,  0.0197, -0.0100,  ...,  0.0273,  0.0212,  0.0024],
        [ 0.0027,  0.0022, -0.0011,  ...,  0.0031,  0.0024,  0.0003]],
       device='cuda:0')

In [48]:
model.B[0]['weight'] @ torch.mean(outer_prod_broadcasting(neurons1[0].T, neurons1[0].T), axis = 0) @ model.B[0]['weight'].T

tensor([[ 0.0343,  0.0217, -0.0134,  ...,  0.0322,  0.0228, -0.0008],
        [ 0.0217,  0.0257, -0.0102,  ...,  0.0258,  0.0229,  0.0020],
        [-0.0134, -0.0102,  0.0125,  ..., -0.0164, -0.0158, -0.0015],
        ...,
        [ 0.0322,  0.0258, -0.0164,  ...,  0.0559,  0.0314,  0.0031],
        [ 0.0228,  0.0229, -0.0158,  ...,  0.0314,  0.0380,  0.0043],
        [-0.0008,  0.0020, -0.0015,  ...,  0.0031,  0.0043,  0.0086]],
       device='cuda:0')

In [None]:
trn_acc_list = []
tst_acc_list = []
neural_dynamic_iterations_free = 20
neural_dynamic_iterations_nudged = 4
# lambda_h = 0.01
# lambda_y = 0.01
# epsilon = 1
# one_over_epsilon = 1 / epsilon
n_epochs = 50
# lr = {'ff' : 1e-3, 'fb': 1e-3, 'lat': 1e-3}
# neural_lr = 0.25

for epoch_ in range(n_epochs):
    for idx, (x, y) in tqdm(enumerate(train_loader)):
        x, y = x.to(device), y.to(device)
        x = x.view(x.size(0),-1).T
        y_one_hot = F.one_hot(y, 10).to(device).T

        h, y_hat = model.batch_step(  x, y_one_hot, lr, neural_lr, neural_dynamic_iterations_free, 
                                      neural_dynamic_iterations_nudged, beta, output_sparsity = True)

    trn_acc = evaluateCorInfoMax(model, train_loader, neural_lr, 20, device = 'cuda', printing = False)
    tst_acc = evaluateCorInfoMax(model, test_loader, neural_lr, 20, device = 'cuda', printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    lr = {'ff' : 0.1, 'fb': 0.1, 'lat': 1e-2}
    
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

In [8]:
next(iter(train_loader))

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         ...,
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ..

In [22]:
x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = x.view(x.size(0),-1) # flattening the input
neurons = model.init_neurons(x.size(0), device)
layers = [x] + neurons

In [16]:
model.W[idx].weight.data.shape

torch.Size([500, 784])

In [18]:
layers[idx + 1].shape

torch.Size([20, 500])

In [21]:
layers[idx].shape

torch.Size([20, 1, 28, 28])

In [24]:
model.W[idx](layers[idx]).shape

torch.Size([20, 500])

In [28]:
(model.W[idx](layers[idx]) - layers[idx + 1]).shape

torch.Size([20, 500])

In [29]:
idx = 0
torch.norm(model.W[idx](layers[idx]) - layers[idx + 1], dim = 1)

tensor([ 9.9167,  9.3459,  6.2711,  7.1550,  8.3149,  6.3540,  7.4546,  6.7009,
         7.8377,  6.9372,  7.8099,  5.9722,  7.2506,  7.0621, 10.4032,  8.6917,
         6.2146,  6.4138,  7.8474, 11.1637], device='cuda:0',
       grad_fn=<CopyBackwards>)

In [35]:
torch.matrix_rank(model.M[0].weight.data)

  torch.matrix_rank(model.M[0].weight.data)


tensor(495, device='cuda:0')

In [59]:
aa = torch.randn(20, 500, requires_grad = True)
aa.shape

torch.Size([20, 500])

In [64]:
(aa[..., None] * aa[:, None]).mean(0)

tensor([[ 1.1250, -0.1152,  0.0715,  ..., -0.1319, -0.3065,  0.1999],
        [-0.1152,  1.0950, -0.1908,  ..., -0.3904, -0.2277, -0.1993],
        [ 0.0715, -0.1908,  0.6777,  ...,  0.0735,  0.3597,  0.0956],
        ...,
        [-0.1319, -0.3904,  0.0735,  ...,  1.3210,  0.1744, -0.1298],
        [-0.3065, -0.2277,  0.3597,  ...,  0.1744,  1.4838, -0.0350],
        [ 0.1999, -0.1993,  0.0956,  ..., -0.1298, -0.0350,  1.1093]],
       grad_fn=<MeanBackward1>)

In [49]:
torch.outer(layers[1], layers[1])

RuntimeError: outer: Expected 1-D argument self, but got 2-D