In [1]:
import sys
import os
sys.path.append("../src")

import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F
import argparse
import matplotlib
# matplotlib.use('Agg')
from tqdm import tqdm
import glob
from PIL import Image
import os
from datetime import datetime
import time
import math
import sys
sys.path.append("../src")
from models import *
from torch_utils import *
from visualization import *

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), 
                                            torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,))])

mnist_dset_train = torchvision.datasets.MNIST('./data', train=True, transform=transform, target_transform=None, download=True)
train_loader = torch.utils.data.DataLoader(mnist_dset_train, batch_size=20, shuffle=True, num_workers=0)

mnist_dset_test = torchvision.datasets.MNIST('./data', train=False, transform=transform, target_transform=None, download=True)
test_loader = torch.utils.data.DataLoader(mnist_dset_test, batch_size=20, shuffle=False, num_workers=0)

In [5]:
activation = hard_sigmoid
criterion = torch.nn.MSELoss(reduction='none').to(device)

architecture = [784, 500, 10]

x,y = next(iter(train_loader))
x = x.view(x.size(0),-1).to(device).T
y_one_hot = F.one_hot(y, 10).to(device).T

lambda_h = 0.99
lambda_y = 0.99
epsilon = 0.1
one_over_epsilon = 1 / epsilon
lr = {'ff' : 0.05, 'fb': 0.05, 'lat': 1e-3}
neural_lr = 0.05
beta = 1
model = TwoLayerCorInfoMax(architecture = architecture, lambda_h = lambda_h, lambda_y = lambda_y, 
                           epsilon = epsilon, activation = activation)

In [10]:
neural_dynamic_iterations_free = 20
neural_dynamic_iterations_nudged = 4

x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = x.view(x.size(0),-1).T
y_one_hot = F.one_hot(y, 10).to(device).T

y_label = y_one_hot

h, y_hat = model.init_neurons(x.size(1), device = model.device)

h, y_hat = model.run_neural_dynamics(x, h, y_hat, y_label, neural_lr, 
                                    neural_dynamic_iterations_free, 0, )
neurons1 = [h, y_hat].copy()

h, y_hat = model.run_neural_dynamics(x, h, y_hat, y_label, neural_lr, 
                                    neural_dynamic_iterations_nudged, beta,)
neurons2 = [h, y_hat].copy()

In [17]:
model.B[0]['weight'].shape

torch.Size([500, 500])

In [13]:
neurons2[0].shape

torch.Size([500, 20])

In [18]:
model.B[0]['weight'] @ neurons2[0][:,0]

tensor([ 0.1383, -0.0150,  0.0197,  0.1102, -0.0116,  0.2284,  0.2792,  0.1199,
         0.0851,  0.2255, -0.0687,  0.0250,  0.1007,  0.0431, -0.0054,  0.1592,
         0.0494, -0.0812,  0.0137, -0.1206,  0.2386,  0.0522,  0.0618, -0.0302,
         0.0584,  0.4233,  0.0796, -0.1759, -0.0589,  0.2406,  0.2869,  0.2264,
        -0.1019, -0.0474, -0.0646,  0.0281,  0.1068,  0.1232,  0.0773, -0.0943,
         0.0597,  0.0030,  0.2092,  0.0696, -0.0195, -0.0565,  0.0768, -0.0114,
         0.1649, -0.3336,  0.1061,  0.0980, -0.1135,  0.0232,  0.2619, -0.0636,
        -0.0420,  0.1504,  0.2714, -0.0675,  0.1464,  0.0843,  0.0759, -0.0979,
         0.0373,  0.0684,  0.1076, -0.0538, -0.0018,  0.1458, -0.0058, -0.0072,
         0.0375,  0.1893,  0.2215, -0.1074, -0.0835, -0.0535, -0.0070, -0.0326,
         0.0850,  0.2370,  0.1216, -0.1104,  0.2762,  0.0196,  0.2197,  0.0133,
         0.3140,  0.2002,  0.2148,  0.0320,  0.6513,  0.0805,  0.1257,  0.2593,
        -0.0165,  0.1099, -0.0959,  0.12

In [None]:
trn_acc_list = []
tst_acc_list = []
neural_dynamic_iterations_free = 20
neural_dynamic_iterations_nudged = 4
# lambda_h = 0.01
# lambda_y = 0.01
# epsilon = 1
# one_over_epsilon = 1 / epsilon
n_epochs = 50
# lr = {'ff' : 1e-3, 'fb': 1e-3, 'lat': 1e-3}
# neural_lr = 0.25

for epoch_ in range(n_epochs):
    for idx, (x, y) in tqdm(enumerate(train_loader)):
        x, y = x.to(device), y.to(device)
        x = x.view(x.size(0),-1).T
        y_one_hot = F.one_hot(y, 10).to(device).T

        h, y_hat = model.batch_step(  x, y_one_hot, lr, neural_lr, neural_dynamic_iterations_free, 
                                      neural_dynamic_iterations_nudged, beta, output_sparsity = True)

    trn_acc = evaluateCorInfoMax(model, train_loader, neural_lr, 20, device = 'cuda', printing = False)
    tst_acc = evaluateCorInfoMax(model, test_loader, neural_lr, 20, device = 'cuda', printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)
    lr = {'ff' : 0.1, 'fb': 0.1, 'lat': 1e-2}
    
    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

In [8]:
next(iter(train_loader))

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         ...,
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ..

In [22]:
x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)
x = x.view(x.size(0),-1) # flattening the input
neurons = model.init_neurons(x.size(0), device)
layers = [x] + neurons

In [16]:
model.W[idx].weight.data.shape

torch.Size([500, 784])

In [18]:
layers[idx + 1].shape

torch.Size([20, 500])

In [21]:
layers[idx].shape

torch.Size([20, 1, 28, 28])

In [24]:
model.W[idx](layers[idx]).shape

torch.Size([20, 500])

In [28]:
(model.W[idx](layers[idx]) - layers[idx + 1]).shape

torch.Size([20, 500])

In [29]:
idx = 0
torch.norm(model.W[idx](layers[idx]) - layers[idx + 1], dim = 1)

tensor([ 9.9167,  9.3459,  6.2711,  7.1550,  8.3149,  6.3540,  7.4546,  6.7009,
         7.8377,  6.9372,  7.8099,  5.9722,  7.2506,  7.0621, 10.4032,  8.6917,
         6.2146,  6.4138,  7.8474, 11.1637], device='cuda:0',
       grad_fn=<CopyBackwards>)

In [35]:
torch.matrix_rank(model.M[0].weight.data)

  torch.matrix_rank(model.M[0].weight.data)


tensor(495, device='cuda:0')

In [59]:
aa = torch.randn(20, 500, requires_grad = True)
aa.shape

torch.Size([20, 500])

In [64]:
(aa[..., None] * aa[:, None]).mean(0)

tensor([[ 1.1250, -0.1152,  0.0715,  ..., -0.1319, -0.3065,  0.1999],
        [-0.1152,  1.0950, -0.1908,  ..., -0.3904, -0.2277, -0.1993],
        [ 0.0715, -0.1908,  0.6777,  ...,  0.0735,  0.3597,  0.0956],
        ...,
        [-0.1319, -0.3904,  0.0735,  ...,  1.3210,  0.1744, -0.1298],
        [-0.3065, -0.2277,  0.3597,  ...,  0.1744,  1.4838, -0.0350],
        [ 0.1999, -0.1993,  0.0956,  ..., -0.1298, -0.0350,  1.1093]],
       grad_fn=<MeanBackward1>)

In [49]:
torch.outer(layers[1], layers[1])

RuntimeError: outer: Expected 1-D argument self, but got 2-D