In [1]:
import sys
sys.path.append("../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from modelsnew import *
from visualization import *
# matplotlib.use('Agg')

In [2]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [3]:
# Image pipeline: PIL image -> tensor, followed by a Normalize step with mean 0 and std 1.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,)),
])


def _mnist_split(train, shuffle):
    """Create the MNIST dataset for one split plus a DataLoader over it.

    Args:
        train: True for the training split, False for the test split.
        shuffle: whether the DataLoader shuffles the samples.

    Returns:
        (dataset, loader); the loader yields batches of 20 and uses no worker processes.
    """
    dataset = torchvision.datasets.MNIST('./data', train=train, transform=transform,
                                         target_transform=None, download=True)
    loader = torch.utils.data.DataLoader(dataset, batch_size=20, shuffle=shuffle, num_workers=0)
    return dataset, loader


# Training data is shuffled; test data keeps its stored order.
mnist_dset_train, train_loader = _mnist_split(train=True, shuffle=True)
mnist_dset_test, test_loader = _mnist_split(train=False, shuffle=False)

In [4]:
# Nonlinearity handed to the model; F.relu is the alternative the author had noted.
activation = hard_sigmoid
# Squared-error loss with reduction='none', i.e. no averaging or summing over elements.
criterion = torch.nn.MSELoss(reduction='none')
criterion = criterion.to(device)

In [5]:
# Layer sizes handed to the model.
architecture = [784, 500, 10]

# Grab one training batch. Images are flattened to (batch, features) and then
# transposed so that each sample is a column; labels are one-hot encoded with
# 10 classes and transposed the same way.
x, y = next(iter(train_loader))
x = x.view(x.size(0), -1)
x = x.to(device).T
y_one_hot = F.one_hot(y, 10)
y_one_hot = y_one_hot.to(device).T

# Hyperparameters.
beta = 1
lambda_h, lambda_y = 0.999, 0.999
epsilon = 0.5
psiv = 0.01  # alternative value noted by the author: 0.9
one_over_epsilon = 1 / epsilon
neural_lr = 0.25
lr_start = {'ff': 0.01, 'fb': 0.01, 'lat': 1e-3}

model = TwoLayerCorInfoMax(
    architecture=architecture,
    lambda_h=lambda_h,
    lambda_y=lambda_y,
    psiv=psiv,
    epsilon=epsilon,
    activation=activation,
)

In [None]:
# Train for n_epochs passes over the training loader and record train/test
# accuracy after every pass.
trn_acc_list = []
tst_acc_list = []
neural_dynamic_iterations_free = 20
neural_dynamic_iterations_nudged = 50

n_epochs = 50

for epoch_ in range(n_epochs):
    # Exponential schedule: both learning rates are scaled by 0.99 ** epoch.
    # NOTE(review): lr_start also defines a 'lat' rate that is not forwarded
    # here -- confirm that batch_step does not expect it.
    decay = (0.99) ** epoch_
    lr = {'ff': lr_start['ff'] * decay, 'fb': lr_start['fb'] * decay}

    # Wrap the loader itself (not the enumerate object) so tqdm can read its
    # length and show a real progress bar with total and ETA.
    for idx, (x, y) in enumerate(tqdm(train_loader)):
        x, y = x.to(device), y.to(device)
        # Flatten each image and transpose so that samples are columns.
        x = x.view(x.size(0), -1).T
        # y already lives on `device`, so the one-hot tensor does too.
        y_one_hot = F.one_hot(y, 10).T

        h, y_hat = model.batch_step(x, y_one_hot, lr, neural_lr, neural_dynamic_iterations_free,
                                    neural_dynamic_iterations_nudged, beta)

    # Evaluate on the same device type the notebook selected rather than a
    # hard-coded 'cuda', so the loop also runs on CPU-only machines. On a GPU
    # machine device.type is exactly 'cuda', the value passed before.
    trn_acc = evaluateCorInfoMax(model, train_loader, neural_lr, neural_dynamic_iterations_free,
                                 device = device.type, printing = False)
    tst_acc = evaluateCorInfoMax(model, test_loader, neural_lr, neural_dynamic_iterations_free,
                                 device = device.type, printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)

    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))
    

3000it [01:01, 49.05it/s]
5it [00:00, 46.97it/s]

Epoch : 1, Train Accuracy : 0.77905, Test Accuracy : 0.7869


3000it [01:01, 48.95it/s]
5it [00:00, 47.01it/s]

Epoch : 2, Train Accuracy : 0.7919333333333334, Test Accuracy : 0.8036


3000it [01:01, 48.76it/s]
5it [00:00, 47.13it/s]

Epoch : 3, Train Accuracy : 0.7969666666666667, Test Accuracy : 0.8033


3000it [01:01, 49.02it/s]
5it [00:00, 48.67it/s]

Epoch : 4, Train Accuracy : 0.7871333333333334, Test Accuracy : 0.7949


3000it [01:01, 48.97it/s]
5it [00:00, 47.42it/s]

Epoch : 5, Train Accuracy : 0.8075333333333333, Test Accuracy : 0.8134


3000it [01:01, 48.40it/s]
5it [00:00, 48.94it/s]

Epoch : 6, Train Accuracy : 0.81175, Test Accuracy : 0.8237


3000it [01:01, 48.57it/s]
5it [00:00, 47.35it/s]

Epoch : 7, Train Accuracy : 0.8039666666666667, Test Accuracy : 0.8121


3000it [01:00, 49.23it/s]
5it [00:00, 47.50it/s]

Epoch : 8, Train Accuracy : 0.8115666666666667, Test Accuracy : 0.8207


3000it [01:01, 48.62it/s]
5it [00:00, 48.85it/s]

Epoch : 9, Train Accuracy : 0.8057166666666666, Test Accuracy : 0.8172


3000it [01:01, 48.54it/s]
5it [00:00, 49.02it/s]

Epoch : 10, Train Accuracy : 0.81795, Test Accuracy : 0.8227


3000it [01:01, 49.03it/s]
5it [00:00, 47.45it/s]

Epoch : 11, Train Accuracy : 0.8090833333333334, Test Accuracy : 0.8152


3000it [01:01, 48.98it/s]
5it [00:00, 48.22it/s]

Epoch : 12, Train Accuracy : 0.8039833333333334, Test Accuracy : 0.8091


3000it [01:01, 48.63it/s]
5it [00:00, 48.39it/s]

Epoch : 13, Train Accuracy : 0.7882, Test Accuracy : 0.7945


3000it [01:01, 48.50it/s]
5it [00:00, 48.77it/s]

Epoch : 14, Train Accuracy : 0.7971833333333334, Test Accuracy : 0.8053


3000it [01:02, 48.07it/s]
5it [00:00, 47.50it/s]

Epoch : 15, Train Accuracy : 0.79715, Test Accuracy : 0.8046


3000it [01:01, 49.12it/s]
5it [00:00, 47.42it/s]

Epoch : 16, Train Accuracy : 0.8075, Test Accuracy : 0.8171


3000it [01:01, 48.81it/s]
5it [00:00, 48.84it/s]

Epoch : 17, Train Accuracy : 0.80595, Test Accuracy : 0.812


3000it [01:01, 48.82it/s]
5it [00:00, 48.94it/s]

Epoch : 18, Train Accuracy : 0.7995333333333333, Test Accuracy : 0.8069


2948it [01:00, 49.16it/s]

In [None]:
# Trace of model.B[0]['weight'] divided by architecture[1]. If that matrix is
# architecture[1] x architecture[1], this is its mean diagonal entry -- TODO confirm shape.
model.B[0]['weight'].trace() / architecture[1]

In [None]:
# Accuracy curves, one point per completed epoch. Labels, legend and axis
# titles make the figure readable on its own; the x-axis is the 1-based epoch
# number, matching the "Epoch : N" lines printed during training.
fig, ax = plt.subplots()
for curve_label, acc_history in (("Train accuracy", trn_acc_list), ("Test accuracy", tst_acc_list)):
    ax.plot(range(1, len(acc_history) + 1), acc_history, label=curve_label)
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy per epoch")
ax.legend()
ax.grid()
plt.show()