In [1]:
import sys
sys.path.append("../src")
import torch
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch.nn.functional as F

import glob
import os
from datetime import datetime
import time
import math
from tqdm import tqdm

from itertools import repeat
from torch.nn.parameter import Parameter
import collections
import matplotlib
from torch_utils import *
from ContrastiveModels import *
from visualization import *
# matplotlib.use('Agg')

In [2]:
# Use the first CUDA GPU when one is visible to torch, otherwise run on the CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
device  # echo the selected device as the cell output

device(type='cuda', index=0)

In [3]:
# Preprocessing pipeline: ToTensor followed by Normalize with mean 0 and std 1
# (so the normalisation step leaves the tensor values unchanged).
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.0,), std=(1.0,)),
])

# MNIST training split stored under ./data (download=True), served in
# shuffled mini-batches of 20 samples.
mnist_dset_train = torchvision.datasets.MNIST(
    './data',
    train=True,
    transform=transform,
    target_transform=None,
    download=True,
)
train_loader = torch.utils.data.DataLoader(
    mnist_dset_train,
    batch_size=20,
    shuffle=True,
    num_workers=0,
)

# MNIST test split, same batch size but iterated in a fixed order.
mnist_dset_test = torchvision.datasets.MNIST(
    './data',
    train=False,
    transform=transform,
    target_transform=None,
    download=True,
)
test_loader = torch.utils.data.DataLoader(
    mnist_dset_test,
    batch_size=20,
    shuffle=False,
    num_workers=0,
)

In [4]:
# --- Network definition ------------------------------------------------------
# Layer widths: 784 inputs (flattened 28x28 image), 500 hidden units, 10 outputs.
architecture = [784, 500, 10]
activation = hard_sigmoid

# --- One sample batch, laid out column-wise ----------------------------------
# After the flatten + transpose, each column of `x` is one sample; the one-hot
# labels are transposed the same way.
x, y = next(iter(train_loader))
x = x.view(x.shape[0], -1).to(device).T
y_one_hot = F.one_hot(y, num_classes=10).to(device).T

# --- Model / learning hyperparameters ----------------------------------------
beta = 1
lambda_ = 0.9999
epsilon = 0.15
one_over_epsilon = 1 / epsilon
# Initial learning rates for the feedforward ('ff') and feedback ('fb') weights,
# one entry per weight matrix.
lr_start = {
    'ff': np.array([0.0475, 0.035]),
    'fb': np.array([0.025, 0.01]),
}

# --- Neural-dynamics settings (forwarded to the model's dynamics routines) ---
neural_lr_start = 0.05
neural_lr_stop = 0.001
neural_lr_rule = "constant"
neural_lr_decay_multiplier = 0.01
neural_dynamic_iterations_nudged = 5
neural_dynamic_iterations_free = 20

model = ContrastiveCorInfoMax_wCWU(
    architecture=architecture,
    lambda_=lambda_,
    epsilon=epsilon,
    activation=activation,
)

In [5]:
# Baseline: accuracy on the training set before any weight update has been applied.
_ = evaluateContrastiveCorInfoMax(
    model,
    train_loader,
    neural_lr_start,
    neural_lr_stop,
    neural_lr_rule,
    neural_lr_decay_multiplier,
    neural_dynamic_iterations_free,
    device,
)

Train accuracy :	 0.0706


In [None]:
# Train for `n_epochs` epochs and record train/test accuracy after each one.
trn_acc_list = []
tst_acc_list = []

n_epochs = 50

# NOTE(review): in the run logged below this cell, accuracy drops from ~0.75 to
# ~0.10 at epochs 3-4 and never recovers. The configuration appears to diverge;
# the cause is not visible from this cell, so revisit the learning rates.
for epoch_ in range(n_epochs):
    # Decay the synaptic learning rates by 1% per epoch.
    lr_decay = 0.99 ** epoch_
    lr = {'ff': lr_start['ff'] * lr_decay, 'fb': lr_start['fb'] * lr_decay}

    # Wrap the loader itself (not `enumerate(...)`) so tqdm can read its length
    # and render a real progress bar with an ETA instead of a bare counter.
    for idx, (x, y) in enumerate(tqdm(train_loader, desc="Epoch {}".format(epoch_ + 1))):
        x, y = x.to(device), y.to(device)
        x = x.view(x.size(0), -1).T          # one sample per column
        y_one_hot = F.one_hot(y, 10).T       # `y` is already on `device`

        # Draw a random sign for the nudging strength on every batch. The
        # signed value lives in its own variable so the configured `beta` is
        # never overwritten and the cell can be re-run with the same setting.
        rnd_sgn = 2 * np.random.randint(2) - 1
        beta_signed = rnd_sgn * beta

        neurons = model.batch_step(x, y_one_hot, lr, neural_lr_start, neural_lr_stop, neural_lr_rule,
                                   neural_lr_decay_multiplier, neural_dynamic_iterations_free,
                                   neural_dynamic_iterations_nudged, beta_signed)

    # Full-dataset evaluation after each epoch (free-phase dynamics only).
    trn_acc = evaluateContrastiveCorInfoMax(model, train_loader, neural_lr_start, neural_lr_stop, neural_lr_rule,
                                            neural_lr_decay_multiplier, neural_dynamic_iterations_free, device, printing = False)
    tst_acc = evaluateContrastiveCorInfoMax(model, test_loader, neural_lr_start, neural_lr_stop, neural_lr_rule,
                                            neural_lr_decay_multiplier, neural_dynamic_iterations_free, device, printing = False)
    trn_acc_list.append(trn_acc)
    tst_acc_list.append(tst_acc)

    print("Epoch : {}, Train Accuracy : {}, Test Accuracy : {}".format(epoch_+1, trn_acc, tst_acc))

3000it [01:22, 36.18it/s]
4it [00:00, 34.14it/s]

Epoch : 1, Train Accuracy : 0.7397166666666667, Test Accuracy : 0.7449


3000it [01:22, 36.32it/s]
4it [00:00, 33.69it/s]

Epoch : 2, Train Accuracy : 0.7479833333333333, Test Accuracy : 0.7531


3000it [01:22, 36.32it/s]
4it [00:00, 33.42it/s]

Epoch : 3, Train Accuracy : 0.17886666666666667, Test Accuracy : 0.1827


3000it [01:22, 36.50it/s]
4it [00:00, 34.59it/s]

Epoch : 4, Train Accuracy : 0.09895, Test Accuracy : 0.098


3000it [01:21, 36.65it/s]
4it [00:00, 34.23it/s]

Epoch : 5, Train Accuracy : 0.09886666666666667, Test Accuracy : 0.098


3000it [01:21, 36.68it/s]
4it [00:00, 33.97it/s]

Epoch : 6, Train Accuracy : 0.09881666666666666, Test Accuracy : 0.098


3000it [01:21, 36.61it/s]
4it [00:00, 33.97it/s]

Epoch : 7, Train Accuracy : 0.09878333333333333, Test Accuracy : 0.098


3000it [01:22, 36.55it/s]
4it [00:00, 34.71it/s]

Epoch : 8, Train Accuracy : 0.09881666666666666, Test Accuracy : 0.098


3000it [01:21, 36.82it/s]
4it [00:00, 33.25it/s]

Epoch : 9, Train Accuracy : 0.09881666666666666, Test Accuracy : 0.098


361it [00:09, 37.14it/s]

In [None]:
# Training accuracy per epoch.
# NOTE(review): the logged accuracies are fractions in [0, 1] while the axis
# label says '%'; confirm whether plot_convergence_plot rescales its input.
plot_convergence_plot(
    trn_acc_list,
    xlabel='Number of Epochs',
    ylabel='Accuracy %',
    title='Contrastive CorInfoMax Train Accuracy w.r.t. Epochs',
    figsize=(12, 8),
    fontsize=25,
    linewidth=3,
)

In [None]:
# Test accuracy per epoch.
# NOTE(review): the logged accuracies are fractions in [0, 1] while the axis
# label says '%'; confirm whether plot_convergence_plot rescales its input.
plot_convergence_plot(
    tst_acc_list,
    xlabel='Number of Epochs',
    ylabel='Accuracy %',
    title='Contrastive CorInfoMax Test Accuracy w.r.t. Epochs',
    figsize=(12, 8),
    fontsize=25,
    linewidth=3,
)

In [None]:
def copy_neurons(neurons):
    """Return independent, gradient-enabled copies of a sequence of tensors.

    Each tensor in ``neurons`` is duplicated into freshly allocated storage
    (its values are read through ``.data``, so the copy is not linked to the
    source's autograd history) and the copy is flagged with
    ``requires_grad_()``.

    Args:
        neurons: iterable of tensors.

    Returns:
        A new list with one copied tensor per input tensor, in the same order.
    """
    return [
        torch.empty_like(state).copy_(state.data).requires_grad_()
        for state in neurons
    ]

In [None]:
# Pull one batch from the training loader for the inspection cell that follows.
x, y = next(iter(train_loader))
# Flatten every image to a vector, move it to the device, and transpose so
# that each column holds one sample.
x = x.view(x.shape[0], -1)
x = x.to(device).T
y = y.to(device)

In [None]:
# Scratch check: run the neural dynamics with beta = 0 on the current batch,
# rebuild each layer's activation from its neighbours' weights, and display
# the negated squared feedforward error per sample.
# x, y = next(iter(train_loader))
# x = x.view(x.size(0),-1).to(device).T
# y = y.to(device)
T = neural_dynamic_iterations_free
neurons = model.init_neurons(x.size(1), device = model.device)
neurons = model.run_neural_dynamics(x, 0, neurons, neural_lr_start, neural_lr_stop, neural_lr_rule, neural_lr_decay_multiplier, T, beta = 0) 

# pred = torch.argmax(neurons[-1], dim=0).squeeze()  # in this case prediction is done directly on the last (output) layer of neurons
# pred, y

Wff = model.Wff
Wfb = model.Wfb
# `layers` is a separate list, so it keeps pointing at the activations produced
# by the dynamics while `neurons` is overwritten in the loop below.
layers = [x] + neurons

for jj in range(len(Wff)):
    if jj != len(Wff) - 1:
        # Hidden layer: average of the feedforward drive from the layer below
        # and the feedback drive from the layer above.
        neurons[jj] = 0.5*(Wff[jj]['weight'] @ layers[jj] + Wff[jj]['bias'] + Wfb[jj+1]['weight'] @ layers[jj+2] + Wfb[jj+1]['bias']) 
    else:
        # Output layer: feedforward drive only.
        neurons[jj] = Wff[jj]['weight'] @ layers[jj] + Wff[jj]['bias']
layers = [x] + neurons
# Copy only once, after the update: the copy previously taken before the loop
# was overwritten here without ever being read.
layers_copy = copy_neurons(layers)

# Feedforward prediction error of each layer given the layer below it. The
# expression is the same for every layer, so no special case for jj == 0.
# NOTE(review): `error` is overwritten on every iteration, so only the last
# layer's error reaches the expression below. Confirm that is intended.
for jj in range(len(Wff)):
    error = layers[jj + 1] - (Wff[jj]['weight'] @ layers_copy[jj] + Wff[jj]['bias'])
- torch.sum(error * error, 0)