#### Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
sys.path.append("..")

In [2]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [3]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

In [4]:
from tqdm import tqdm

In [5]:
from bptt_tgeb_mnist_architecture import *

#### Test for CUDA

In [6]:
# Detect CUDA once. `train_on_gpu` is checked again by the training loop
# before tensors are moved; `dev` is the matching torch.device.
train_on_gpu = torch.cuda.is_available()
dev = torch.device('cuda' if train_on_gpu else 'cpu')
print('GPU found, training on GPU' if train_on_gpu else 'No GPU, training on CPU')

No GPU, training on CPU


#### Load MNIST

In [7]:
## Make sure batch_size = 1 for now!!

def load_mnist(batch_size=1, shuffle_train=True, root="../data"):
    """Build DataLoaders over the MNIST training and test splits.

    Every image is converted to a tensor and normalised with mean 0.5 and
    std 0.5. The files are downloaded into ``root`` when they are not
    already present there.

    Args:
        batch_size: Number of samples per batch, used for both loaders.
        shuffle_train: Whether the training loader shuffles its samples.
            The test loader is never shuffled.
        root: Directory that holds (or receives) the MNIST files.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5,), (0.5,)),
    ])

    def make_loader(train, shuffle):
        # Same root, transform and batch size for both splits.
        dataset = torchvision.datasets.MNIST(root, train=train, download=True, transform=transform)
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    train_loader = make_loader(True, shuffle_train)
    test_loader = make_loader(False, False)
    return train_loader, test_loader

In [8]:
# Build both loaders with the defaults: batch_size=1, shuffled training split.
mnist_train_loader, mnist_test_loader = load_mnist()

#### Architectural Initialisations

In [9]:
n_classes = 10  # number of MNIST digit classes; also the number of gating-vector rows

In [10]:
input_dim = 784  # length of one flattened 28x28 MNIST image
hidden_dim = 100  # hidden-state size; also the width of each gating vector
output_dim = 1  # third argument to RNNModule -- presumably one score per class row; confirm in the architecture module

In [11]:
## Gating vector: one row of +1/-1 entries per class.
## Even positions get random signs and each following odd position gets the
## negated sign. The two strided slices only have equal length when
## hidden_dim is even.
half = hidden_dim // 2
tvec_hh = torch.zeros(n_classes, hidden_dim)
for cls in range(n_classes):
    # Random 0/1 draws mapped onto -1/+1.
    signs = 2 * torch.randint(0, 2, (1, half)).float() - 1
    tvec_hh[cls, 0::2] = signs
    tvec_hh[cls, 1::2] = -signs

In [12]:
# ## Gating vector
# tvec_ih = torch.zeros(n_classes,hidden_dim)
# for ii in range(n_classes):
#     t_half = torch.randint(0, 2, (1, hidden_dim//2)).float()*2 - 1
#     tvec_ih[ii,::2] = t_half
#     tvec_ih[ii,1::2] = -t_half

In [13]:
# Reuse the `hh` gating vectors for `ih`. This binds a second name to the same
# tensor object (no copy), so an in-place change to one is seen through both.
tvec_ih = tvec_hh

#### Architecture

In [14]:
# cell = RNNModule(input_dim, hidden_dim, output_dim, tvec_ih, tvec_hh)
# rnn = RNN(cell)

#### Loss

In [15]:
# Cross-entropy loss; the training loop feeds it the model output reshaped to
# a single row together with the integer class label.
criterion_ce = nn.CrossEntropyLoss()

#### Training loop

In [16]:
# params = [rnn.cell.Wih]+[rnn.cell.Whh]+[rnn.cell.Woh]

In [17]:
# optimizer = optim.SGD(params, lr=5e-3)

In [18]:
epochs = 10  # passes over the data loader per run

In [19]:
nRuns = 3  # number of independent runs; a new model is built for each

In [20]:
# Per-run, per-epoch metrics; row = run, column = epoch. Filled in by the
# training loop.
train_losses = np.zeros(shape=(nRuns, epochs))
train_acc = np.zeros_like(train_losses)

# One counter per digit class, incremented inside the training loop.
acc_classes = np.zeros(shape=n_classes)

In [21]:
# Train `nRuns` independent models for `epochs` passes each, taking one SGD
# step per sample, and record the mean loss and accuracy of every epoch.
#
# NOTE(review): the loop iterates over `mnist_test_loader`, so the model is
# fitted to, and the "Training" metrics are measured on, the MNIST test split.
# Switch to `mnist_train_loader` if that is not intended.
n_batches = len(mnist_test_loader)

for run in range(nRuns):

    # New model for every run; the gating vectors are shared by all runs.
    cell = RNNModule(input_dim, hidden_dim, output_dim, tvec_ih, tvec_hh)
    rnn = RNN(cell)

    # Only Whh and Woh are handed to the optimiser, so SGD never updates Wih.
    params = [rnn.cell.Whh, rnn.cell.Woh]
    optimizer = optim.SGD(params, lr=5e-3)

    for e in range(epochs):

        running_loss = 0
        running_acc = 0

        for image, label in tqdm(mnist_test_loader):

            ## Clear older gradients
            optimizer.zero_grad()

            ## Flatten the image to a single row, then stack n_classes copies
            ## of it so the model receives one input row per class.
            xs = image.reshape(1, -1).repeat(n_classes, 1)
            hp = torch.zeros(cell.hid_dim)  ## very first hidden state is the zero vector
            ts = label.long()

            ## Move the inputs to the selected device (a no-op on CPU).
            ## NOTE(review): nothing in this notebook moves `rnn` / `cell` to
            ## `dev` -- confirm the architecture module handles that before
            ## running on a GPU.
            xs, hp, ts = xs.to(dev), hp.to(dev), ts.to(dev)

            ## Forward pass
            ys, hs = rnn.forward(xs, hp)
            loss = criterion_ce(ys.float().view(1, -1), ts)

            ## Compute gradients w/ Backprop (autograd)
            loss.backward()

            ## update weights
            optimizer.step()

            ## update loss
            running_loss += loss.item()

            ## Check if the sample is correctly classified. The per-class
            ## counter is only incremented for correct predictions, so
            ## `acc_classes` holds correct-prediction counts per digit
            ## (accumulated over all epochs and runs).
            pred_class = torch.argmax(ys).item()
            true_class = ts.item()
            if pred_class == true_class:
                running_acc += 1
                acc_classes[true_class] += 1

        epoch_loss = running_loss / n_batches
        epoch_acc = running_acc / n_batches
        train_losses[run, e] = epoch_loss
        train_acc[run, e] = epoch_acc
        print(f"Training loss: {epoch_loss}")
        print(f"Training acc: {epoch_acc}")

100%|██████████| 10000/10000 [02:13<00:00, 74.96it/s]


Training loss: 2.1324407143235207
Training acc: 0.2704


100%|██████████| 10000/10000 [01:13<00:00, 136.49it/s]


Training loss: 1.8075749909758567
Training acc: 0.6097


100%|██████████| 10000/10000 [01:20<00:00, 124.78it/s]


Training loss: 1.6905937846302985
Training acc: 0.7225


100%|██████████| 10000/10000 [01:23<00:00, 120.28it/s]


Training loss: 1.6522288741350173
Training acc: 0.7738


100%|██████████| 10000/10000 [01:39<00:00, 100.42it/s]


Training loss: 1.6312007909059525
Training acc: 0.8051


100%|██████████| 10000/10000 [01:42<00:00, 97.87it/s]


Training loss: 1.6231135908603669
Training acc: 0.8051


100%|██████████| 10000/10000 [01:40<00:00, 99.32it/s]


Training loss: 1.6188625543951989
Training acc: 0.7772


100%|██████████| 10000/10000 [01:57<00:00, 85.33it/s]


Training loss: 1.6254006400585175
Training acc: 0.7652


100%|██████████| 10000/10000 [01:52<00:00, 88.68it/s]


Training loss: 1.6385094878792763
Training acc: 0.7539


100%|██████████| 10000/10000 [01:45<00:00, 94.83it/s]


Training loss: 1.6325308441758155
Training acc: 0.7055


100%|██████████| 10000/10000 [01:44<00:00, 96.02it/s]


Training loss: 1.71136925085783
Training acc: 0.6368


100%|██████████| 10000/10000 [01:48<00:00, 92.04it/s]


Training loss: 1.6442047278404235
Training acc: 0.7243


100%|██████████| 10000/10000 [01:47<00:00, 93.44it/s]


Training loss: 1.6597280303001405
Training acc: 0.7088


100%|██████████| 10000/10000 [01:39<00:00, 100.20it/s]


Training loss: 1.6295541038751602
Training acc: 0.7381


100%|██████████| 10000/10000 [01:31<00:00, 108.79it/s]

Training loss: 1.6226059031367301
Training acc: 0.7471





In [23]:
# Persist the per-run, per-epoch curves to the working directory.
# np.save appends ".npy" because the names carry no extension.
np.save(file='train-losses-sub-weights-same-hh-ih-bptt', arr=train_losses)
np.save(file='train-accs-sub-weights-same-hh-ih-bptt', arr=train_acc)