In [1]:
#Imports 
import torch
import ipdb #ipython debugger

In [2]:
### Activation functions

def sigma(x):
    return torch.tanh(x)

def dsigma(x):
    return 1-torch.pow(sigma(x),2)

In [3]:
### Loss

def loss(x, t):
    return torch.pow(torch.norm(x-t),2)

def dloss(x, t):
    return 2*(x-t)

### Load data

In [None]:
import  dlc_practical_prologue as prologue 

In [16]:
train_input, train_target, test_input, test_target = prologue.load_data(one_hot_labels=True,normalize=True)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [17]:
### Initialize parameters

n_units = 50 # number of units in the hidden layer
n_classes = train_target.size(1) # Number of classes
n_train_samples = train_input.size(0) #number of instances
n_test_samples = test_input.size(0)
miu = 0 #mean
eps = 1e-6 #std
zeta = 0.9
eta = 0.1/n_train_samples

train_input = train_input*zeta
test_input = test_input*zeta

In [18]:
train_input.size()

torch.Size([1000, 784])

In [None]:
import matplotlib.pyplot as plt, numpy
plt.imshow(train_input.unsqueeze(1)[14].view(28,28))

In [19]:
### weights and biases

w1 = torch.empty((n_units,train_input.shape[1])).normal_(miu,eps)
b1 = torch.empty(n_units).normal_(miu,eps)

w2 = torch.empty(10,n_units).normal_(miu,eps)
b2 =torch.empty(10).normal_(miu,eps)

dl_dw1 = torch.empty(w1.size())
dl_db1 = torch.empty(b1.size())
dl_dw2 = torch.empty(w2.size())
dl_db2 = torch.empty(b2.size())

In [20]:
def forward_pass(w1, b1, w2, b2, x):
    x0 = x
    s1 = torch.mv(w1,x0)+ b1
    x1 = sigma(s1) 
    s2 = torch.mv(w2,x1)+ b2
    x2 = sigma(s2)
    return x0, s1, x1, s2, x2

In [21]:
def backward_pass(w1, b1, w2, b2, t, x, s1, x1, s2, x2, dl_dw1, dl_db1, dl_dw2, dl_db2):
    x0 = x
    dl_dx2 = dloss(x2, t)
    dl_ds2 = dsigma(s2) * dl_dx2
    dl_dx1 = torch.mv(w2.t(),dl_ds2)
    dl_ds1 = dsigma(s1) * dl_dx1
    
    dl_dw2.add_(torch.mm(dl_ds2.view(-1, 1),x1.view(1, -1)))
    dl_db2.add_(dl_ds2)
    dl_dw1.add_(torch.mm(dl_ds1.view(-1, 1),x0.view(1, -1)))
    dl_db1.add_(dl_ds1)

### Train the model

In [22]:
for k in range(500):

    # Back-propagation

    accumulated_loss = 0
    n_train_errors = 0
    
    # set weights to zero at every iteration
    dl_dw1.zero_()
    dl_db1.zero_()
    dl_dw2.zero_()
    dl_db2.zero_()
    
    # Loop over all training examples
    for n in range(n_train_samples):
        x0, s1, x1, s2, x2 = forward_pass(w1, b1, w2, b2, train_input[n])

        pred = x2.max(0)[1].item() # take class with maximum prob(x2 returns prob)
        if train_target[n, pred] < 0.5: n_train_errors = n_train_errors + 1
        accumulated_loss = accumulated_loss + loss(x2, train_target[n])

        backward_pass(w1, b1, w2, b2,
                      train_target[n],
                      x0, s1, x1, s2, x2,
                      dl_dw1, dl_db1, dl_dw2, dl_db2)

    # Update weights and biases(eta is the learning rate/number of training samples)

    w1 = w1 - eta * dl_dw1
    b1 = b1 - eta * dl_db1
    w2 = w2 - eta * dl_dw2
    b2 = b2 - eta * dl_db2

    # Test error

    n_test_errors = 0

    for n in range(n_test_samples):
        _, _, _, _, x2 = forward_pass(w1, b1, w2, b2, test_input[n])

        pred = x2.max(0)[1].item()
        if test_target[n, pred] < 0.5: n_test_errors = n_test_errors + 1

    print('{:d} accumalted_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
          .format(k,
                  accumulated_loss,
                  (100 * n_train_errors) / train_input.size(0),
                  (100 * n_test_errors) / test_input.size(0)))

0 accumalted_train_loss 1000.00 acc_train_error 90.00% test_error 90.10%
1 accumalted_train_loss 963.68 acc_train_error 88.30% test_error 90.10%
2 accumalted_train_loss 940.46 acc_train_error 88.30% test_error 90.10%
3 accumalted_train_loss 925.61 acc_train_error 88.30% test_error 90.10%
4 accumalted_train_loss 916.12 acc_train_error 88.30% test_error 90.10%
5 accumalted_train_loss 910.03 acc_train_error 88.30% test_error 90.10%
6 accumalted_train_loss 906.13 acc_train_error 88.30% test_error 90.10%
7 accumalted_train_loss 903.63 acc_train_error 88.30% test_error 90.10%
8 accumalted_train_loss 902.02 acc_train_error 88.30% test_error 90.10%
9 accumalted_train_loss 900.98 acc_train_error 88.30% test_error 90.10%
10 accumalted_train_loss 900.32 acc_train_error 88.30% test_error 90.10%
11 accumalted_train_loss 899.88 acc_train_error 88.30% test_error 90.10%
12 accumalted_train_loss 899.61 acc_train_error 88.30% test_error 90.10%
13 accumalted_train_loss 899.43 acc_train_error 88.30% test_

In [None]:
((train_target.argmax(dim=1)==prediction(X2).argmax(dim=1))*1.0).mean().item()

In [None]:
((test_target.argmax(dim=1)==prediction(X2_t1).argmax(dim=1))*1.0).mean().item()