In [1]:
import torch

from torchvision import datasets, transforms as transform

import argparse
import os
from dlc_practical_prologue import *
import dlc_practical_prologue as prologue


Load Data

In [3]:
from torchvision.datasets import MNIST


train_data = MNIST(root = './', train=True, download=True,)
test_data = MNIST(root = './', train=False, download=True)

train_input = train_data.data.view(-1, 1, 28, 28).float()
train_target = train_data.targets 
test_input = test_data.data.view(-1, 1, 28, 28).float()
test_target = test_data.targets 

train_input = train_input.clone().reshape(train_input.size(0), -1)
test_input = test_input.clone().reshape(test_input.size(0), -1)

train_input = train_input.narrow(0, 0, 1000)
train_target = train_target.narrow(0, 0, 1000)
test_input = test_input.narrow(0, 0, 1000)
test_target = test_target.narrow(0, 0, 1000)

train_target = 0.9*convert_to_one_hot_labels(train_input, train_target)
test_target = 0.9*convert_to_one_hot_labels(test_input, test_target)

mu, std = train_input.mean(), train_input.std()
train_input = train_input.sub_(mu).div_(std)
test_input = test_input.sub_(mu).div_(std)

In [4]:
train_input.shape, train_target.shape, test_input.shape, test_target.shape

(torch.Size([1000, 784]),
 torch.Size([1000, 10]),
 torch.Size([1000, 784]),
 torch.Size([1000, 10]))

In [52]:
class Backpropagation:
    def __init__(self,learning_rate=0.1,  eps=0.000001,iterations=1000):

        self.learning_rate = learning_rate
        self.eps = eps
        self.iterations = iterations
        
    def sigma(self, x):
        xx = torch.tanh(x)
        return xx
    
    def dsigma(self, x):
        dx = 1 - (self.sigma(x))**2
        return dx
    
    def loss(self, v,t):

        diff = torch.pow(v-t,2)
        loss = torch.sum(diff)
        return loss
    
    def dloss(self, v, t):
        dloss = 2*(v-t)
        return dloss
        
    def forward_pass(self, x, w1, w2, b1, b2):

        x0 = x

        s1 = torch.matmul(w1, x0) + b1
        x1 = self.sigma(s1)
        s2 = torch.matmul(w2,x1)+ b2
        x2 = self.sigma(s2)
        
        return x0, s1, x1, s2, x2 
    
    def backward_pass(self,w1,b1,w2,b2,dw1, db1, dw2, db2, x, t):

        x,s1,x1,s2,x2 = self.forward_pass(x, w1, w2, b1, b2)

        dloss = self.dloss(x2,t)
        ds2 = torch.mul(dloss, self.dsigma(s2))

        dw2 += torch.mm(ds2, torch.t(x1))
        db2 += ds2
        
        dx1 = torch.mm(torch.t(w2), ds2)
        ds1 = torch.mul(dx1, self.dsigma(s1))
        
        dw1 += torch.mm(ds1, torch.t(x))
        db1 += ds1
          

    def train(self, train, train_target, test, test_target,num_units = 50):

        n_class = test_target.shape[1]
        n_inputs = train.shape[1]

        w1 = torch.empty(num_units, n_inputs).normal_(0, self.eps)
        b1 = torch.zeros(num_units, 1).normal_(0, self.eps)
        w2 = torch.empty(n_class, num_units).normal_(0, self.eps)

        b2 = torch.zeros(n_class, 1).normal_(0, self.eps)
        alpha = self.learning_rate/train.shape[0]

        for i in range(self.iterations):
            dw1 = torch.zeros(num_units, n_inputs)
            db1 = torch.zeros(num_units, 1)
            dw2 = torch.zeros(n_class, num_units)
            db2 = torch.zeros(n_class, 1)

            for index in range(train.shape[0]): # SGD
                
                x = train[index].resize_(train[index].shape[0], 1)
                target = train_target[index].resize_(train_target[index].shape[0], 1)

                
                x0, s1, x1, s2, x2 = self.forward_pass(x, w1, w2, b1, b2)
                                
                self.backward_pass(w1,b1,w2,b2,dw1, db1, dw2, db2, x, target)
                
                t = torch.argmax(train_target[index])
                test_error = self.mse_error(test_target,t)
                
                train_error = self.mse_error(x2,target)
                
            w1 = w1 - alpha * dw1
            w2 = w2 - alpha * dw2
            b1 = b1 - alpha * db1
            b2 = b2 - alpha * db2

        print("Iteration {},Training error :{}".format(i,train_error))
        print("Iteration {}, Test error :{}".format(i,test_error))
        
        return w1, w2, b1, b2, x2
    

    def mse_error(self, v, t):
        N = v.shape[0]
        error = self.loss(v, t)
                
        return error/N      

In [53]:
model = Backpropagation()

In [76]:
w1, w2, b1, b2, predicted = model.train(train_input, train_target, test_input, test_target)

Iteration 999,Training error :0.003
Iteration 999, Test error :0.156


In [77]:
predicted

tensor([[ 0.0132],
        [-0.1531],
        [ 0.1216],
        [-0.0800],
        [ 0.0455],
        [ 0.0343],
        [ 0.8968],
        [-0.0172],
        [-0.0409],
        [-0.0106]])

In [78]:
w1

tensor([[-0.0012, -0.0012, -0.0012,  ..., -0.0012, -0.0012, -0.0012],
        [ 0.0002,  0.0002,  0.0002,  ...,  0.0002,  0.0002,  0.0002],
        [-0.0004, -0.0004, -0.0004,  ..., -0.0004, -0.0004, -0.0004],
        ...,
        [ 0.0056,  0.0056,  0.0056,  ...,  0.0056,  0.0056,  0.0056],
        [-0.0028, -0.0028, -0.0028,  ..., -0.0028, -0.0028, -0.0028],
        [ 0.0032,  0.0032,  0.0032,  ...,  0.0032,  0.0032,  0.0032]])

In [56]:
b1

tensor([[-9.7941e-04],
        [ 6.6378e-04],
        [ 1.6068e-03],
        [ 1.2278e-03],
        [ 8.6740e-05],
        [ 2.1249e-02],
        [ 1.3970e-03],
        [-4.5742e-03],
        [-3.7261e-03],
        [ 1.6534e-03],
        [ 1.8957e-03],
        [ 1.5301e-03],
        [-2.2284e-04],
        [-3.6716e-03],
        [-2.1197e-02],
        [ 7.6263e-04],
        [ 3.6787e-05],
        [-2.7455e-02],
        [-1.1525e-02],
        [ 1.3799e-04],
        [ 2.8993e-05],
        [-2.4378e-02],
        [ 5.1515e-03],
        [-7.1115e-03],
        [ 6.7252e-04],
        [-5.2753e-03],
        [-9.4676e-04],
        [-1.4710e-03],
        [-2.2612e-03],
        [ 1.7205e-02],
        [ 1.3336e-03],
        [ 1.8563e-03],
        [-6.7331e-03],
        [ 3.1675e-03],
        [ 1.3552e-03],
        [ 2.5375e-03],
        [-1.1230e-03],
        [ 1.8233e-02],
        [ 3.4418e-04],
        [-9.1417e-03],
        [ 5.5898e-04],
        [ 9.4665e-03],
        [-2.8957e-03],
        [-2

In [57]:
w2

tensor([[ 9.8764e-03,  7.0765e-02, -2.6535e-02, -2.0309e-02,  1.1099e-02,
         -2.3700e-01, -4.5591e-02,  3.2624e-03,  5.5564e-02, -3.1696e-03,
         -8.2546e-02,  2.8472e-02, -2.1984e-02,  2.6050e-02, -9.0317e-02,
         -2.3351e-02,  1.3918e-02, -5.8427e-02, -3.2789e-02, -5.0187e-02,
          2.0885e-02,  3.8681e-02, -6.4214e-02,  8.1449e-02, -5.4389e-02,
          1.2118e-01,  7.3901e-03,  2.6575e-02,  3.9146e-03,  7.7426e-02,
         -2.6171e-02, -5.1215e-02,  3.9647e-02, -3.4904e-02,  2.8191e-02,
          1.0338e-01,  7.1460e-03, -1.0774e-02,  3.2410e-03,  1.0983e-01,
          9.7273e-02,  8.3466e-02,  1.1983e-02,  3.6453e-02,  7.1161e-02,
         -1.2170e-02,  2.5634e-02,  4.5050e-02,  1.0045e-02,  2.5599e-02],
        [ 8.4306e-04, -6.4482e-02, -5.8040e-02,  8.6422e-03,  3.9857e-03,
         -8.0623e-03,  1.1235e-02, -3.2360e-02,  4.5917e-02, -1.0796e-02,
         -2.5226e-02, -4.1605e-02, -7.3171e-04, -3.4806e-02,  1.3228e-01,
         -3.6007e-03,  9.1834e-02, -6

In [58]:
b2

tensor([[ 0.2388],
        [ 0.2808],
        [ 0.1568],
        [ 0.1602],
        [ 0.1039],
        [ 0.1512],
        [ 0.1858],
        [ 0.2315],
        [-0.2235],
        [ 0.0796]])