In [1]:
import torch

from torchvision import datasets, transforms as transform

import argparse
import os
from dlc_practical_prologue import *
import dlc_practical_prologue as prologue


Load Data

In [84]:
from torchvision.datasets import MNIST


# Fetch both MNIST splits; files are downloaded into the current directory
# when they are not already present.
train_data = MNIST(root='./', train=True, download=True)
test_data = MNIST(root='./', train=False, download=True)

# Work on an independent float copy of the images, flatten every image into a
# single row, and keep only the first 1000 samples (and labels) of each split.
train_input = train_data.data.float().clone().flatten(start_dim=1).narrow(0, 0, 1000)
test_input = test_data.data.float().clone().flatten(start_dim=1).narrow(0, 0, 1000)
train_target = train_data.targets.narrow(0, 0, 1000)
test_target = test_data.targets.narrow(0, 0, 1000)

# Replace the labels by the output of convert_to_one_hot_labels scaled by 0.9
# (presumably so the targets stay inside tanh's open output range - TODO confirm).
train_target = 0.9 * convert_to_one_hot_labels(train_input, train_target)
test_target = 0.9 * convert_to_one_hot_labels(test_input, test_target)

# Standardise both splits in place using the statistics of the training subset.
mu = train_input.mean()
std = train_input.std()
train_input = train_input.sub_(mu).div_(std)
test_input = test_input.sub_(mu).div_(std)

In [85]:
# Sanity check: display the shapes of the prepared inputs and targets.
train_input.shape, train_target.shape, test_input.shape, test_target.shape

(torch.Size([1000, 784]),
 torch.Size([1000, 10]),
 torch.Size([1000, 784]),
 torch.Size([1000, 10]))

In [74]:
class Backpropagation:
    """One-hidden-layer tanh network trained with hand-written backpropagation.

    The network computes ``x2 = tanh(w2 @ tanh(w1 @ x + b1) + b2)`` on column
    vectors and is fitted by full-batch gradient descent on the summed squared
    error between ``x2`` and the target column.

    Attributes:
        learning_rate: Step size; ``train`` divides it by the number of
            training samples before applying the accumulated gradient.
        eps: Standard deviation of the zero-mean normal distribution used to
            initialise every weight and bias.
        iterations: Number of full passes over the training set.
    """

    def __init__(self, learning_rate=0.1, eps=0.000001, iterations=1000):
        self.learning_rate = learning_rate
        self.eps = eps
        self.iterations = iterations

    def sigma(self, x):
        """Activation function: element-wise hyperbolic tangent of ``x``."""
        return torch.tanh(x)

    def dsigma(self, x):
        """Derivative of ``sigma`` evaluated at ``x``: ``1 - tanh(x)**2``."""
        return 1 - self.sigma(x) ** 2

    def loss(self, v, t):
        """Return the sum of squared differences between ``v`` and ``t``."""
        return torch.sum(torch.pow(v - t, 2))

    def dloss(self, v, t):
        """Return the gradient of ``loss`` with respect to ``v``: ``2 * (v - t)``."""
        return 2 * (v - t)

    def forward_pass(self, x, w1, w2, b1, b2):
        """Run the network on ``x`` and return every intermediate value.

        Args:
            x: Input column of shape ``(n_inputs, 1)``.
            w1, b1: First-layer weights and bias.
            w2, b2: Second-layer weights and bias.

        Returns:
            Tuple ``(x0, s1, x1, s2, x2)``: the input, the pre- and
            post-activation values of the hidden layer, and the pre- and
            post-activation values of the output layer.
        """
        x0 = x

        s1 = torch.matmul(w1, x0) + b1
        x1 = self.sigma(s1)
        s2 = torch.matmul(w2, x1) + b2
        x2 = self.sigma(s2)

        return x0, s1, x1, s2, x2

    def backward_pass(self, w1, b1, w2, b2, dw1, db1, dw2, db2, x, t):
        """Accumulate the loss gradient of one sample into the ``d*`` tensors.

        Runs a forward pass on ``x`` and back-propagates the squared-error
        loss against ``t``. The gradients are ADDED in place to ``dw1``,
        ``db1``, ``dw2`` and ``db2`` so the caller can sum over many samples.

        Args:
            w1, b1, w2, b2: Current parameters (not modified).
            dw1, db1, dw2, db2: Gradient accumulators, updated in place.
            x: Input column of shape ``(n_inputs, 1)``.
            t: Target column of shape ``(n_class, 1)``.

        Returns:
            The network output ``x2`` for ``x``, so callers do not need a
            second forward pass to obtain it.
        """
        x0, s1, x1, s2, x2 = self.forward_pass(x, w1, w2, b1, b2)

        # Output layer: dL/ds2 = dL/dx2 * sigma'(s2).
        ds2 = self.dloss(x2, t) * self.dsigma(s2)
        dw2 += torch.mm(ds2, x1.t())
        db2 += ds2

        # Hidden layer: push the error back through w2, then through sigma.
        ds1 = torch.mm(w2.t(), ds2) * self.dsigma(s1)
        dw1 += torch.mm(ds1, x0.t())
        db1 += ds1

        return x2

    def train(self, train, train_target, test, test_target, num_units=50):
        """Fit the network and print the final training and test error rates.

        Args:
            train: Training inputs, one sample per row.
            train_target: Training targets, one row per sample.
            test: Test inputs, one sample per row.
            test_target: Test targets; its second dimension sets the number
                of output units.
            num_units: Number of hidden units.

        Returns:
            Tuple ``(w1, w2, b1, b2, x2)`` where ``x2`` is the network output
            for the last training sample seen during the final iteration
            (``None`` when no iteration was run).
        """
        n_class = test_target.shape[1]
        n_inputs = train.shape[1]
        n_samples = train.shape[0]

        # Initialisation order (w1, b1, w2, b2) is kept so that seeded runs
        # draw the same random numbers as before.
        w1 = torch.empty(num_units, n_inputs).normal_(0, self.eps)
        b1 = torch.zeros(num_units, 1).normal_(0, self.eps)
        w2 = torch.empty(n_class, num_units).normal_(0, self.eps)
        b2 = torch.zeros(n_class, 1).normal_(0, self.eps)
        alpha = self.learning_rate / n_samples

        # Defined up front so the report and the return value below are valid
        # even when ``iterations`` is 0 and the loop body never runs.
        i = -1
        x2 = None

        for i in range(self.iterations):
            dw1 = torch.zeros(num_units, n_inputs)
            db1 = torch.zeros(num_units, 1)
            dw2 = torch.zeros(n_class, num_units)
            db2 = torch.zeros(n_class, 1)

            # Full-batch gradient descent: gradients are summed over every
            # sample and the parameters are updated once per iteration.
            for index in range(n_samples):
                # reshape (not the in-place, stride-ignoring resize_) builds
                # the column vectors without touching the dataset tensors.
                x = train[index].reshape(-1, 1)
                t = train_target[index].reshape(-1, 1)

                x2 = self.backward_pass(w1, b1, w2, b2, dw1, db1, dw2, db2, x, t)

            w1 = w1 - alpha * dw1
            w2 = w2 - alpha * dw2
            b1 = b1 - alpha * db1
            b2 = b2 - alpha * db2

        print("Iteration {},Training error :{}".format(i,self.compute_error(train, train_target, w1, b1, w2, b2)))
        print("Iteration {}, Test error :{}".format(i,self.compute_error(test, test_target, w1, b1, w2, b2)))

        return w1, w2, b1, b2, x2

    def compute_error(self, x, y, w1, b1, w2, b2):
        """Return the fraction of samples in ``x`` that are misclassified.

        A sample counts as an error when the index of the largest network
        output differs from the index of the largest entry of its target row.

        Args:
            x: Inputs, one sample per row.
            y: Targets, one row per sample.
            w1, b1, w2, b2: Network parameters.

        Returns:
            Number of misclassified samples divided by the number of samples.
        """
        n = x.shape[0]

        error = 0
        for i in range(n):
            # Use dedicated names: rebinding ``x`` here would replace the
            # dataset by a single sample and break every later iteration.
            sample = x[i].reshape(-1, 1)
            target = torch.argmax(y[i])

            _, _, _, _, x2 = self.forward_pass(sample, w1, w2, b1, b2)
            output = torch.argmax(x2, 0)

            if target != output:
                error += 1

        return error / n

In [75]:
# Build the trainer with the constructor's default hyper-parameters.
model = Backpropagation()

In [76]:
# Train on the prepared tensors; train() prints the final training and test
# error. `predicted` receives the fifth returned value, which is the network
# output for a single training sample, not predictions for the whole set.
w1, w2, b1, b2, predicted = model.train(train_input, train_target, test_input, test_target)

Iteration 999,Training error :0.003
Iteration 999, Test error :0.156


In [77]:
# Inspect the single output column returned by train().
predicted

tensor([[ 0.0132],
        [-0.1531],
        [ 0.1216],
        [-0.0800],
        [ 0.0455],
        [ 0.0343],
        [ 0.8968],
        [-0.0172],
        [-0.0409],
        [-0.0106]])

In [78]:
# Inspect the first-layer weight matrix returned by train().
w1

tensor([[-0.0012, -0.0012, -0.0012,  ..., -0.0012, -0.0012, -0.0012],
        [ 0.0002,  0.0002,  0.0002,  ...,  0.0002,  0.0002,  0.0002],
        [-0.0004, -0.0004, -0.0004,  ..., -0.0004, -0.0004, -0.0004],
        ...,
        [ 0.0056,  0.0056,  0.0056,  ...,  0.0056,  0.0056,  0.0056],
        [-0.0028, -0.0028, -0.0028,  ..., -0.0028, -0.0028, -0.0028],
        [ 0.0032,  0.0032,  0.0032,  ...,  0.0032,  0.0032,  0.0032]])

In [80]:
# b1

In [None]:
#w2

In [None]:
#b2