In [20]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pylab as plt
from sklearn.preprocessing import OneHotEncoder

In [21]:

class NeuralNetworks:
    def __init__(self, layers_size):
        self.layers_size = layers_size
        self.parameters = {}
        self.L = len(self.layers_size)-1
        self.n = 0
        self.cos = []
 
    def sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))
 


    def sigmoid_derivative(self, Z):
        s = 1 / (1 + np.exp(-Z))
        return s * (1 - s)

 
    def initialize_parameters(self):
        np.random.seed(1)
 
        for l in range(1, len(self.layers_size)):
            self.parameters["W" + str(l)] = np.random.randn(self.layers_size[l], self.layers_size[l - 1]) / np.sqrt(
                self.layers_size[l - 1])
            self.parameters["b" + str(l)] = np.zeros((self.layers_size[l], 1))


    def softmax(self, Z):
        expZ = np.exp(Z - np.max(Z))
        return expZ / expZ.sum(axis=0, keepdims=True)
 
    def forward(self, X):
        store = {}
 
        A = X.T
        for l in range(self.L - 1):
            Z = self.parameters["W" + str(l + 1)].dot(A) + self.parameters["b" + str(l + 1)]
            A = self.sigmoid(Z)
            store["A" + str(l + 1)] = A
            store["W" + str(l + 1)] = self.parameters["W" + str(l + 1)]
            store["Z" + str(l + 1)] = Z
 
        Z = self.parameters["W" + str(self.L)].dot(A) + self.parameters["b" + str(self.L)]
        A = self.softmax(Z)
        store["A" + str(self.L)] = A
        store["W" + str(self.L)] = self.parameters["W" + str(self.L)]
        store["Z" + str(self.L)] = Z
 
        return A, store
 
    def backward(self, X, Y, store):
 
        derivatives = {}
 
        store["A0"] = X.T
 
        A = store["A" + str(self.L)]
        dZ = A - Y.T
 
        dW = dZ.dot(store["A" + str(self.L - 1)].T) / self.n
        db = np.sum(dZ, axis=1, keepdims=True) / self.n
        dAPrev = store["W" + str(self.L)].T.dot(dZ)
 
        derivatives["dW" + str(self.L)] = dW
        derivatives["db" + str(self.L)] = db
 
        for l in range(self.L - 1, 0, -1):
            dZ = dAPrev * self.sigmoid_derivative(store["Z" + str(l)])
            dW = 1. / self.n * dZ.dot(store["A" + str(l - 1)].T)
            db = 1. / self.n * np.sum(dZ, axis=1, keepdims=True)
            if l > 1:
                dAPrev = store["W" + str(l)].T.dot(dZ)
 
            derivatives["dW" + str(l)] = dW
            derivatives["db" + str(l)] = db
 
        return derivatives
 
    def fit(self, X, Y, learning_rate=0.01, n_iterations=2):
        np.random.seed(1)
        
        self.n = X.shape[0]
 
        
 
        
        for loop in range(n_iterations):
            A, store = self.forward(X)
            cost = -np.mean(Y * np.log(A.T+ 1e-8))
            derivatives = self.backward(X, Y, store)
 
            for l in range(1, self.L + 1):
                self.parameters["W" + str(l)] = self.parameters["W" + str(l)] - learning_rate * derivatives[
                    "dW" + str(l)]
                self.parameters["b" + str(l)] = self.parameters["b" + str(l)] - learning_rate * derivatives[
                    "db" + str(l)]
 
            if loop % 100 == 0:
                print("Cost: ", cost, "Train Accuracy:", self.predict(X, Y))
 
            if loop % 10 == 0:
                self.cos.append(cost)
 
    def predict(self, X, Y):
        A, cache = self.forward(X)
        y_hat = np.argmax(A, axis=0)
        Y = np.argmax(Y, axis=1)
        accuracy = (y_hat == Y).mean()
        return accuracy * 100
 
    def plot_cost(self):
        plt.figure()
        plt.plot(np.arange(len(self.cos)), self.cos)
        plt.xlabel("epochs")
        plt.ylabel("cost")
        plt.show()
 

 
 


In [23]:
def pre_processing(y):
    e = OneHotEncoder(sparse=False, categories='auto')
    y = e.fit_transform(y.reshape(len(y), -1))
 
    return y
    
transform = transforms.Compose(
    [transforms.ToTensor()])

tset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
tloader = torch.utils.data.DataLoader(tset, batch_size=100,
                                          shuffle=False)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')


layers_dims = [28*28, 50, 10]

ann = NeuralNetworks(layers_dims)
ann.initialize_parameters()
i=0
for images,labels in tloader:
    images = images[:,0,:,:]
    images = images.reshape(*images.shape[:1], -1)
    images = images.numpy()
    labels= labels.numpy()
    labels = pre_processing(labels)

    ann.fit(images, labels, learning_rate=0.1)
    if i%100==99:
        print("Train Accuracy:", ann.predict(images, labels))
    i+=1
    
    

Cost:  0.23583032224745717 Train Accuracy: 13.0
Cost:  0.23696695511384566 Train Accuracy: 7.000000000000001
Cost:  0.22694063916885984 Train Accuracy: 16.0
Cost:  0.22967568690156123 Train Accuracy: 17.0
Cost:  0.2249309130085911 Train Accuracy: 36.0
Cost:  0.2237612122128557 Train Accuracy: 34.0
Cost:  0.22296929099296284 Train Accuracy: 36.0
Cost:  0.22432157193610525 Train Accuracy: 33.0
Cost:  0.21982987939043336 Train Accuracy: 41.0
Cost:  0.2167514953860244 Train Accuracy: 35.0
Cost:  0.2182297874176711 Train Accuracy: 28.000000000000004
Cost:  0.21601912585118277 Train Accuracy: 30.0
Cost:  0.21665876690020797 Train Accuracy: 36.0
Cost:  0.21438940316030444 Train Accuracy: 45.0
Cost:  0.2133798747518325 Train Accuracy: 45.0
Cost:  0.21310449382332602 Train Accuracy: 54.0
Cost:  0.2081457169825242 Train Accuracy: 66.0
Cost:  0.20766627706797755 Train Accuracy: 63.0
Cost:  0.20485883189237628 Train Accuracy: 66.0
Cost:  0.20430036659542358 Train Accuracy: 56.00000000000001
Cost: 