In [0]:
import numpy as np
import tensorflow as tf
import pickle
import torch

#Shapes
#x_train: (60000, 28, 28)
#x_test: (10000, 28, 28)
#y_train: (60000,)
#y_test: (10000,)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

trainset_labels = y_train
testset_labels = y_test

x_train, x_test = x_train.T.reshape((784,60000)), x_test.T.reshape((784,10000))
x_train = x_train/255 
x_test = x_test/255 

train_labels = []
for i in range(len(y_train)):
    arr = [0,]*10
    label = y_train[i]
    index = label-1
    arr[index]=1
    train_labels.append(arr)
    
test_labels = []
for i in range(len(y_test)):
    arr = [0,]*10
    label = y_train[i]
    index = label-1
    arr[index]=1
    test_labels.append(arr)
    
y_train = np.array(train_labels).T
y_test = np.array(test_labels).T

print("X_train:",x_train.shape, "Y_train:",y_train.shape, "X_test:",x_test.shape, "Y_test:",y_test.shape)
    
#Shapes
#x_train: (784, 60000), 784 = 28px * 28px
#x_test: (784, 10000), 784 = 28px * 28px
#y_train: (10, 60000), 10 = No. of classes 
#y_test: (10, 10000), 10 = No. of classes

x_train = torch.from_numpy(x_train).float().cuda()
y_train = torch.from_numpy(y_train).float().cuda()
x_test = torch.from_numpy(x_test).float().cuda()
y_test = torch.from_numpy(y_test).float().cuda()

X_train: (784, 60000) Y_train: (10, 60000) X_test: (784, 10000) Y_test: (10, 10000)


In [0]:
class Layer():
    
    def __init__(self, n_units, activation="relu", cuda=True):
        self.n_units = n_units
        
        assert(activation in ("relu","sigmoid", "tanh", "leaky-relu")), "InitError: Invalid Activation Function"
        
        self.activationFunction = activation
        self.weights = None
        self.biases = None
        self.activations = None
        self.Z = None
        self.cuda = cuda
        
        
    def applyActivationFunction(self, Z):
        if self.activationFunction=="sigmoid":
            return 1 / (1 + torch.exp(-Z))

        elif self.activationFunction=="relu":
            if self.cuda:
                return torch.max(Z, torch.zeros(Z.shape).cuda())
            else:
                return torch.max(Z, torch.zeros(Z.shape))

        elif self.activationFunction=="leaky-relu":
            if self.cuda:
                return torch.max(0.01*Z, torch.zeros(Z.shape).cuda())
            else:
                return torch.max(0.01*Z, torch.zeros(Z.shape))

        elif self.activationFunction=="tanh":
            return torch.tanh(Z)
    
    
    def isInvalid(self, A):
        if torch.isnan(A).any() or torch.isinf(A).any():
            return True
        else:
            return False
    
    
    def computeActivations(self, A):
        assert(self.weights.shape[1]==A.shape[0]), "ComputeActivationsError: Incorrect Input Dimensions"
        
        Z = torch.matmul(self.weights,A)+self.biases
        self.Z = Z
        self.activations = self.applyActivationFunction(Z)
        
        assert(not self.isInvalid(self.activations)), "InvalidActivationsError: Invalid Activations Computed"
        return self.activations  

    
    def computeDerivatives(self):
        if self.activationFunction=="sigmoid":
            sigmoidZ = self.activations
            derivative = torch.mul(sigmoidZ, (1-sigmoidZ))

        elif self.activationFunction=="relu":
            ones = torch.ones(self.Z.shape)
            zeros = torch.zeros(self.Z.shape)
            if self.cuda:
                ones = ones.cuda()
                zeros = zeros.cuda()
            derivative = torch.where(self.Z<0, zeros,ones)

        elif self.activationFunction=="leaky-relu":
            ones = torch.ones(self.Z.shape)
            zeros = torch.ones(self.Z.shape)*0.01
            if self.cuda:
                ones = ones.cuda()
                zeros = zeros.cuda()
            derivative = torch.where(self.Z<0, zeros,ones)

        elif self.activationFunction=="tanh":
            tanhZ = self.activation(self.Z)
            derivative = 1 - torch.pow(tanhZ, 2)

        return derivative

In [0]:
class NN():

    def __init__(self, learningRate=0.005, lossFunction = "cross-entropy", cuda=True):
        self.layerDims = []
        self.layers = []
        self.nLayers = 0
        self.learningRate = learningRate
        self.lossFunction = lossFunction
        self.cuda = cuda
                

    def addLayer(self, layer):
        layer.cuda = self.cuda
        self.layerDims.append(layer.n_units)
        self.layers.append(layer)
        self.nLayers+=1


    def initializeParams(self, weights=None):
        assert(len(self.layerDims)>1), "NotEnoughLayersException: Network must have atleast 2 layers"
        
        if weights==None:
            for i in range(1, self.nLayers):
                layer = self.layers[i]
                if self.cuda:
                    layer.weights = (torch.rand(self.layerDims[i], self.layerDims[i-1]) * 0.01).cuda()
                    layer.biases = torch.zeros(self.layerDims[i], 1).cuda()
                else:
                    layer.weights = torch.rand(self.layerDims[i], self.layerDims[i-1]) * 0.01
                    layer.biases = torch.zeros(self.layerDims[i], 1)
        else:
            for i in range(1, self.nLayers):
                layer = self.layers[i]
                layer.weights = weights[i]['weights']
                layer.biases = weights[i]['biases']

    
    def forward(self, trainingExamples):
        activation = trainingExamples
        for i in range(1, self.nLayers):
            layer = self.layers[i]
            activation = layer.computeActivations(activation)
            layer.activations = activation


    def getAvgLoss(self, Y):
        m = Y.shape[1]
        outputLayer = self.layers[-1]
        A = outputLayer.activations 
        
        assert(Y.shape == A.shape), "InvalidLabelFormatException: Labels should have dimensions (nClasses, nExamples)"
        
        cost = -(1/m)*torch.sum(torch.mul(Y,torch.log(A)) + torch.mul((1-Y),torch.log(1-A)))  
        return cost
    

    """ Initialize backward propagation """
    def computeDA(self, Y):
        outputLayer = self.layers[-1]
        A = outputLayer.activations
        if self.lossFunction=="cross-entropy":
            DA =  - (torch.div(Y, A) - torch.div(1 - Y, 1 - A))
        return DA


    def backward(self, labels):
        """ Output Layer's DA """
        
        DA = self.computeDA(labels)
        for i in range(self.nLayers-1,0,-1):
            layer = self.layers[i]
            DZ = torch.mul(DA,layer.computeDerivatives())
            DA = torch.matmul(layer.weights.t(), DZ)
            
            prevLayer = self.layers[i-1]
            m = prevLayer.activations.shape[1]

            DW = (1/m)*torch.matmul(DZ, prevLayer.activations.t())
            """ keepdims maintains the dimension of the resultant matrix. """
            DB = 1 / m * (torch.sum(DZ, dim = 1,keepdim = True)) 
            
            layer.weights -=  self.learningRate*DW
            layer.biases -= self.learningRate*DB
            
        
    def train(self, dataset, labels, weights=None, stopWhenAvgLossEquals=0.05, printLossAfterEvery=50, nIterations=1000, mode='auto'):
        
        if weights==None:
            self.initializeParams()
        else:
            self.initializeParams(weights)
        
        """ Normalizing the dataset. """
        if (dataset>1).any():
            dataset = dataset/torch.max(dataset)
        
        self.layers[0].activations = dataset
        
        assert(mode in ('auto','manual')), "InvalidModeException: Mode can be either 'auto' or 'manual'"
        
        if mode=='manual':

            for i in range(nIterations):
                self.forward(dataset)

                avgLoss = self.getAvgLoss(labels)

                if i%printLossAfterEvery==0:
                    print("Avg Loss at iteration "+str(i)+": "+str(avgLoss))

                self.backward(labels)
        
        elif mode=="auto":
            
            iterNo = 0

            while True:
                self.forward(dataset)
                avgLoss = self.getAvgLoss(labels)

                if iterNo%printLossAfterEvery==0:
                    print("Avg Loss at iteration "+str(iterNo)+": "+str(avgLoss))

                nDecimalsToRound = len(str(stopWhenAvgLossEquals).split('.')[1])
                self.backward(labels)

                if stopWhenAvgLossEquals==(torch.round(avgLoss* 10**nDecimalsToRound)/(10**nDecimalsToRound)):
                    break 

                iterNo+=1

        print("\nTraining completed successfully")
        
        ''' First layer is the input layer. Hence it doesn't contain any weights. '''
        weights = [None,]
        
        for i in range(1, self.nLayers):
            layer = self.layers[i]
            weights.append({'weights':layer.weights, 'biases':layer.biases})

        return weights


    def predict(self, example):
        try:
            nExamples = example.shape[1]
        except:
            example = example.reshape(example.shape[0],1)
        self.forward(example)
        outputLayer = self.layers[-1]
        activations = outputLayer.activations
        return torch.argmax(activations, dim=0)[0]

In [0]:
n = NN(learningRate=0.1, cuda=True)
n.addLayer(Layer(784))
n.addLayer(Layer(128))
n.addLayer(Layer(64))
n.addLayer(Layer(10,activation="sigmoid"))

n.learningRate=0.05

In [0]:
weights =  n.train(x_train, y_train, nIterations=500, printLossAfterEvery=100, mode='manual', weights=None)
weights =  n.train(x_train, y_train, nIterations=500, printLossAfterEvery=100, mode='manual', weights=weights)

total = y_test.shape[1]
count = 0

for i in range(total):
    example = x_test.T[i]
    
    label = testset_labels[i]
    prediction = n.predict(example)+1
    
    if prediction==label:
        count+=1

print("\nTest Set Accuracy: "+str((count/total)*100)+"%")

#n.saveWeights("/content/sample_data/model-torch", weights=weights)

Avg Loss at iteration 0: tensor(7.3658, device='cuda:0')
Avg Loss at iteration 100: tensor(3.3519, device='cuda:0')
Avg Loss at iteration 200: tensor(3.3294, device='cuda:0')
Avg Loss at iteration 300: tensor(3.3147, device='cuda:0')
Avg Loss at iteration 400: tensor(3.3025, device='cuda:0')

Training completed successfully
Avg Loss at iteration 0: tensor(3.2916, device='cuda:0')
Avg Loss at iteration 100: tensor(3.2815, device='cuda:0')
Avg Loss at iteration 200: tensor(3.2719, device='cuda:0')
Avg Loss at iteration 300: tensor(3.2625, device='cuda:0')
Avg Loss at iteration 400: tensor(3.2526, device='cuda:0')

Training completed successfully

Test Set Accuracy: 16.259999999999998%
