In [1]:
import torch
import torch.autograd
from torch.autograd import Variable
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def logsumexp(b):
    return torch.log(1 + torch.exp(b))

def mlp(x, Weight, Bias):
    f = Variable(x, requires_grad=False) # Initializing f with x

    # f = sigmoid(w.dot(x) + b)
    for i in range(len(Weight)):
        f = torch.sigmoid(torch.matmul(Weight[i], f) + Bias[i])
    return f

def generateWeight(sizes):
    Weight = []
    
    for i in range(len(sizes)-1):
        # For layer i, Weights are a sizes[i+1] x sizes[i] matrix
        Weight.append(Variable(torch.randn(sizes[i+1], sizes[i]).double(), requires_grad=True))

    return Weight

def generateBias(sizes):
    Bias = []

    for i in range(len(sizes)-1):
        # For layer i, Biases are a sizes[i+1] x 1 vector
        Bias.append(Variable(torch.randn(sizes[i+1], 1).double(), requires_grad=True))
        
    return Bias

In [2]:
num_rows_fetched = 5000
num_rows_selected = 500

df_mnist = pd.read_csv(u'mnist_train.csv', sep=',', header=None, nrows=num_rows_fetched)
x = torch.from_numpy(np.array(df_mnist.iloc[:, [i+1 for i in range(784)]])).double()
y = torch.from_numpy(np.array(df_mnist.iloc[:, 0])).double()
y[y>1] = 1

In [6]:
sizes = [784, 5, 1]

Weight = generateWeight(sizes)
Bias = generateBias(sizes)

eta = 0.0007
MAX_ITER = 300
EE = []

for epoch in range(MAX_ITER):
    idx = np.random.choice(len(x), num_rows_selected, replace=True)

    for i in idx:
        f = mlp(x[i,:].reshape(sizes[0], 1), Weight, Bias)

        E = -y[i].reshape(sizes[-1], 1) * f + logsumexp(f)
        EE.append(E.data.numpy())

        # Compute the derivative of the error with respect to Weights and Biases
        E.backward() 

        # Take the step and reset weights
        for j in range(len(sizes)-1):
            Weight[j].data.add_(-eta*Weight[j].grad.data)
            Bias[j].data.add_(-eta*Bias[j].grad.data)
            Weight[j].grad.zero_()
            Bias[j].grad.zero_()
            
    if(epoch%25==0):
        print("In epoch ", epoch, ", EE is:", EE[epoch])
        
print("In epoch ", epoch, ", EE is:", EE[epoch])

In epoch  0 , EE is: [[0.51929872]]
In epoch  25 , EE is: [[0.38006157]]
In epoch  50 , EE is: [[0.96392683]]
In epoch  75 , EE is: [[0.4801879]]
In epoch  100 , EE is: [[0.39449553]]
In epoch  125 , EE is: [[0.36538554]]
In epoch  150 , EE is: [[0.42323163]]
In epoch  175 , EE is: [[0.53407228]]
In epoch  200 , EE is: [[0.40472132]]
In epoch  225 , EE is: [[0.45005968]]
In epoch  250 , EE is: [[0.56949653]]
In epoch  275 , EE is: [[0.40444124]]
In epoch  299 , EE is: [[0.49759882]]


In [7]:
test_rows_fetched = 5000

df_mnist_test = pd.read_csv(u'mnist_test.csv', sep=',', header=None, nrows=test_rows_fetched)
x_test = torch.from_numpy(np.array(df_mnist_test.iloc[:, [i+1 for i in range(784)]])).double()
y_test = torch.from_numpy(np.array(df_mnist_test.iloc[:, 0])).double()
y_test[y_test>1] = 1

result = torch.zeros(len(y_test))
        
for i in range(len(y_test)):
    result[i] = torch.round(mlp(x_test[i,:].reshape(sizes[0], 1), Weight, Bias))
    
print("Result:\n", result, "\n")

confusion_matrix = torch.zeros((2,2))

for i in range(len(result)):
    confusion_matrix[int(y_test[i]), int(result[i])] += 1
    
print("Confusion matrix:\n", confusion_matrix, "\n")
    
accuracy = torch.sum(torch.diag(confusion_matrix))/torch.sum(confusion_matrix)

print("Accuracy:", accuracy*100, "%\n")

Result:
 tensor([1., 1., 1.,  ..., 1., 1., 1.], grad_fn=<CopySlices>) 

Confusion matrix:
 tensor([[   0.,  460.],
        [   0., 4540.]]) 

Accuracy: tensor(90.8000) %

