In [1]:
import torch
import torch.autograd
from torch.autograd import Variable

import pandas as pd
import numpy as np

def logsumexp(b):
    return torch.log(1 + torch.exp(b))

def mlp(x, Weight, Bias, Func):
    f = Variable(x, requires_grad=False) # Initializing f with x

    # f = sigmoid(w.dot(x) + b)
    for i in range(len(Weight)):
        f = Func[i](torch.matmul(Weight[i], f) + Bias[i])
    
    return f

def generate(sizes):
    Weight = []
    Bias = []
    Func = []
    
    for i in range(len(sizes)-1):
        # For layer i, Weights are a sizes[i+1] x sizes[i] matrix
        Weight.append(Variable(torch.randn(sizes[i+1], sizes[i]).double(), requires_grad=True))
        # For layer i, Biases are a sizes[i+1] x 1 vector
        Bias.append(Variable(torch.randn(sizes[i+1], 1).double(), requires_grad=True))
        Func.append(torch.sigmoid)

    return Weight, Bias, Func

In [2]:
# Variables
sizes = [784, 28, 8, 1]

eta = 0.1
MAX_ITER = 250
EE = []

num_rows_fetched = 50000
num_rows_selected = 250

In [3]:
df_mnist = pd.read_csv(u'mnist_train.csv', sep=',', header=None, nrows=num_rows_fetched)
x = torch.from_numpy(np.array(df_mnist.iloc[:, [i+1 for i in range(784)]])).double()
y = torch.from_numpy(np.array(df_mnist.iloc[:, 0])).double()
y[y>1] = 1

In [4]:
#Error = torch.nn.BCELoss(reduction='elementwise_mean')

Weight, Bias, Func = generate(sizes)

for epoch in range(MAX_ITER):
    idx = np.random.choice(len(x), num_rows_selected, replace=False)

    for i in idx:
        f = mlp(x[i,:].reshape(sizes[0], 1), Weight, Bias, Func)

        E = -y[i].reshape(sizes[-1], 1) * f + logsumexp(f)
        #E = Error(f, y[i].reshape(sizes[-1], 1))
        EE.append(E.data.numpy())

        # Compute the derivative of the error with respect to Weights and Biases
        E.backward() 

        # Take the step and reset weights
        for j in range(len(sizes)-1):
            Weight[j].data.add_(-eta*Weight[j].grad.data)
            Bias[j].data.add_(-eta*Bias[j].grad.data)
        Weight[j].grad.zero_()
        Bias[j].grad.zero_()
            
    if(epoch%10==0):
        print("In epoch ", epoch, ", EE is:", EE[epoch])
        
print("In last epoch, EE is:", EE[epoch])

In epoch  0 , EE is: [[0.48743122]]
In epoch  10 , EE is: [[0.37080437]]
In epoch  20 , EE is: [[0.35799824]]
In epoch  30 , EE is: [[0.36064521]]
In epoch  40 , EE is: [[0.3386434]]
In epoch  50 , EE is: [[0.33286545]]
In epoch  60 , EE is: [[0.32903061]]
In epoch  70 , EE is: [[0.32826569]]
In epoch  80 , EE is: [[0.32288055]]
In epoch  90 , EE is: [[0.32427654]]
In epoch  100 , EE is: [[0.32133737]]
In epoch  110 , EE is: [[0.32368378]]
In epoch  120 , EE is: [[0.32364539]]
In epoch  130 , EE is: [[0.32299619]]
In epoch  140 , EE is: [[0.31998255]]
In epoch  150 , EE is: [[0.31954335]]
In epoch  160 , EE is: [[1.29662998]]
In epoch  170 , EE is: [[0.32049927]]
In epoch  180 , EE is: [[0.31886042]]
In epoch  190 , EE is: [[0.31905814]]
In epoch  200 , EE is: [[1.29913319]]
In epoch  210 , EE is: [[0.31842659]]
In epoch  220 , EE is: [[0.31949156]]
In epoch  230 , EE is: [[0.31786267]]
In epoch  240 , EE is: [[0.31800101]]
In last epoch, EE is: [[0.3177125]]


In [5]:
test_rows_fetched = 5000

df_mnist_test = pd.read_csv(u'mnist_test.csv', sep=',', header=None, nrows=test_rows_fetched)
x_test = torch.from_numpy(np.array(df_mnist_test.iloc[:, [i+1 for i in range(784)]])).double()
y_test = torch.from_numpy(np.array(df_mnist_test.iloc[:, 0])).double()
y_test[y_test>1] = 1

result = torch.zeros(len(y_test))
        
for i in range(len(y_test)):
    result[i] = torch.round(mlp(x_test[i,:].reshape(sizes[0], 1), Weight, Bias, Func))
    
print("Result:\n", result, "\n")

confusion_matrix = torch.zeros((2,2))

for i in range(len(result)):
    confusion_matrix[int(y_test[i]), int(result[i])] += 1
    
print("Confusion matrix:\n", confusion_matrix, "\n")
    
accuracy = torch.sum(torch.diag(confusion_matrix))/torch.sum(confusion_matrix)

print("Accuracy:", accuracy*100, "%\n")

Result:
 tensor([1., 1., 1.,  ..., 1., 1., 1.], grad_fn=<CopySlices>) 

Confusion matrix:
 tensor([[   0.,  460.],
        [   0., 4540.]]) 

Accuracy: tensor(90.8000) %

