In [None]:
import numpy as np
from numpy import array
import random
import math
# --- Configuration -----------------------------------------------------------
# CSV data files. TRAIN_FILE is loaded with np.genfromtxt further down;
# TEST_FILE is not referenced by any of the cells shown here.
TRAIN_FILE="mnist_train.csv"
TEST_FILE="mnist_test.csv"
# Number of weight layers iterated by front_prop/delta; the last one is the
# output layer, every earlier one is a hidden layer.
num_layers=3
# Number of full passes over the training data performed by train().
num_itr=500
# Number of input features; filldefault uses it to size each weight row
# (presumably 28x28 MNIST pixels -- confirm against the data file).
input_num=784
# Number of neurons in every hidden layer.
hidden_layer_neuron = 100
# Number of output neurons (size of the one-hot target vector).
result_class=10
# Learning rate: back_prop subtracts step * gradient from the weights.
step = 0.1
# Constant added to every pre-activation in front_prop; it is never trained.
bias=0
# Steepness factor applied inside the sigmoid and its derivative.
lmbda = 1
# File the initial random weights are written to and then loaded from.
model_file="model.csv"

In [None]:
def filldefault(file):
    """Create (or overwrite) ``file`` with random initial weights.

    One text line is written per layer (``num_layers`` lines). Each line holds
    ``input_num * hidden_layer_neuron`` values drawn from ``random.uniform(-1, 1)``,
    each followed by a tab character, and the line ends with a newline.
    """
    with open(file, 'w') as handle:
        for _layer in range(num_layers):
            row_width = input_num * hidden_layer_neuron
            cells = [str(random.uniform(-1, 1)) + "\t" for _cell in range(row_width)]
            handle.write("".join(cells))
            handle.write('\n')

In [None]:
def activation_function(value, steepness=None):
    """Logistic sigmoid ``1 / (1 + exp(-steepness * value))`` for a scalar.

    Parameters
    ----------
    value : float
        Pre-activation of a single neuron.
    steepness : float, optional
        Slope factor applied to ``value``. When omitted, the module-level
        ``lmbda`` is used, which matches the original one-argument behaviour.

    Returns
    -------
    float
        Activation in the closed interval [0, 1].

    Notes
    -----
    The naive formula calls ``math.exp`` on a large positive number when
    ``steepness * value`` is very negative, which raises ``OverflowError``.
    The two-branch form below only ever exponentiates a non-positive number,
    so it underflows harmlessly to 0.0 instead.
    """
    if steepness is None:
        steepness = lmbda
    z = steepness * value
    if z >= 0:
        return 1 / (1 + math.exp(-z))
    # z < 0: exp(z) is in (0, 1), so no overflow is possible here.
    ez = math.exp(z)
    return ez / (1 + ez)

In [None]:
def derivative(value):
    """Slope of the sigmoid, expressed through the sigmoid's own output.

    ``value`` is an activation already produced by the sigmoid; the result is
    ``(1 - value) * value * lmbda``.
    """
    complement = 1 - value
    return complement * value * lmbda

In [None]:
def front_prop(inputs,model):
    """Run one sample forward through every layer of the network.

    Parameters
    ----------
    inputs : 1-D array
        Feature vector fed to the first layer.
    model : 2-D array
        Row ``layer`` stores that layer's weights; the first
        ``input_n * output_n`` entries are read and reshaped to
        ``(output_n, input_n)``. Any remaining entries in the row are ignored.

    Returns
    -------
    1-D array
        Activations of all layers concatenated in layer order, so the last
        ``result_class`` entries are the network output.
    """
    # The ``np.float`` alias was removed in NumPy 1.24; the builtin ``float``
    # selects the same float64 output dtype on every NumPy version.
    squash = np.vectorize(activation_function, otypes=[float])
    layer_input = inputs
    collected = []
    for layer in range(num_layers):
        input_n = len(layer_input)                       # fan-in of this layer
        # Only the final layer has result_class neurons; all others are hidden.
        output_n = result_class if layer == num_layers - 1 else hidden_layer_neuron
        layerweights = model[layer, :input_n*output_n].reshape(output_n, input_n)
        activations = squash(np.dot(layerweights, layer_input) + bias)
        collected.append(activations)
        layer_input = activations                        # feeds the next layer
    if not collected:
        # No layers configured: same empty result the accumulator started with.
        return np.array([])
    return np.concatenate(collected)

In [None]:
def delta(inputs,outputs,targets,model):
    """Back-propagate the squared-error gradient for one sample.

    Parameters
    ----------
    inputs : 1-D array
        Feature vector that was fed to the first layer.
    outputs : 1-D array
        Concatenated activations of all layers; the last ``result_class``
        entries are the output layer and the rest is reshaped to
        ``(num_layers - 1, hidden_layer_neuron)``.
    targets : 1-D array
        Desired output vector, same length as the output layer.
    model : 2-D array
        Current weights, one row per layer.

    Returns
    -------
    2-D array of shape ``(num_layers, len(model[0]))``
        Gradient for every weight, laid out like ``model``. Positions that do
        not correspond to a used weight are exactly zero.
    """
    # Must be zeros, not np.empty: each row is only partially filled below, and
    # the caller subtracts the whole array from the model. Uninitialised memory
    # would otherwise leak arbitrary values (possibly NaN/inf) into the weights.
    deltas = np.zeros([num_layers, len(model[0])])
    this_out = outputs[-result_class:]
    hidden_out = outputs[:-result_class].reshape(num_layers-1, hidden_layer_neuron)  # layer * out form
    upstream = np.subtract(this_out, targets)               # dE/d(output) for the last layer
    # Walk the layers from the output back to the input.
    for i in range(num_layers):
        output_n = len(this_out)
        # The first layer's input is the sample itself; otherwise it is the
        # activation of the preceding hidden layer.
        prev_out = inputs if i == num_layers-1 else hidden_out[-(i+1)]
        input_n = len(prev_out)
        lmodel = model[-(i+1), :input_n*output_n].reshape(output_n, input_n)
        # derivative() is plain elementwise arithmetic, so it is applied to the
        # whole activation vector at once.
        v = np.multiply(upstream, derivative(this_out))     # error signal at this layer's pre-activation
        deltas[-(i+1), :input_n*output_n] = np.outer(v, prev_out).flatten()  # output_n * input_n gradient
        upstream = np.dot(lmodel.T, v)                      # error passed to the layer below
        this_out = prev_out
    return deltas

In [None]:
def back_prop(inputs,outputs,model,target):
    """Return the weights after one gradient-descent step for a single sample.

    ``target`` is the class index; it is expanded to a one-hot vector of length
    ``result_class`` before the gradient is computed by ``delta``. The result
    is ``model`` minus ``step`` times that gradient; ``model`` itself is not
    modified.
    """
    one_hot = np.zeros(result_class)
    one_hot[int(target)] = 1
    gradient = delta(inputs, outputs, one_hot, model)
    scaled_gradient = step * gradient
    return model - scaled_gradient

In [None]:
def write_back(file,value):
    """Write the 2-D array ``value`` to ``file`` as tab-separated text.

    Each row of ``value`` becomes one line; entries are converted with ``str``
    and separated by tabs, with a newline after the last entry of the row.
    Any existing file is overwritten.
    """
    with open(file, 'w') as handle:
        for row_idx in range(len(value)):
            last_col = len(value[row_idx]) - 1
            # Every entry except the last one is followed by a tab.
            leading = [str(value[row_idx, col]) + "\t" for col in range(last_col)]
            handle.write("".join(leading))
            handle.write(str(value[row_idx, -1]) + "\n")

In [None]:
def loss_fn(output,target):
    """Half the sum of squared differences between ``output`` and ``target``."""
    residual = np.subtract(output, target)
    squared = np.power(residual, 2)
    total = np.sum(squared)
    return 0.5 * total

In [None]:
def loss(outputs,target):
    """Squared-error loss of ``outputs`` against the class index ``target``.

    ``target`` is turned into a one-hot vector as long as ``outputs`` and the
    pair is handed to ``loss_fn``.
    """
    one_hot = np.zeros(len(outputs))
    one_hot[int(target)] = 1
    return loss_fn(outputs, one_hot)

In [None]:
def out(outputs):
    """Return the index of the first entry of ``outputs`` equal to its maximum."""
    peak = outputs.max()
    # np.nonzero on the boolean mask is what single-argument np.where does.
    hits = np.nonzero(outputs == peak)
    return hits[0][0]

In [None]:
def train(model_param, log_every=1):
    """Train the network with per-sample gradient descent.

    Reads the module-level ``train_data`` array: column 0 is used as the class
    label and the remaining columns, divided by 255.0, as the input features.
    The data is traversed ``num_itr`` times; after every pass the current
    weights are written to ``iteration<N>.csv``.

    Parameters
    ----------
    model_param : 2-D array
        Initial weights, one row per layer. The array passed in is not
        modified; each update produces a new array.
    log_every : int, optional
        Print the per-sample error for every ``log_every``-th sample. The
        default of 1 logs every sample (the original behaviour); pass 0 or
        ``None`` to disable per-sample logging.

    Returns
    -------
    2-D array
        The trained weights. They are also written to ``model_file``, which is
        what the final status message announces.
    """
    inputs = train_data[:,1:]/255.0
    train_values = train_data[:,0]
    for epoch in range(1, num_itr + 1):
        print('Iteration '+str(epoch))
        for i in range(len(train_data)):
            outputs = front_prop(inputs[i], model_param)
            if log_every and i % log_every == 0:
                error = loss(outputs[-result_class:], train_values[i])
                print('ITR ' +str(epoch) + ', Sample '+str(i)+' :- Error: '+str(error))
            model_param = back_prop(inputs[i], outputs, model_param, train_values[i])
        # Per-pass snapshot so that progress survives an interrupted run.
        write_back('iteration'+str(epoch)+'.csv', model_param)
    print('Writing trained model to file!')
    # Persist and hand back the result; previously the updated weights lived
    # only in this function's local variable and were discarded on return.
    write_back(model_file, model_param)
    return model_param

In [None]:
# Write fresh random weights to model_file; the file is opened in 'w' mode, so
# any weights already stored there are replaced.
filldefault(model_file)

In [None]:
# Load the weights back from disk; filldefault wrote one text line per layer.
model = np.loadtxt(model_file)

In [None]:
# Parse the comma-separated training file into one numeric array; train()
# reads column 0 as the label and the remaining columns as features.
train_data = np.genfromtxt(TRAIN_FILE, delimiter=',')

In [None]:
# Run the training loop starting from the loaded weights. train() prints its
# progress and writes an 'iteration<N>.csv' snapshot after every pass; its
# return value is not captured here.
train(model)