In [1]:
import numpy as np
import pandas as pd

In [2]:
def stochestic_nn(x,y,epoches,lr,lr_schedule,initial_type=0,Layers=[3]):
    
    Num=x.shape[1]
    Loss=[]
    #set initial_type=0:zeros;else:random
    if initial_type==0:
        weights = dict()
        for i in range(len(Layers)):
            #  if it is the first layer
            if i == 0:
                W = np.zeros((Layers[i], x.shape[0]))
                b = np.zeros((Layers[i], 1))
            else:
                W = np.zeros((Layers[i], Layers[i-1]))
                b = np.zeros((Layers[i], 1))

            weights['W' + str(i+1)] = W
            weights['b' + str(i+1)] = b

        #  for the last layer
        W = np.zeros((y.shape[0], Layers[-1]))
        b = np.zeros((y.shape[0], 1))
        weights['W' + str(len(Layers)+1)] = W
        weights['b' + str(len(Layers)+1)] = b

    else: 
        weights = dict()
        dim_y=y.shape[0]
        for i in range(len(Layers)):
            num_hidden=Layers[i]

            #  if it is the first layer
            if i == 0:
                W =np.random.normal(0,1,(num_hidden, x.shape[0]))
                b = np.zeros((num_hidden, 1))
            else:
                W = np.random.normal(0,1,(num_hidden, Layers[i-1]))
                b = np.zeros((num_hidden, 1))

            weights['W' + str(i+1)] = W
            weights['b' + str(i+1)] = b

        #  for the last layer
        W = np.random.normal(0,1,(dim_y, Layers[-1]))
        b = np.zeros((dim_y, 1))
        weights['W' + str(len(Layers)+1)] = W
        weights['b' + str(len(Layers)+1)] = b

    idx=np.arange(Num)
    for epoch in range(epoches):
        np.random.shuffle(idx)
        x=x[:,idx]
        y=y[:,idx]
        
        #SGD
        for i in range(Num):
            #forward pass
            out,temp=forward_pass(x[:,i],weights)
            #calculate loss
            loss=square_loss(y[:,i],out)
            #backward pass
            grad_weights=backward_pass(y[:,i],out,weights,temp)
            lr=lr_schedule(lr,epoch)
            #update the weights
            weights=update_weights(weights,grad_weights,lr)
            
            Loss.append(loss)
            
    return weights

In [3]:
#The mathematical representation of this unit is  Z[l]=W[l]A[l−1]+b[l] .
def forward_pass(x, weights):
    
    temp = dict()
    
    A = x[:, np.newaxis]
    temp['Z0'],temp["A0"] = x[:, np.newaxis],x[:, np.newaxis]
    
    #number of layers in the neural network
    for i in range(len(weights) // 2):
        W = weights['W' + str(i+1)]
        Z = np.dot(W, A)
        A = sigmoid(Z)
        
        temp['Z' + str(i+1)] = Z
        temp['A' + str(i+1)] = A

    return A, temp

In [4]:
def square_loss(y,out):
    loss = -1 * (np.dot(y, np.log(out).T) + np.dot((1-y), np.log(1-out).T))
    return loss

In [5]:
def backward_pass(y, out, weights, temp):
    grads = dict()
    
    dZ = out - y
     # ith layer: gradients.
    for i in reversed(range(len(weights)//2)):
        A = temp['A' + str(i)]
        dW = np.dot(dZ, A.T)
        db = np.sum(dZ, axis=1)
        grads['dW' + str(i+1)] = dW
        grads['db' + str(i+1)] = db
        
        W = weights['W' + str(i+1)]
        dA_prev = np.dot(W.T, dZ)
        Z_prev = temp['Z'+str(i)]
        dZ_prev = dA_prev * sigmoid_prime(Z_prev)
        dZ = dZ_prev

    return grads


In [6]:
def update_weights(weights, grads, lr):
    
    for i in range(len(weights) // 2):

        weights['W' + str(i+1)] =weights['W' + str(i+1)] - lr * grads['dW' + str(i+1)]
        weights['b' + str(i+1)] = weights['b' + str(i+1)]- lr * grads['db' + str(i+1)]

        weights['W' + str(i+1)] = weights['W' + str(i+1)]
        weights['b' + str(i+1)] = weights['b' + str(i+1)]

    return weights

In [7]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def sigmoid_prime(z):
    return sigmoid(z) * (1 - sigmoid(z))

In [8]:
# schedule function for learning rate
def lr_schedule(r0, t):
    d = 0.1
    r = r0 / (1 + (r0/d)*t)
    return r

In [9]:
def pred(x, weights):
    A = x
    for i in range(len(weights) // 2):
        W = weights['W' + str(i+1)]
        Z = np.dot(W, A)
        A = sigmoid(Z)

    predictions = np.where(A >= 0.5, 1, 0)
    return predictions

def error(y, out):
    m = y.shape[1]
    err=1-np.sum(y==out) / m
    return err

In [10]:
train_data = []
with open('train.csv', 'r') as f:
    for term in f:
        train_data.append(term.strip().split(','))
        
test_data= []
with open('test.csv', 'r') as f:
    for term in f:
        test_data.append(term.strip().split(','))

In [11]:
train_data = np.array(train_data, dtype='float64')
test_data = np.array(test_data, dtype='float64')

In [12]:
train_x= train_data[:, :-1]
train_y = train_data[:, -1].astype(int)
train_y  = train_y [:, np.newaxis]
train_x, train_y = train_x.T, train_y.T
test_x= test_data[:, :-1]
test_y = test_data[:, -1].astype(int)
test_y  = test_y[:, np.newaxis]
test_x, test_y = test_x.T, test_y.T

In [13]:
widths = [5, 10, 25, 50, 100]
for w in widths:
    weights=stochestic_nn(train_x,train_y,epoches=10,lr=0.1,lr_schedule=lr_schedule,initial_type=0,Layers=[w,w])
    pred_train = pred(train_x, weights)
    pred_test = pred(test_x, weights)
    train_error= error(train_y, pred_train)
    test_error = error(test_y, pred_test)
    print(w, 'Train Error', train_error, 'Test Error', test_error)

5 Train Error 0.4461009174311926 Test Error 0.44199999999999995
10 Train Error 0.4461009174311926 Test Error 0.44199999999999995
25 Train Error 0.4461009174311926 Test Error 0.44199999999999995
50 Train Error 0.4461009174311926 Test Error 0.44199999999999995
100 Train Error 0.4461009174311926 Test Error 0.44199999999999995


In [14]:
widths = [5, 10, 25, 50, 100]
for w in widths:
    weights=stochestic_nn(train_x,train_y,epoches=10,lr=0.1,lr_schedule=lr_schedule,initial_type=1,Layers=[w,w])
    pred_train = pred(train_x, weights)
    pred_test = pred(test_x, weights)
    train_error= error(train_y, pred_train)
    test_error = error(test_y, pred_test)
    print(w, 'Train Error', train_error, 'Test Error', test_error)

5 Train Error 0.008027522935779796 Test Error 0.018000000000000016
10 Train Error 0.0011467889908256534 Test Error 0.0020000000000000018
25 Train Error 0.002293577981651418 Test Error 0.0040000000000000036
50 Train Error 0.0 Test Error 0.0
100 Train Error 0.0 Test Error 0.0
