In [81]:
import pandas as pd
import numpy as np

In [82]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), evaluated element-wise with NumPy.

    z may be a scalar or a NumPy array; the result has the same shape.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [83]:
def sigmoid_grad(z):
    """Derivative of the sigmoid at z, computed as s * (1 - s) with s = sigmoid(z)."""
    s = sigmoid(z)  # evaluate the sigmoid once instead of twice
    return s * (1 - s)

In [84]:
def forward_prop(X, theta1, theta2):
    """Run the two-layer network forward and return the output activations.

    X is multiplied by theta1.T as-is, so any bias column must already be part
    of X. A column of ones is prepended to the hidden activations before they
    are multiplied by theta2.

    Returns an array with one row per row of theta2 and one column per row of X.
    """
    hidden = sigmoid(X @ theta1.T)
    bias_column = np.ones(shape=(hidden.shape[0], 1))
    hidden_with_bias = np.append(bias_column, hidden, axis=1)
    output = sigmoid(theta2 @ hidden_with_bias.T)
    return output  # 3 x 210 for the data used in this notebook

In [222]:
def cost(X, y_new, theta1, theta2, lamda):
    """Regularised cross-entropy cost of the network on (X, y_new).

    Parameters
    ----------
    X : array, one row per sample (passed unchanged to forward_prop).
    y_new : array with the same shape as forward_prop's output
        (one row per output unit, one column per sample).
    theta1, theta2 : weight matrices; column 0 of each is excluded from the
        regularisation term.
    lamda : regularisation strength.

    Returns
    -------
    The mean cross-entropy over the samples plus the L2 penalty
    lamda / (2 * m) * (sum of squared non-first-column weights).
    """
    # Take the sample count from the arguments rather than a notebook-level
    # global, so the function does not depend on hidden state.
    m = X.shape[0]
    hx = forward_prop(X, theta1, theta2)
    log_likelihood = np.sum(y_new * np.log(hx) + (1 - y_new) * np.log(1 - hx))
    J = -log_likelihood / m
    penalty = np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2)
    reg = lamda * penalty / (2 * m)
    return J + reg

In [227]:
def back_prop(X, Y, y_new, theta1, theta2, lamda):
    """Compute the regularised cost gradients for both weight matrices.

    All samples are processed at once with matrix operations; layer sizes are
    taken from the array shapes, so the function is not tied to a particular
    number of hidden or output units.

    Parameters
    ----------
    X : array (m x inputs); multiplied by theta1.T as-is, so any bias column
        must already be included.
    Y : unused here; kept so existing callers keep working.
    y_new : array (outputs x m) of target activations, one column per sample.
    theta1 : array (hidden x inputs).
    theta2 : array (outputs x hidden + 1); column 0 multiplies the bias unit.
    lamda : regularisation strength; column 0 of each matrix is not regularised.

    Returns
    -------
    (theta1_grad, theta2_grad), each with the shape of the matching theta.
    """
    m = X.shape[0]

    # Forward pass, one row per sample.
    z2 = X @ theta1.T                                  # m x hidden
    bias_column = np.ones(shape=(m, 1))
    a2 = np.append(bias_column, sigmoid(z2), axis=1)   # m x (hidden + 1)
    a3 = sigmoid(a2 @ theta2.T)                        # m x outputs

    # Backward pass. The bias unit has no incoming weights, so its column of
    # theta2 is dropped when propagating the error to the hidden layer.
    delta3 = a3 - y_new.T                                   # m x outputs
    delta2 = (delta3 @ theta2[:, 1:]) * sigmoid_grad(z2)    # m x hidden

    theta2_grad = (delta3.T @ a2) / m
    theta1_grad = (delta2.T @ X) / m

    theta1_grad[:, 1:] += lamda * theta1[:, 1:] / m
    theta2_grad[:, 1:] += lamda * theta2[:, 1:] / m
    return theta1_grad, theta2_grad

In [229]:
def train(X, Y, theta1, theta2, l_rate, epochs, lamda):
    """Train the network with batch gradient descent.

    Parameters
    ----------
    X : array (m x inputs), passed unchanged to back_prop and cost.
    Y : class labels numbered from 1, as a length-m vector or an m x 1 column.
    theta1, theta2 : initial weight matrices. They are updated in place and
        also returned.
    l_rate : learning rate.
    epochs : number of gradient steps.
    lamda : regularisation strength.

    Returns
    -------
    (theta1, theta2) after training. The cost is printed every 500 epochs.
    """
    m = X.shape[0]
    n_classes = theta2.shape[0]  # one output unit per class

    # Flatten the labels first: calling int() on a 1-element array row is
    # deprecated in recent NumPy releases.
    labels = np.asarray(Y).reshape(-1)[:m].astype(int)

    # One-hot targets, one column per sample; label k sets row k - 1.
    y_new = np.zeros(shape=(n_classes, m))
    y_new[labels - 1, np.arange(m)] = 1

    for epoch in range(1, epochs + 1):
        temp1, temp2 = back_prop(X, Y, y_new, theta1, theta2, lamda)
        theta1 -= l_rate * temp1
        theta2 -= l_rate * temp2
        if epoch % 500 == 0:
            # The cost is only needed for the progress report, so compute it
            # on reporting epochs only.
            J = cost(X, y_new, theta1, theta2, lamda)
            print("epoch " + str(epoch) + ": " + str(J))
    return theta1, theta2

In [127]:
# Read the header-less data file and keep it as a plain NumPy array.
df = pd.read_csv('seeds_dataset.txt', header=None).to_numpy()
df.shape

(210, 8)

In [128]:
# Split features (all but the last column) from labels (last column).
# X is copied so that later in-place edits of X do not write through to df,
# and the label column is reshaped without hard-coding the number of rows.
X = df[:, :-1].copy()  # Shape = 210 x 7
Y = df[:, -1].reshape((-1, 1))  # Shape = 210 x 1

In [129]:
# normalise
# Standardise every feature column to zero mean and unit (population) standard
# deviation. The features are re-read from df and nothing is modified in place,
# so re-running this cell gives the same X instead of normalising the bias
# column that was added on a previous run.
features = df[:, :-1]
X = (features - features.mean(axis=0)) / features.std(axis=0)
# X
# Prepend the bias column of ones.
x = np.ones(shape = (X.shape[0], 1))
X = np.append(x, X, axis = 1)
X.shape

(210, 8)

In [237]:
# Hyper parameters
SEED = 42       # fixed seed so the initial weights are the same on every run
lamda = 0.01    # regularisation strength
inp_lyr = 7     # number of input features (bias excluded)
hid_lyr = 7     # number of hidden units (bias excluded)
out_lyr = 3     # number of output classes
l_rate = 0.2    # gradient-descent learning rate
epochs = 5000   # number of gradient steps
epsil = 10 ** (-4)  # initial weights are drawn uniformly from [-epsil, epsil)

rng = np.random.default_rng(SEED)
theta1 = (2 * rng.random((hid_lyr, inp_lyr + 1)) - 1) * epsil #theta1 shape = 7 x 8
theta2 = (2 * rng.random((out_lyr, hid_lyr + 1)) - 1) * epsil #theta2 shape = 3 x 8

In [241]:
# Pass the lamda hyperparameter instead of repeating its value as a literal,
# so changing it in the hyperparameter cell actually affects training.
theta1, theta2 = train(X, Y, theta1, theta2, l_rate, epochs, lamda)

epoch 500: 0.1270971010692014
epoch 1000: 0.11519330958521798
epoch 1500: 0.10607286033822276
epoch 2000: 0.09877737978200919
epoch 2500: 0.09277765646113696
epoch 3000: 0.08775288298109381
epoch 3500: 0.08348676662263389
epoch 4000: 0.07981749398951966
epoch 4500: 0.07662022237216712
epoch 5000: 0.07380004231184649


In [243]:
# Display the weight matrices returned by the train(...) call.
theta1, theta2

(array([[-0.89114636,  0.80694609,  0.87291342, -0.18955177, -1.72834775,
          0.65386251,  0.06799953,  1.7698029 ],
        [ 2.09344912,  1.688575  ,  1.93573577,  0.26426086,  1.84872818,
          1.30014122, -0.2198797 , -0.61559682],
        [-0.73686521, -0.46669415, -0.10218619, -0.87752754,  5.0422004 ,
         -1.23510871, -2.06111907, -4.4888921 ],
        [ 1.26288353,  1.6740876 ,  1.76606078,  1.50048815,  0.76034048,
          1.46662226, -1.47213168, -4.82865222],
        [-4.01773058, -2.7750816 , -3.45598069,  0.79944078, -3.75472975,
         -1.45369553,  0.13284491,  4.36710518],
        [-0.96426929,  2.08005386,  2.0510561 , -1.13765742, -5.28571467,
          1.02360469,  0.34763906,  3.34914268],
        [ 1.66581156, -1.76303658, -1.82435085,  0.69432792,  3.27705198,
         -0.68481398, -0.83228721, -3.08645185]]),
 array([[-4.97226287, -2.85944984,  1.38835366,  5.01783193,  4.72005294,
         -6.68952576, -6.6145334 ,  4.03582316],
        [-0.71