In [3]:
import numpy as np

In [4]:
import pandas as pd

In [5]:
# Load the CSV as a plain array: the first two columns become the feature
# matrix X, the third column becomes the target y as a single column.
d = pd.read_csv('dataset.csv').values
X = np.reshape(d[:, :2], (-1, 2))
y = np.reshape(d[:, 2], (-1, 1))

In [11]:
def sigmoid(X):
    """Element-wise logistic function 1 / (1 + exp(-X)).

    The value is computed from exp(-|X|), which never exceeds 1, so large
    magnitude inputs cannot overflow the exponential (the direct formula
    overflows in exp(-X) for very negative X and emits a RuntimeWarning).

    Parameters
    ----------
    X : scalar or array-like of real numbers
        Non-floating inputs (e.g. integers) are converted to float.

    Returns
    -------
    numpy.ndarray with the shape of X, or a NumPy scalar for scalar input.
    """
    X = np.asarray(X)
    if not np.issubdtype(X.dtype, np.floating):
        X = X.astype(float)
    e = np.exp(-np.abs(X))  # in [0, 1]: may underflow to 0 but never overflows
    # For X >= 0 this is the usual formula; for X < 0 the algebraically
    # equivalent exp(X) / (1 + exp(X)) is used instead.
    out = np.where(X >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return out[()]


def sigmoid_grad(X):
    """Derivative of the logistic function, evaluated element-wise at X.

    Returns sigmoid(X) * (1 - sigmoid(X)), with the same shape as
    sigmoid(X).
    """
    s = sigmoid(X)  # evaluate the activation once and reuse it
    return s * (1 - s)


def crossentropy_loss(Yhat, y, eps=1e-12):
    """Mean binary cross-entropy between predictions and targets.

    Parameters
    ----------
    Yhat : array-like
        Predicted probabilities.
    y : array-like
        Targets; must broadcast against Yhat.
    eps : float, optional
        Predictions are clipped to [eps, 1 - eps] before taking logs so that
        a prediction of exactly 0 or 1 cannot produce log(0), which would
        turn the whole mean into inf or nan.

    Returns
    -------
    The mean over all elements of
    -(y * log(Yhat) + (1 - y) * log(1 - Yhat)).
    """
    Yhat = np.clip(Yhat, eps, 1 - eps)
    return -np.mean(y*np.log(Yhat) + (1-y)*np.log(1-Yhat))


def predict(X, W1, b1, W2, b2):
    """Run the two-layer sigmoid network forward and return its output.

    X is multiplied by W1 and shifted by b1, passed through the sigmoid,
    then the same is done with W2 and b2; the second activation is returned.
    """
    hidden = sigmoid(X.dot(W1) + b1)
    output = sigmoid(hidden.dot(W2) + b2)
    return output


def fit(X, y, W1, b1, W2, b2, eta, n_iters=20000, log_every=4000):
    """Train the two-layer sigmoid network with full-batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (N, d0)
        Input samples.
    y : ndarray, shape (N, d2)
        Targets. A 1-D array is treated as a single column so that the
        residual (A2 - y) cannot silently broadcast to shape (N, N).
    W1, b1, W2, b2 : array-like
        Initial parameters with shapes (d0, d1), (d1,), (d1, d2), (d2,).
        They are copied; the caller's arrays are left unmodified.
    eta : float
        Gradient-descent step size.
    n_iters : int, optional
        Number of gradient-descent iterations.
    log_every : int, optional
        The loss is printed and recorded whenever the iteration index is a
        multiple of this value. A falsy value (0 / None) disables logging.

    Returns
    -------
    (W1, b1, W2, b2, loss_hist) : the trained parameters and the list of
    recorded loss values.
    """
    # Work on private float copies: the in-place updates below would otherwise
    # write through to the arrays the caller passed in.
    W1, b1, W2, b2 = (np.array(p, dtype=float) for p in (W1, b1, W2, b2))

    y = np.asarray(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    loss_hist = []
    N = X.shape[0]
    for i in range(n_iters):
        # feedforward
        Z1 = X.dot(W1) + b1  # shape N, d1
        A1 = sigmoid(Z1)  # shape N, d1
        Z2 = A1.dot(W2) + b2  # shape N, d2
        A2 = sigmoid(Z2)  # shape N, d2

        if log_every and i % log_every == 0:
            loss = crossentropy_loss(A2, y)
            print("loss:", loss)
            loss_hist.append(loss)

        # back propagation
        # With a sigmoid output and cross-entropy loss, the gradient with
        # respect to Z2 collapses to (A2 - y); dividing by N matches the mean
        # taken in the loss.
        E2 = (A2 - y) / N  # shape N, d2
        dW2 = np.dot(A1.T, E2)  # shape d1, d2
        db2 = np.sum(E2, axis=0)  # shape d2
        # Sigmoid derivative at Z1 is A1 * (1 - A1); reuse the activation
        # already computed in the forward pass.
        E1 = np.dot(E2, W2.T) * (A1 * (1 - A1))  # shape N, d1
        dW1 = np.dot(X.T, E1)  # shape d0, d1
        db1 = np.sum(E1, axis=0)  # shape d1

        # gd update (in place on the local copies)
        W1 -= eta * dW1
        b1 -= eta * db1
        W2 -= eta * dW2
        b2 -= eta * db2

    return (W1, b1, W2, b2, loss_hist)

In [12]:
def init(d0, d1, d2, seed=None):
    """Create initial parameters for a d0 -> d1 -> d2 two-layer network.

    Parameters
    ----------
    d0, d1, d2 : int
        Sizes of the input, hidden and output layers.
    seed : int, optional
        When given, weights are drawn from a dedicated generator seeded with
        this value, so repeated calls return identical weights. When None
        (the default) the global ``np.random`` state is used.

    Returns
    -------
    (W1, b1, W2, b2) : W1 has shape (d0, d1) and W2 has shape (d1, d2), both
    drawn from a standard normal scaled by 0.01; b1 (d1,) and b2 (d2,) are
    zeros.
    """
    # np.random and RandomState both expose randn, so either can be used here.
    rng = np.random if seed is None else np.random.RandomState(seed)
    W1 = 0.01 * rng.randn(d0, d1)
    b1 = np.zeros(d1)
    W2 = 0.01 * rng.randn(d1, d2)
    b2 = np.zeros(d2)
    return (W1, b1, W2, b2)

In [24]:
# Layer sizes passed to init (input, hidden, output) and the step size
# that is later handed to fit.
d0, d1, d2 = 2, 1, 1
eta = 1
W1, b1, W2, b2 = init(d0, d1, d2)

In [25]:
# Train the network; the returned parameters replace the initial ones.
W1, b1, W2, b2, loss_hist = fit(X, y, W1, b1, W2, b2, eta=eta)

loss: 0.6931443056624312
loss: 0.0018736487267469125
loss: 0.0008352170778195561
loss: 0.0005342999975956752
loss: 0.0003917817833780053


In [26]:
# Network outputs for the same samples that were used for training.
y_pred = predict(X, W1=W1, b1=b1, W2=W2, b2=b2)
print(y_pred)

[[9.99875027e-01]
 [9.99872464e-01]
 [9.99873254e-01]
 [9.99412821e-01]
 [9.99876087e-01]
 [9.99088984e-01]
 [9.99876095e-01]
 [9.99876024e-01]
 [9.99834679e-01]
 [9.99875636e-01]
 [4.65822922e-04]
 [2.09713981e-04]
 [3.84289875e-04]
 [4.82777147e-04]
 [3.73364780e-04]
 [3.84289875e-04]
 [1.78020022e-04]
 [1.83979107e-04]
 [2.38529373e-04]
 [7.35630890e-04]]


In [16]:
from sklearn.metrics import accuracy_score

In [27]:
# Round the outputs to 0/1 labels and report accuracy as a percentage.
y_label = np.round(y_pred)
accuracy_pct = 100 * accuracy_score(y, y_label)
print(accuracy_pct)

100.0


In [28]:
# Show the predicted labels (outputs rounded to 0/1).
print(y_pred.round())

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
