In [8]:
import numpy as np
import pandas as pd

In [9]:
# Sigmoid function
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp``. The naive form calls ``np.exp(-x)`` directly, which
    overflows (and emits a RuntimeWarning) for large negative ``x``; this
    form returns 0.0 / 1.0 at the extremes without overflowing.

    Args:
        x: Scalar or NumPy array (anything that supports unary minus).

    Returns:
        Element-wise sigmoid of ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

In [10]:
# Derivative of the sigmoid function, written in terms of the sigmoid's output (x = sigmoid(z))
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope expressed via its output.

    ``x`` is the value the sigmoid already produced (an activation), not
    the pre-activation input: if ``a = sigmoid(z)`` then the derivative of
    the sigmoid at ``z`` equals ``a * (1 - a)``.
    """
    complement = 1 - x
    return x * complement

In [11]:
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    Every layer applies ``sigmoid`` and the network is trained with
    full-batch gradient descent on the summed binary cross-entropy loss.

    Attributes:
        layers: Layer sizes from input to output, e.g. ``[2, 2, 1]``.
        alpha: Learning rate used in the gradient-descent update.
        W: List of weight matrices; ``W[i]`` has shape
            ``(layers[i], layers[i+1])``.
        b: List of bias column vectors; ``b[i]`` has shape
            ``(layers[i+1], 1)``.
    """

    # Predictions are clipped to [_EPS, 1 - _EPS] before taking logarithms so
    # a saturated sigmoid (exactly 0.0 or 1.0) cannot make the loss inf/nan.
    _EPS = 1e-12

    def __init__(self, layers, alpha=0.1):
        """Create the network and randomly initialise its parameters.

        Args:
            layers: Sequence of layer sizes, input layer first
                (e.g. ``[2, 2, 1]``).
            alpha: Learning rate.
        """
        self.layers = layers
        self.alpha = alpha
        self.W = []  # weight matrices, one per pair of consecutive layers
        self.b = []  # bias column vectors, one per non-input layer

        for n_in, n_out in zip(layers[:-1], layers[1:]):
            # Standard-normal weights scaled down by the fan-in; zero biases.
            self.W.append(np.random.randn(n_in, n_out) / n_in)
            self.b.append(np.zeros((n_out, 1)))

    def __repr__(self):
        """Return a short summary such as ``Neural network [2-2-1]``."""
        return "Neural network [{}]".format(
            "-".join(str(size) for size in self.layers)
        )

    def _as_targets(self, y):
        """Reshape labels to ``(n_samples, n_outputs)``.

        Keeps the labels aligned with the output layer so that a 1-D label
        vector never broadcasts against ``(n_samples, 1)`` predictions into
        an ``(n_samples, n_samples)`` matrix.
        """
        return np.asarray(y, dtype=float).reshape(-1, self.layers[-1])

    def _forward(self, X):
        """Run the forward pass.

        Returns:
            List with the input followed by the activation of every layer;
            the last element is the network output.
        """
        activations = [np.asarray(X, dtype=float)]
        for W, b in zip(self.W, self.b):
            # b is stored as a column vector; transpose it to broadcast
            # across the rows (samples) of the batch.
            activations.append(sigmoid(np.dot(activations[-1], W) + b.T))
        return activations

    def fit_partial(self, x, y):
        """Perform one full-batch gradient-descent step, updating W and b.

        Args:
            x: Input batch, one sample per row.
            y: Binary labels for the batch (1-D or column vector).
        """
        n_steps = len(self.W)

        # Feedforward, keeping every activation for the backward pass.
        A = self._forward(x)
        y = self._as_targets(y)

        # Backpropagation. ``delta`` is dLoss/dZ for the current layer.
        # For sigmoid + cross-entropy, dLoss/dA * sigmoid'(Z) simplifies to
        # (A - y) at the output layer, which avoids dividing by A or (1 - A)
        # when the output saturates at exactly 0 or 1.
        delta = A[-1] - y
        dW = [None] * n_steps
        db = [None] * n_steps
        for i in reversed(range(n_steps)):
            dW[i] = np.dot(A[i].T, delta)
            db[i] = np.sum(delta, axis=0).reshape(-1, 1)
            if i > 0:
                # Propagate to the previous layer using the pre-update weights.
                delta = np.dot(delta, self.W[i].T) * sigmoid_derivative(A[i])

        # Gradient descent
        for i in range(n_steps):
            self.W[i] = self.W[i] - self.alpha * dW[i]
            self.b[i] = self.b[i] - self.alpha * db[i]

    def fit(self, X, y, epochs=20, verbose=10):
        """Train the network with repeated full-batch updates.

        Args:
            X: Training inputs, one sample per row.
            y: Binary labels.
            epochs: Number of gradient-descent steps to run.
            verbose: Print the loss every ``verbose`` epochs; ``0`` or
                ``None`` disables printing.
        """
        for epoch in range(epochs):
            self.fit_partial(X, y)
            # Guard against verbose == 0, which would otherwise raise
            # ZeroDivisionError in the modulo below.
            if verbose and epoch % verbose == 0:
                loss = self.calculate_loss(X, y)
                print("Epoch {}, loss {}".format(epoch, loss))

    def predict(self, X):
        """Return the network output (sigmoid activations) for ``X``."""
        return self._forward(X)[-1]

    def calculate_loss(self, X, y):
        """Return the binary cross-entropy summed over all samples.

        Labels are reshaped the same way as in ``fit_partial`` and the
        predictions are clipped away from 0 and 1 so the result stays finite.
        """
        y = self._as_targets(y)
        y_predict = np.clip(self.predict(X), self._EPS, 1 - self._EPS)
        return -np.sum(y * np.log(y_predict) + (1 - y) * np.log(1 - y_predict))

In [12]:
# Load the dataset as a plain NumPy array. Every column except the last is a
# feature and the last column is the label.
# NOTE(review): the original note says the features are salary and working
# time -- confirm against dataset.csv.
data = pd.read_csv('dataset.csv').values
N, d = data.shape  # N samples, d columns (d-1 features + 1 label)
X = data[:, :d-1]
# Take the label from the last column (derived from d rather than a
# hard-coded index) so it can never overlap the feature slice above.
y = data[:, d-1].reshape(-1, 1)

In [13]:
# Seed NumPy's global RNG so the random weight initialisation, and therefore
# the training run that follows, is reproducible on Restart & Run All.
np.random.seed(42)
# Input layer sized to the number of features, one hidden layer of 2 units,
# a single sigmoid output; learning rate 0.1.
p = NeuralNetwork([X.shape[1], 2, 1], alpha=0.1)

In [14]:
# Number of feature columns in X; this is the size of the network's input layer.
X.shape[1]

2

In [15]:
# Train for 10000 full-batch epochs, printing the loss every 100 epochs.
p.fit(X, y, epochs=10000, verbose=100)

Epoch 0, loss 13.380891249149206
Epoch 100, loss 11.414507087965601
Epoch 200, loss 9.515272531930922
Epoch 300, loss 18.728518468545577
Epoch 400, loss 15.776248754072032
Epoch 500, loss 8.6447424882366
Epoch 600, loss 9.547556855511301
Epoch 700, loss 8.518028108627805
Epoch 800, loss 5.007415087721405
Epoch 900, loss 3.6462893191619554
Epoch 1000, loss 0.5942339334878142
Epoch 1100, loss 0.17773240229872955
Epoch 1200, loss 0.10331702166861992
Epoch 1300, loss 0.07267521926174804
Epoch 1400, loss 0.05603598804426108
Epoch 1500, loss 0.045600992541337905
Epoch 1600, loss 0.038449081424042346
Epoch 1700, loss 0.03324234799005168
Epoch 1800, loss 0.029282338095831476
Epoch 1900, loss 0.026169053321460207
Epoch 2000, loss 0.02365701206637165
Epoch 2100, loss 0.021587243790360453
Epoch 2200, loss 0.01985227884672915
Epoch 2300, loss 0.018376860966527377
Epoch 2400, loss 0.017106721100495874
Epoch 2500, loss 0.016001738900358223
Epoch 2600, loss 0.015031614019843154
Epoch 2700, loss 0.014