In [1]:
!pip install import-ipynb



In [3]:
import import_ipynb
import numpy as np
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from util import get_binary_data, sigmoid, sigmoid_cost, error_rate, relu


In [57]:
class ANN(object):
    """Binary classifier with one ReLU hidden layer and a single sigmoid output.

    The parameters ``W1``, ``b1``, ``W2`` and ``b2`` are created inside ``fit``;
    ``forward``, ``predict`` and ``score`` read them and therefore only work
    after ``fit`` has been called.
    """

    def __init__(self, M):
        """Remember the hidden layer width.

        Args:
            M: number of hidden units.
        """
        self.M = M

    def forward(self, X):
        """Run the network on ``X``.

        Args:
            X: input array with one sample per row; its column count must
                match the number of rows of ``self.W1``.

        Returns:
            ``(output, Z)`` where ``output`` is the sigmoid output for each
            sample and ``Z`` is the hidden-layer activation matrix.
        """
        Z = relu(X.dot(self.W1) + self.b1)
        output = sigmoid(Z.dot(self.W2) + self.b2)
        return output, Z

    def predict(self, X):
        """Return the network output for ``X`` rounded with ``np.round``."""
        pY, _ = self.forward(X)
        return np.round(pY)

    def score(self, X, Y):
        """Return ``1 - error_rate(Y, predict(X))``.

        NOTE(review): this is the accuracy provided ``util.error_rate`` returns
        the fraction of mismatching labels -- confirm against ``util``.
        """
        prediction = self.predict(X)
        return 1 - error_rate(Y, prediction)

    def fit(self, Xtrain, Ytrain, Xtest, Ytest, learning_rate=10e-7, reg=1.0, epochs=10000, show_fig=False, valid_len=0.2):
        """Train the network with full-batch gradient descent.

        Every ``epochs`` iteration performs one forward pass over the whole of
        ``Xtrain`` and one update of all parameters. Every 20th iteration the
        cost and error rate on ``(Xtest, Ytest)`` are computed and the cost is
        printed; the lowest error rate seen is printed once training ends.

        Args:
            Xtrain: training inputs, one sample per row.
            Ytrain: training targets, one value per sample.
            Xtest: inputs used only for monitoring.
            Ytest: targets used only for monitoring.
            learning_rate: gradient step size. Note that the default ``10e-7``
                is ``1e-6``, not ``1e-7``.
            reg: coefficient of the ``reg * parameter`` decay term added to
                every gradient (biases included).
            epochs: number of full-batch iterations.
            show_fig: when true, plot the monitored cost after training.
            valid_len: currently unused; kept so existing callers keep working.
        """
        eval_every = 20  # monitoring interval, in epochs

        D = Xtrain.shape[1]

        # Draw order (W1, then W2) is kept stable so runs seeded through
        # np.random.seed stay reproducible.
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
        self.b2 = 0

        costs = []
        best_validation_error = 1

        for i in range(epochs):
            pY, hidden = self.forward(Xtrain)
            delta = pY - Ytrain  # output-layer error signal

            # Back-propagate through W2 *before* W2 is modified: every gradient
            # of this step must be evaluated at the parameters that produced
            # pY, otherwise the hidden-layer update mixes two different
            # parameter states.
            dZ = np.outer(delta, self.W2) * (hidden > 0)

            self.W2 -= learning_rate * (hidden.T.dot(delta) + reg * self.W2)
            self.b2 -= learning_rate * (delta.sum() + reg * self.b2)
            self.W1 -= learning_rate * (Xtrain.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (np.sum(dZ, axis=0) + reg * self.b1)

            if i % eval_every == 0:
                pYvalid, _ = self.forward(Xtest)
                c = sigmoid_cost(Ytest, pYvalid)
                costs.append(c)
                b = error_rate(Ytest, np.round(pYvalid))
                if b < best_validation_error:
                    best_validation_error = b
                print(f'cost: {c}, i: {i}')
        print(f'best validation error: {best_validation_error}')

        if show_fig:
            # One cost value was recorded at epochs 0, eval_every, 2*eval_every, ...
            plt.plot(range(0, epochs, eval_every), costs)
            plt.xlabel('epoch')
            plt.ylabel('cost')
            plt.title('Monitoring cost during training')
            plt.show()

In [58]:
def main():
    """Load the binary dataset, oversample label 1, and train an ANN on it.

    Rows whose label equals 1 are repeated 9 times each and appended after
    the remaining rows; the test split is passed through untouched and used
    by ``ANN.fit`` for monitoring.
    """
    Xtrain, Ytrain, Xtest, Ytest = get_binary_data()

    # Split the training set by label.
    other_inputs = Xtrain[Ytrain != 1, :]
    other_labels = Ytrain[Ytrain != 1]
    ones_inputs = Xtrain[Ytrain == 1, :]

    # Each label-1 row is repeated 9 times in place (row a x9, row b x9, ...).
    ones_inputs = np.repeat(ones_inputs, 9, axis=0)
    ones_labels = [1] * ones_inputs.shape[0]

    Xtrain = np.vstack([other_inputs, ones_inputs])
    Ytrain = np.concatenate((other_labels, ones_labels))

    for arr in (Xtrain, Ytrain):
        print(arr.shape)

    model = ANN(100)
    model.fit(Xtrain, Ytrain, Xtest, Ytest, show_fig=True)

In [59]:
# Start training only when this code runs as the top-level script/kernel;
# presumably this keeps an import of the notebook (e.g. through import_ipynb)
# from triggering a training run -- TODO confirm.
if __name__ == '__main__':
    main()

(8036, 2304)
(8036,)
cost: 670.8472989171886, i: 0
cost: 705.7279286589844, i: 20
cost: 699.0739972485806, i: 40
cost: 693.9908169037196, i: 60
cost: 690.4511295692537, i: 80
cost: 686.6981287301313, i: 100
cost: 681.6730075545743, i: 120
cost: 677.7550984239674, i: 140
cost: 674.3866933980612, i: 160
cost: 671.0716297223375, i: 180
cost: 668.0437463770678, i: 200
cost: 664.8604026506003, i: 220
cost: 661.9441185655076, i: 240
cost: 658.2352131058913, i: 260
cost: 655.2470700964259, i: 280
cost: 652.5662164640844, i: 300
cost: 650.1115275662288, i: 320
cost: 647.9708821190445, i: 340
cost: 645.7725267371917, i: 360
cost: 643.3084927209444, i: 380
cost: 641.0411844291747, i: 400
cost: 638.7500656344083, i: 420
cost: 635.8660065017096, i: 440
cost: 633.8726295412779, i: 460
cost: 631.8629468224715, i: 480
cost: 630.3902487131957, i: 500
cost: 628.4732474020163, i: 520
cost: 625.6534483959474, i: 540
cost: 623.4740651357288, i: 560
cost: 621.9653315064115, i: 580
cost: 620.0354432462732, 

KeyboardInterrupt: 