In [79]:
# Forward:  x1, x2 -> z = w1*x1 + w2*x2 + b -> a = sigmoid(z)
# Loss:     l = -(y*log(a) + (1-y)*log(1-a))
# Backward: da = dl/da = -(y*(1/a) + (1-y)*(1/(1-a))*-1) = -(y/a - (1-y)/(1-a))
#           dz = da * (a*(1-a)) = a - y
#           dw1 = dz*x1 = (a-y)*x1,  dw2 = dz*x2 = (a-y)*x2,  db = dz = a - y

import numpy as np
np.random.seed(1337)

class Model:
    """Minimal feed-forward binary classifier trained by gradient descent.

    Layers are applied in the order they were added. Training minimises the
    binary cross-entropy between the last layer's output and the labels, so
    that output is expected to be a probability in [0, 1].
    """

    # Predictions are clipped to [_EPS, 1 - _EPS] before taking logs or
    # dividing. A saturated output (exactly 0.0 or 1.0) then yields a large
    # but finite loss/gradient instead of inf/NaN, which would otherwise
    # propagate into every weight on the next update.
    _EPS = 1e-15

    def __init__(self, learn_rate):
        """Create an empty model.

        Args:
            learn_rate: step size handed to every layer's ``backward`` call.
        """
        self.__learn_rate = learn_rate
        self.__layers = []

    def add_layer(self, layer):
        """Append ``layer`` to the end of the network.

        The layer must provide ``forward(X)``, ``backward(dA, learn_rate)``,
        ``clean()`` and ``show()``.
        """
        self.__layers.append(layer)

    def train(self, X, Y, epoch, batch=None):
        """Run mini-batch gradient descent and print the loss of every batch.

        Args:
            X: array whose first axis indexes samples.
            Y: 0/1 labels; must broadcast element-wise against the model
                output (e.g. shape ``(n, 1)`` for a single output unit).
            epoch: number of full passes over ``X``.
            batch: samples per update. ``None`` (default) uses the whole
                data set as one batch; the last batch may be smaller.

        Raises:
            ValueError: if ``batch`` is smaller than 1.
        """
        n_samples = X.shape[0]
        if batch is None:
            # max() keeps range() valid when the data set is empty.
            batch = max(n_samples, 1)
        elif batch < 1:
            # A zero batch size would never advance through the data.
            raise ValueError("batch must be a positive integer, got %r" % (batch,))

        for i in range(epoch):
            for start in range(0, n_samples, batch):
                # Slicing clamps at the end, so the final batch is just shorter.
                X_batch = X[start:start + batch]
                Y_batch = Y[start:start + batch]

                A = np.clip(self.predict(X_batch), self._EPS, 1 - self._EPS)
                loss = np.sum(-(Y_batch*np.log(A) + (1-Y_batch)*np.log(1-A)))/Y_batch.shape[0]
                print("loss for %s: %s" % (i, loss))
                # Derivative of the cross-entropy with respect to A.
                dA = -(Y_batch/A - (1-Y_batch)/(1-A))
                for l in reversed(self.__layers):
                    dA = l.backward(dA, self.__learn_rate)

        # Drop the per-layer forward caches once training is finished.
        for l in self.__layers:
            l.clean()

    def evaluate(self, X, Y):
        """Print the fraction of rows whose thresholded prediction equals ``Y``.

        Predictions above 0.5 count as class 1, everything else as class 0.
        """
        A = self.predict(X)
        # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
        A1 = (A > 0.5).astype(int)
        Y1 = (A1 == Y).astype(int)
        print("Accurace: %s" % (np.count_nonzero(Y1)/Y1.shape[0]))

    def predict(self, X):
        """Feed ``X`` through every layer in order and return the final output."""
        for l in self.__layers:
            X = l.forward(X)
        return X

    def show(self):
        """Ask every layer to print its parameters."""
        for l in self.__layers:
            l.show()
            
class Layer:
    """Fully connected layer followed by a sigmoid activation.

    ``forward`` computes ``A = sigmoid(X . W + B)`` and caches ``X`` and ``A``;
    ``backward`` uses that cache to compute gradients and immediately applies
    one gradient-descent step to ``W`` and ``B``.
    """

    def __init__(self, name, input_size, output_size):
        """Create the layer with small random weights and zero biases.

        Args:
            name: label used by ``show`` when printing the parameters.
            input_size: number of input features (rows of ``W``).
            output_size: number of units (columns of ``W``).
        """
        self.__name = name
        # Uniform in [0, 0.01); drawn from NumPy's global random state.
        self.__W = 0.01*np.random.rand(input_size, output_size)
        self.__B = np.zeros((1, output_size))
        # Forward-pass cache. Created here so backward() can detect a missing
        # forward pass instead of failing on an undefined attribute.
        self.__X = None
        self.__A = None

    def clean(self):
        """Release the cached input and activation of the last forward pass."""
        self.__X = None
        self.__A = None

    @staticmethod
    def _sigmoid(Z):
        """Logistic function evaluated without floating-point overflow.

        exp() is only ever applied to non-positive values, so a strongly
        negative ``Z`` cannot overflow the way ``1/(1+exp(-Z))`` does.
        """
        E = np.exp(-np.abs(Z))
        return np.where(Z >= 0, 1/(1+E), E/(1+E))

    def forward(self, X):
        """Return the layer activation for ``X`` and cache it for ``backward``.

        Args:
            X: array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, output_size)`` with values in [0, 1].
        """
        self.__X = X
        Z = np.dot(X, self.__W) + self.__B
        A = self._sigmoid(Z)
        self.__A = A
        return A

    def backward(self, dA, learn_rate):
        """Back-propagate ``dA`` through the layer and update its parameters.

        Args:
            dA: gradient of the loss with respect to this layer's output,
                same shape as the array returned by the last ``forward``.
            learn_rate: gradient-descent step size.

        Returns:
            Gradient of the loss with respect to the layer input.

        Raises:
            RuntimeError: if there is no cached forward pass (``forward`` was
                never called, or ``clean`` was called since).
        """
        if self.__X is None or self.__A is None:
            raise RuntimeError(
                "Layer %s: backward() called without a preceding forward()" % self.__name)

        n_samples = self.__X.shape[0]
        # Chain rule through the sigmoid: dA/dZ = A * (1 - A).
        dZ = dA*self.__A*(1-self.__A)
        # Parameter gradients are averaged over the batch.
        dW = np.dot(self.__X.T, dZ)/n_samples
        dB = (dZ.sum(axis=0)/n_samples).reshape(self.__B.shape)
        # Must use the weights from the forward pass, so compute before updating.
        dX = np.dot(dZ, self.__W.T)
        self.__W = self.__W - learn_rate*dW
        self.__B = self.__B - learn_rate*dB
        return dX

    def show(self):
        """Print the layer's weights and biases."""
        print("%s W:" % self.__name)
        print(self.__W)
        print("%s B:" % self.__name)
        print(self.__B)
        print("\n")


In [87]:
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

%matplotlib inline

# Load a two-class subset of the digits data set as (features, labels).
X, Y = load_digits(n_class=2, return_X_y=True)
# Turn the labels into a column so they line up with the model's (n, 1) output.
Y = Y.reshape(-1, 1)
print("X: ", X.shape, X[:5])
print("Y: ", Y.shape, Y[:5])

# Ordered hold-out split: the first 300 rows train, the remainder tests.
X_train, X_test = X[:300], X[300:]
Y_train, Y_test = Y[:300], Y[300:]

# One sigmoid unit over the 64 input features.
model = Model(0.01)
model.add_layer(Layer("L1", 64, 1))
# Disabled second layer, kept from the original experiment:
# model.add_layer(Layer("L2", 16, 1))

# Ten full-batch epochs, then report accuracy on the held-out rows.
model.train(X_train, Y_train, 10)
model.evaluate(X_test, Y_test)

X:  (360, 64) [[  0.   0.   5.  13.   9.   1.   0.   0.   0.   0.  13.  15.  10.  15.
    5.   0.   0.   3.  15.   2.   0.  11.   8.   0.   0.   4.  12.   0.
    0.   8.   8.   0.   0.   5.   8.   0.   0.   9.   8.   0.   0.   4.
   11.   0.   1.  12.   7.   0.   0.   2.  14.   5.  10.  12.   0.   0.
    0.   0.   6.  13.  10.   0.   0.   0.]
 [  0.   0.   0.  12.  13.   5.   0.   0.   0.   0.   0.  11.  16.   9.
    0.   0.   0.   0.   3.  15.  16.   6.   0.   0.   0.   7.  15.  16.
   16.   2.   0.   0.   0.   0.   1.  16.  16.   3.   0.   0.   0.   0.
    1.  16.  16.   6.   0.   0.   0.   0.   1.  16.  16.   6.   0.   0.
    0.   0.   0.  11.  16.  10.   0.   0.]
 [  0.   0.   1.   9.  15.  11.   0.   0.   0.   0.  11.  16.   8.  14.
    6.   0.   0.   2.  16.  10.   0.   9.   9.   0.   0.   1.  16.   4.
    0.   8.   8.   0.   0.   4.  16.   4.   0.   8.   8.   0.   0.   1.
   16.   5.   1.  11.   3.   0.   0.   0.  12.  12.  10.  10.   0.   0.
    0.   0.   1.  10.  13.   3.   0.