In [2]:
import numpy as np

# 퍼셉트론 활성화 함수

In [3]:
logistic = lambda x: 1/(1 + np.exp(-x.clip(-708, 709)))
logistic_prime = lambda x: 2 * logistic(x) * (1 - logistic(x))

relu = lambda x: np.maximum(0, x)
relu_prime = lambda x: (x > 0).astype(float)

tanh = lambda x: np.tanh(x)
tanh_prime = lambda x: logistic_prime(2*x) * 2

elu = lambda x: np.where(x>0, x, np.exp(x)-1)
elu_prime = lambda x: np.where(x>0, 1, np.exp(x))

In [4]:
ACTFUNC = {
    'logistic': (logistic, logistic_prime),
    'relu': (relu, relu_prime),
    'tanh' : (tanh, tanh_prime),
    'elu' : (elu, elu_prime)
}

# 손실함수 $L(Y, \hat{Y})$ (학습오차)

In [5]:
def residual (Y, Y_hat):
    '''L = - Y log Y_hat - (1-Y) log (1-Y_hat)'''
    eps = np.finfo(float).eps
    return - (( Y @ np.log(Y_hat.clip(eps).T)) + ((1-Y) @ np.log(Y_hat.clip(eps).T))) / Y.shape[1]
    #return -(np.dot(Y, np.log(Y_hat.clip(eps)).T) + np.dot(1-Y, np.log((1-Y_hat).clip(eps)).T)) / Y.shape[1]

def residual_prime (Y, Y_hat):
    '''dY/dY_hat'''
    eps = np.finfo(float).eps
    return - np.divide(Y, Y_hat.clip(eps)) + np.divide(1-Y, (1-Y_hat).clip(eps))


In [6]:
LOSSFUNC = {
        'residual': (residual, residual_prime)
}

In [7]:
ACTFUNC['logistic'][1](np.array([-1, 1, 94, 5, -3, -5]))

array([0.39322387, 0.39322387, 0.        , 0.01329611, 0.09035332,
       0.01329611])

In [8]:
import enum


class ANN:
    '''Artificial Neural Network Using Numpy'''

    def __init__(self, layersizes, activations, lossfunc='residual'):
        
        self.layersizes = tuple(layersizes)
        self.activations = tuple(activations)
        self.lossfunc = lossfunc

        assert len(self.layersizes)-1 == len(self.activations), \
            "NN number of layers and the activation function spec does not match"
        
        assert all(f in ACTFUNC for f in activations), \
            "Unrecognized activation function used"

        assert all(isinstance(n, int) and n>=1 for n in layersizes), \
            "Only positive integral number of perceptrons is allowed in each layer"
        
        assert lossfunc in LOSSFUNC, \
            "Unrecognized loss function used"

        # parameter, each is a row vector

        L = len(self.layersizes)
        self.Z = [None] * L
        self.w = [None] * L
        self.b = [None] * L
        self.A = [None] * L
        self.dZ = [None] * L
        self.dw = [None] * L
        self.db = [None] * L
        self.dA = [None] * L

    def init_nn(self, seed=42):
        "Initial weight create"

        np.random.seed(seed)
        sigma = 0.1
        for l, (insize, outsize) in enumerate(zip(self.layersizes, self.layersizes[1:]), 1):
            self.w[l] = np.random.randn(outsize, insize).clip(-6, 6) * sigma
            self.b[l] = np.random.randn(outsize, 1).clip(-6, 6) * sigma


    def forward(self, X):
        """Feed forward the NN using existing W and b, and overwrite the result variables A and Z
        Args:
            X (numpy.ndarray): Input data to feed forward
        """
        self.A[0] = X

        for l, funcname in enumerate(self.activations, 1):
            # Z = W A + b, with A as output from previous layer
            # W is of size rxs and A the size sxn with n the number of data instances, Z the size rxn
            # b is rx1 and broadcast to each column of Z
            g = ACTFUNC[funcname][0]
            self.Z[l] = np.dot(self.W[l], self.A[l-1]) + self.b[l]

            self.A[l] = g(self.Z[l])

        return self.A[-1]

    def backward(self, Y, Y_hat):
        """Back propagation using NN"""

        assert Y.shape[0] == self.layersizes[-1], "Output size mismatch NN"
        assert Y.shape == Y_hat.shape, "Output size mismatch reference(Y_hat)"

        self.dA[-1] = LOSSFUNC[self.lossfunc][1](Y, Y_hat)

        for l, funcname in reversed(list(enumerate(self.activations, 1))):

            m = self.layersizes[l]
            g_prime = ACTFUNC[funcname][1]

            self.dZ[l] = self.dA[l] * g_prime(self.Z[l])
            self.dw[l] = np.dot(self.dZ[l], self.A[l-1].T) / m
            self.db[l] = np.sum(self.dZ[l], axis=1, keepdims=True) / m
            self.dA[l-1] = np.dot(self.w[l].T, self.dZ[l])

    def update(self, alpha):
        """Updates W and b
        Args:
            alpha (float): Learning rate
        """

        for l in range(1, len(self.W)):
            self.w[l] -= alpha * self.dw[l]
            self.b[l] -= alpha * self.db[l]

    def fit(self, X, Y, epochs, alpha, printfreq=0):
        """Train a NN
        Args:
            X: input data, of size mxn which n is the number of data instances and m the number of
                features
            Y: reference output, of size nxm which n is the number of data instances and m the size
                of each output
            alpha: the learning rate
            epochs:
        """
        self.init_nn()
        lossfunc = LOSSFUNC[self.lossfunc][0]
        # train for each epoch
        for j in range(epochs):
            self.forward(X)
            Y_hat = self.A[-1]
            self.backward(Y, Y_hat)
            self.update(alpha)
            if printfreq and j % printfreq == 0:
                loss = float(lossfunc(Y, Y_hat))
                print("Iteration {} - loss value {}".format(j, loss))
        # report loss value
        return lossfunc(Y, Y_hat)

In [18]:
import pandas as pd
from sklearn.datasets import load_boston
from warnings import filterwarnings
filterwarnings('ignore')

In [19]:
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df['target'] = boston.target

df.tail(10)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
496,0.2896,0.0,9.69,0.0,0.585,5.39,72.9,2.7986,6.0,391.0,19.2,396.9,21.14,19.7
497,0.26838,0.0,9.69,0.0,0.585,5.794,70.6,2.8927,6.0,391.0,19.2,396.9,14.1,18.3
498,0.23912,0.0,9.69,0.0,0.585,6.019,65.3,2.4091,6.0,391.0,19.2,396.9,12.92,21.2
499,0.17783,0.0,9.69,0.0,0.585,5.569,73.5,2.3999,6.0,391.0,19.2,395.77,15.1,17.5
500,0.22438,0.0,9.69,0.0,0.585,6.027,79.7,2.4982,6.0,391.0,19.2,396.9,14.33,16.8
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0
505,0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88,11.9


In [20]:
Y = df.target
Y

0      24.0
1      21.6
2      34.7
3      33.4
4      36.2
       ... 
501    22.4
502    20.6
503    23.9
504    22.0
505    11.9
Name: target, Length: 506, dtype: float64