In [1]:
import numpy as np
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in matplotlib 3.6
# and the old names were removed in a later release, so use whichever name this
# installation actually provides (and keep the default style if neither exists).
for _style in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break

In [2]:
# Number of gradient-descent iterations run by each training loop below.
epochs = 1000
# Step size for the parameter updates; the two-layer XOR cell later rebinds lr to 0.3.
lr = 0.1

In [27]:
# Utility functions
def sigmoid(x):
    """Apply the logistic function 1 / (1 + exp(-x)) element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def mean_squared_error(pred_y, true_y):
    """Return the mean of the squared differences between targets and predictions.

    The squared errors are averaged over every element. (Summing them to a
    scalar first and then calling np.mean would just return the sum, which
    grows with the number of elements.)

    Args:
        pred_y: Predicted values (array-like).
        true_y: Target values, broadcastable against pred_y.

    Returns:
        Scalar average of (true_y - pred_y) ** 2 over all elements.
    """
    diff = np.asarray(true_y) - np.asarray(pred_y)
    return np.mean(np.square(diff))

def cross_entropy_error(pred_y, true_y):
    """Cross-entropy between targets and predicted probabilities, summed over all elements.

    When true_y is one-dimensional, both arguments are first viewed as a
    single-row batch. A small constant is added inside the logarithm so that a
    predicted probability of exactly zero does not produce -inf.
    """
    is_single_sample = true_y.ndim == 1
    if is_single_sample:
        true_y, pred_y = true_y.reshape(1, -1), pred_y.reshape(1, -1)

    eps = 1e-7
    log_pred = np.log(pred_y + eps)
    return -np.sum(true_y * log_pred)

def cross_entropy_error_for_batch(pred_y, true_y):
    """Cross-entropy summed over all elements and divided by the number of rows.

    A one-dimensional true_y is treated as a batch of one: both arguments are
    reshaped to a single row before the row count is read from pred_y.
    """
    if true_y.ndim == 1:
        true_y, pred_y = true_y.reshape(1, -1), pred_y.reshape(1, -1)

    eps = 1e-7  # keeps log() finite when a predicted probability is exactly 0
    n_rows = pred_y.shape[0]
    total = -np.sum(true_y * np.log(pred_y + eps))
    return total / n_rows

def cross_entropy_error_for_bin(pred_y, true_y):
    """Binary cross-entropy, summed over all elements and scaled by 0.5.

    Predictions are clipped to [eps, 1 - eps] before taking logarithms so that
    a saturated prediction of exactly 0 or 1 yields a large finite loss instead
    of inf/nan (0 * log(0) evaluates to nan). Predictions strictly inside that
    interval are left untouched, so ordinary loss values are unchanged.

    Args:
        pred_y: Predicted probabilities, expected to lie in [0, 1].
        true_y: Binary targets with a shape broadcastable against pred_y.

    Returns:
        0.5 * sum(-t * log(p) - (1 - t) * log(1 - p)) as a scalar.
    """
    eps = 1e-7
    safe_pred = np.clip(pred_y, eps, 1 - eps)
    per_element = -true_y * np.log(safe_pred) - (1 - true_y) * np.log(1 - safe_pred)
    # The 0.5 factor is part of this notebook's loss definition; it is kept so
    # previously reported cost values stay comparable.
    return 0.5 * np.sum(per_element)

def softmax(a):
    """Softmax of a, normalised over all of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps np.exp from overflowing to inf (and the
    division from producing nan) when the inputs are large.

    Args:
        a: Array-like of scores.

    Returns:
        Array of the same shape whose entries are positive and sum to 1.
    """
    a = np.asarray(a)
    if a.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(a)

    exp_a = np.exp(a - np.max(a))
    return exp_a / np.sum(exp_a)

def differential(f, x):
    """Estimate the gradient of f at x with central finite differences.

    Every scalar element of x is nudged by +eps and -eps *in place*, f(x) is
    evaluated each time, and the element is then restored. Because the
    perturbation happens in place, f may also be a closure that ignores its
    argument and reads the same array through another reference.

    Elements are addressed by their full index, so arrays of any rank are
    handled (indexing only the first axis would perturb whole rows at once on
    multi-dimensional input).

    Args:
        f: Callable invoked as f(x) that returns a scalar.
        x: NumPy array of floats. It is modified temporarily and restored
            before returning. An integer array would truncate the eps step.

    Returns:
        Array with the shape and dtype of x holding the estimated partial
        derivative of f with respect to each element.
    """
    eps = 1e-5
    diff_value = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        temp_val = x[idx]  # full index -> scalar copy, not a view
        try:
            x[idx] = temp_val + eps
            f_h1 = f(x)

            x[idx] = temp_val - eps
            f_h2 = f(x)
        finally:
            # Always put the original value back, even if f raised.
            x[idx] = temp_val

        diff_value[idx] = (f_h1 - f_h2) / (2 * eps)

    return diff_value

In [28]:
# Neural network
class LogicGateNet():
    """Single sigmoid unit (weights + bias) trained to imitate a logic gate.

    Args:
        n_inputs: Number of gate inputs, i.e. the length of the weight vector.
            Defaults to 2, the width used throughout this notebook.
        seed: Value passed to np.random.seed before the parameters are drawn.
            Note that this reseeds NumPy's global generator. The default of 1
            reproduces the notebook's original initial weights.
    """

    def __init__(self, n_inputs=2, seed=1):
        def weight_init():
            # Reseeding here makes every instance start from the same values.
            np.random.seed(seed)
            weights = np.random.randn(n_inputs)
            bias = np.random.rand(1)

            return weights, bias

        self.weights, self.bias = weight_init()

    def predict(self, x):
        """Return sigmoid(x @ W + b) as a column, one row per row of x."""
        W = self.weights.reshape(-1, 1)  # column vector so the result is (n_rows, 1)
        b = self.bias

        pred_y = sigmoid(np.dot(x, W) + b)
        return pred_y

    def loss(self, x, true_y):
        """Binary cross-entropy (as defined by cross_entropy_error_for_bin) on x."""
        pred_y = self.predict(x)
        return cross_entropy_error_for_bin(pred_y, true_y)

    def get_gradient(self, x, t):
        """Numerically estimate the loss gradient w.r.t. weights and bias.

        Returns:
            Tuple (grad_W, grad_b) shaped like self.weights and self.bias.
        """
        def loss_grad(_):
            # The argument is deliberately ignored: differential() perturbs
            # self.weights / self.bias in place, and loss() reads them via self.
            return self.loss(x, t)

        grad_W = differential(loss_grad, self.weights)
        grad_b = differential(loss_grad, self.bias)

        return grad_W, grad_b
    

In [29]:
# AND gate: fit a single-layer network to the AND truth table.
AND = LogicGateNet()

# One input pair per row, with the matching AND target in the same row of Y.
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([[0],
              [0],
              [0],
              [1]])

train_loss_list = []

for i in range(epochs):
    # Gradient-descent step using the numerically estimated gradients.
    grad_W, grad_b = AND.get_gradient(X, Y)
    AND.weights -= lr * grad_W
    AND.bias -= lr * grad_b

    # Record the loss measured after this epoch's update.
    loss = AND.loss(X, Y)
    train_loss_list.append(loss)

    # Report on every 100th epoch (counted from 1).
    if (i + 1) % 100 == 0:
        print("Epoch: {}, Cost: {}, Weights: {}, Bias: {}".format(
            i + 1, loss, AND.weights, AND.bias))

Epoch: 100, Cost: 0.6886489498071491, Weights: [1.56426876 0.79168393], Bias: [-2.14871589]
Epoch: 200, Cost: 0.4946368603064415, Weights: [2.01360719 1.71241131], Bias: [-3.07894028]
Epoch: 300, Cost: 0.3920165980757418, Weights: [2.42841657 2.29753793], Bias: [-3.79103207]
Epoch: 400, Cost: 0.3257214374791936, Weights: [2.794852   2.73235738], Bias: [-4.37257095]
Epoch: 500, Cost: 0.27863601334755067, Weights: [3.11636193 3.08408364], Bias: [-4.86571237]
Epoch: 600, Cost: 0.24328504683831248, Weights: [3.40015395 3.38235762], Bias: [-5.29433736]
Epoch: 700, Cost: 0.21572536552468008, Weights: [3.65300561 3.64264217], Bias: [-5.67349792]
Epoch: 800, Cost: 0.19363244428365756, Weights: [3.88044124 3.87412053], Bias: [-6.01340133]
Epoch: 900, Cost: 0.1755321312790001, Weights: [4.08680123 4.08279091], Bias: [-6.32133891]
Epoch: 1000, Cost: 0.1604392693330146, Weights: [4.27548114 4.27284863], Bias: [-6.6027234]


In [30]:
# Show the trained AND network's sigmoid output for each of the four rows of X.
print(AND.predict(X))

[[0.00135483]
 [0.08867878]
 [0.08889176]
 [0.87496677]]


In [31]:
# OR gate: fit a single-layer network to the OR truth table.
OR = LogicGateNet()

# One input pair per row, with the matching OR target in the same row of Y.
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([[0],
              [1],
              [1],
              [1]])

train_loss_list = []

for i in range(epochs):
    # Gradient-descent step using the numerically estimated gradients.
    grad_W, grad_b = OR.get_gradient(X, Y)
    OR.weights -= lr * grad_W
    OR.bias -= lr * grad_b

    # Record the loss measured after this epoch's update.
    loss = OR.loss(X, Y)
    train_loss_list.append(loss)

    # Report on every 100th epoch (counted from 1).
    if (i + 1) % 100 == 0:
        print("Epoch: {}, Cost: {}, Weights: {}, Bias: {}".format(
            i + 1, loss, OR.weights, OR.bias))

Epoch: 100, Cost: 0.49580923848195635, Weights: [2.45484353 1.40566594], Bias: [-0.14439625]
Epoch: 200, Cost: 0.3398674231515118, Weights: [2.98631846 2.39448393], Bias: [-0.67661178]
Epoch: 300, Cost: 0.2573360986187996, Weights: [3.45016595 3.08431266], Bias: [-1.03721585]
Epoch: 400, Cost: 0.20630142190075948, Weights: [3.85230067 3.60865952], Bias: [-1.30598633]
Epoch: 500, Cost: 0.1716549922113493, Weights: [4.20195872 4.03000824], Bias: [-1.52060015]
Epoch: 600, Cost: 0.1466501884550824, Weights: [4.50867681 4.38171478], Bias: [-1.6994397]
Epoch: 700, Cost: 0.12779768649454676, Weights: [4.78049264 4.68334611], Bias: [-1.8527641]
Epoch: 800, Cost: 0.11310517185413338, Weights: [5.0237707 4.9472786], Bias: [-1.98691756]
Epoch: 900, Cost: 0.10135180918376233, Weights: [5.24347159 5.18181684], Bias: [-2.10611973]
Epoch: 1000, Cost: 0.09174843008614178, Weights: [5.44346811 5.39279833], Bias: [-2.21332947]


In [32]:
# Show the trained OR network's sigmoid output for each of the four rows of X.
print(OR.predict(X))

[[0.09855987]
 [0.9600543 ]
 [0.96195283]
 [0.9998201 ]]


In [33]:
# NAND gate: fit a single-layer network to the NAND truth table.
NAND = LogicGateNet()

# One input pair per row, with the matching NAND target in the same row of Y.
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([[1],
              [1],
              [1],
              [0]])

train_loss_list = []

for i in range(epochs):
    # Gradient-descent step using the numerically estimated gradients.
    grad_W, grad_b = NAND.get_gradient(X, Y)
    NAND.weights -= lr * grad_W
    NAND.bias -= lr * grad_b

    # Record the loss measured after this epoch's update.
    loss = NAND.loss(X, Y)
    train_loss_list.append(loss)

    # Report on every 100th epoch (counted from 1).
    if (i + 1) % 100 == 0:
        print("Epoch: {}, Cost: {}, Weights: {}, Bias: {}".format(
            i + 1, loss, NAND.weights, NAND.bias))

Epoch: 100, Cost: 0.7911738653769252, Weights: [-0.48972722 -1.25798774], Bias: [1.74566135]
Epoch: 200, Cost: 0.5430490957885361, Weights: [-1.51545093 -1.80261804], Bias: [2.79151756]
Epoch: 300, Cost: 0.4212591302740578, Weights: [-2.14614496 -2.26642639], Bias: [3.56506179]
Epoch: 400, Cost: 0.3456117101527486, Weights: [-2.607325   -2.66303355], Bias: [4.18521187]
Epoch: 500, Cost: 0.2931298605179329, Weights: [-2.97696333 -3.00501941], Bias: [4.70528682]
Epoch: 600, Cost: 0.2543396786002071, Weights: [-3.28850585 -3.30365261], Bias: [5.1539571]
Epoch: 700, Cost: 0.22443918596775067, Weights: [-3.55912171 -3.56778782], Bias: [5.54869527]
Epoch: 800, Cost: 0.20067626330853877, Weights: [-3.7989077  -3.80411461], Bias: [5.90108417]
Epoch: 900, Cost: 0.18134125517637367, Weights: [-4.01441395 -4.01767547], Bias: [6.21926514]
Epoch: 1000, Cost: 0.1653094408173465, Weights: [-4.21019696 -4.21231432], Bias: [6.50920952]


In [34]:
# Show the trained NAND network's sigmoid output for each of the four rows of X.
print(NAND.predict(X))

[[0.99851256]
 [0.90861957]
 [0.90879523]
 [0.12861037]]


In [35]:
# Some cases cannot be implemented with a single-layer network: XOR is one.
XOR = LogicGateNet()

# One input pair per row, with the matching XOR target in the same row of Y.
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([[0],
              [1],
              [1],
              [0]])

train_loss_list = []

for i in range(epochs):
    # Gradient-descent step using the numerically estimated gradients.
    grad_W, grad_b = XOR.get_gradient(X, Y)
    XOR.weights -= lr * grad_W
    XOR.bias -= lr * grad_b

    # Record the loss measured after this epoch's update.
    loss = XOR.loss(X, Y)
    train_loss_list.append(loss)

    # Report on every 100th epoch (counted from 1).
    if (i + 1) % 100 == 0:
        print("Epoch: {}, Cost: {}, Weights: {}, Bias: {}".format(
            i + 1, loss, XOR.weights, XOR.bias))

Epoch: 100, Cost: 1.4026852245456056, Weights: [ 0.47012771 -0.19931523], Bias: [-0.16097708]
Epoch: 200, Cost: 1.3879445622848308, Weights: [ 0.1572739  -0.03387161], Bias: [-0.07321056]
Epoch: 300, Cost: 1.386492030048381, Weights: [0.05525161 0.00089673], Bias: [-0.03330094]
Epoch: 400, Cost: 1.3863236205351948, Weights: [0.02049628 0.00504503], Bias: [-0.01514784]
Epoch: 500, Cost: 1.3862994743646844, Weights: [0.0080051  0.00361297], Bias: [-0.00689034]
Epoch: 600, Cost: 1.3862953430687464, Weights: [0.00326661 0.00201812], Bias: [-0.00313421]
Epoch: 700, Cost: 1.3862945581495083, Weights: [0.00137938 0.00102449], Bias: [-0.00142566]
Epoch: 800, Cost: 1.38629440139037, Weights: [0.00059716 0.00049628], Bias: [-0.00064849]
Epoch: 900, Cost: 1.3862943694120307, Weights: [0.00026303 0.00023435], Bias: [-0.00029498]
Epoch: 1000, Cost: 1.386294362832352, Weights: [0.0001172  0.00010905], Bias: [-0.00013418]


In [36]:
# Show the single-layer network's outputs for the XOR inputs; in the recorded
# run all four values sit at roughly 0.5.
print(XOR.predict(X))

[[0.49996646]
 [0.49999372]
 [0.49999575]
 [0.50002302]]


In [37]:
# Implement the XOR gate with a two-layer network
class XORNet():
    """Two-layer sigmoid network used to learn XOR.

    The first-layer weight vector w_1 has one entry per input and is applied as
    a single column, so for x of shape (n, 2) the product x @ W_1 is (n, 1).
    Adding b_1 broadcasts that column across the hidden units, which therefore
    share the same input weights and differ only in their bias.

    Args:
        hidden_size: Number of hidden units, i.e. the length of b_1 and w_2.
            Defaults to 2, the width used in this notebook.
        seed: Value passed to np.random.seed before the parameters are drawn.
            Note that this reseeds NumPy's global generator. The default of 1
            reproduces the notebook's original initial parameters.
    """

    def __init__(self, hidden_size=2, seed=1):
        np.random.seed(seed)

        def weight_init():
            # Draw order matters for reproducibility: w_1, b_1, w_2, b_2.
            params = {}
            params['w_1'] = np.random.randn(2)
            params['b_1'] = np.random.rand(hidden_size)
            params['w_2'] = np.random.randn(hidden_size)
            params['b_2'] = np.random.rand(1)
            return params

        self.params = weight_init()

    def predict(self, x):
        """Run the forward pass and return the output-layer sigmoid activations."""
        W_1, W_2 = self.params['w_1'].reshape(-1, 1), self.params['w_2'].reshape(-1, 1)
        B_1, B_2 = self.params['b_1'], self.params['b_2']

        # Column result broadcast against B_1 gives one pre-activation per hidden unit.
        A1 = np.dot(x, W_1) + B_1
        Z1 = sigmoid(A1)
        A2 = np.dot(Z1, W_2) + B_2
        pred_y = sigmoid(A2)

        return pred_y

    def loss(self, x, true_y):
        """Binary cross-entropy (as defined by cross_entropy_error_for_bin) on x."""
        pred_y = self.predict(x)
        return cross_entropy_error_for_bin(pred_y, true_y)

    def get_gradient(self, x, t):
        """Numerically estimate the loss gradient for every parameter array.

        Returns:
            Dict with the keys of self.params ('w_1', 'b_1', 'w_2', 'b_2'),
            each mapped to a gradient shaped like the parameter.
        """
        def loss_grad(_):
            # The argument is deliberately ignored: differential() perturbs the
            # parameter arrays in place, and loss() reads them via self.params.
            return self.loss(x, t)

        grads = {}
        grads['w_1'] = differential(loss_grad, self.params['w_1'])
        grads['b_1'] = differential(loss_grad, self.params['b_1'])
        grads['w_2'] = differential(loss_grad, self.params['w_2'])
        grads['b_2'] = differential(loss_grad, self.params['b_2'])

        return grads

In [40]:
# This run uses a larger step size; note that it rebinds the notebook-wide lr.
lr = 0.3

XOR = XORNet()

# One input pair per row, with the matching XOR target in the same row of Y.
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([[0],
              [1],
              [1],
              [0]])

train_loss_list = []

for i in range(epochs):
    grads = XOR.get_gradient(X, Y)

    # Gradient-descent step applied in place to each parameter array.
    for key in ('w_1', 'b_1', 'w_2', 'b_2'):
        XOR.params[key] -= lr * grads[key]

    # Record the loss measured after this epoch's update.
    loss = XOR.loss(X, Y)
    train_loss_list.append(loss)

    # Report on every 100th epoch (counted from 1).
    if (i + 1) % 100 == 0:
        print('Epoch: {}, cost: {}'.format(i + 1, loss))

Epoch: 100, cost: 1.3535614442470036
Epoch: 200, cost: 1.2827154568316697
Epoch: 300, cost: 0.8968907892186366
Epoch: 400, cost: 0.33871971411928997
Epoch: 500, cost: 0.18121344476191775
Epoch: 600, cost: 0.11991186457358068
Epoch: 700, cost: 0.08861936864741338
Epoch: 800, cost: 0.06992180653088811
Epoch: 900, cost: 0.0575804135303371
Epoch: 1000, cost: 0.04886093568413276


In [41]:
# Show the two-layer network's sigmoid output for each of the four rows of X.
print(XOR.predict(X))

[[0.0217367 ]
 [0.96884394]
 [0.97816819]
 [0.0217794 ]]
