In [80]:
import numpy as np
from itertools import product

In [242]:
tmp = [[0,1]]

X = np.array([[i,j] for i,j in product(*tmp, repeat=2)])
y_nand = np.array([[1],[1],[1],[0]])
y_or = np.array([[0],[1],[1],[1]])
y_and = np.array([[0],[0],[0],[1]])
y_xor = np.array([[0],[1],[1],[0]])

In [108]:
X

array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

In [96]:
def step(x):
    return np.where(x>0, 1, 0)

def linear(x):
    return x

def sigmoid(x):
    return 1/(1+np.exp(-x))

class Activation:
    def __init__(self, method):
        self.method = method
        
    def __call__(self, x):
        if self.method == 'step':
            result = step(x)
        elif self.method == 'sigmoid':
            result = sigmoid(x)
        elif self.method == 'linear':
            result = linear(x)
        
        return result

In [83]:
Activation('step')(np.dot(X, np.random.randn(2,1)))

array([[0],
       [0],
       [1],
       [1]])

In [84]:
## 1. 가설함수
## 2. 예측
## 3. 예측결과 실제값을 비교함수(LOSS함수) --> 변수(W,b) 미분
## 4. loss함수를 수치적 미분
## 5. 미분값을 하강 --> gradient descent
## 6. 2~5번 반복

In [85]:
## X와 W(weight) 내적연산 후 b를 더한다
## 활성화함수(activation Function) 적용
#2
def predict(*args):
    W, b = args
    result = np.dot(X,W) + b
    result = Activation('sigmoid')(result)
    return result

In [86]:
W = np.random.randn(2,1)
b = 0
predict(W,b)

array([[0.5       ],
       [0.62579495],
       [0.72737021],
       [0.81690829]])

In [121]:
#3 loss function
def mean_squared_error(*args):
    y_hat = predict(*args)
    return np.mean(np.square(y_nand-y_hat))

def binary_crossentropy(*args):
    y_hat = predict(*args)
    return -np.mean(y_xor*np.log(y_hat) + (1-y_xor)*np.log(1-y_hat))


In [88]:
mean_squared_error(W,b)

0.2829238923797043

In [89]:
#4
def grad(loss, *args):
    h = 1e-4
    W, b = args
    b = np.array([b])
    grad_W =np.zeros_like(W)
    rows = range(W.shape[0])
    cols = range(W.shape[1])
    for row in rows:
        for col in cols:
            tmp = W[row, col] ## 편미분 시 나머지 값들을 미분하기 위해 현재 값을 저장
            fx = loss(W,b)
            W[row,col] += h
            fxh = loss(W,b)
            dW = (fxh - fx)/h
            grad_W[row,col] = dW
            W[row, col] = tmp ## 1번의 값을 대체
            
    grad_b = np.zeros_like(b)
    for i in range(b.size):
        tmp = b[i]
        fx = loss(W,b)
        b[i] += h
        fxh = loss(W,b)
        db = (fxh-fx)/h
        grad_b[i] = db
        b[i] = tmp
        
    return grad_W, grad_b

In [90]:
grad(mean_squared_error , W, b)

(array([[0.03406073],
        [0.01727783]]),
 array([0]))

In [127]:
# def gradient_descent(epochs, learning_rate, func):
#     for i in range(epochs):
#         print(W,b)
#         W -= learning_rate*grad(mean_squared_error , W, b)[0]
#         b -= learning_rate*grad(mean_squared_error , W, b)[1][0]
#         print(W,b)

In [124]:
np.random.seed(10)
W = np.random.randn(2,1)
b = 0
epochs = 10000
learning_rate = 1e-2

for _ in range(epochs):
    W -= learning_rate*grad(binary_crossentropy , W, b)[0]
    b -= learning_rate*grad(binary_crossentropy , W, b)[1][0]
#     # print(binary_crossentropy(W,b))
#     # print(W,b)

In [125]:
np.where(predict(W,b)>0.5, 1, 0)

array([[0],
       [0],
       [0],
       [1]])

In [113]:
W

array([[-2.9313222 ],
       [-2.93848774]])

In [114]:
b

4.623067461007704

In [251]:
class Net:
    def __init__(self, x, y, activation):
        self.x = x
        self.W = np.random.randn(x.shape[1],1)
        self.b = np.array(x.shape[1]).reshape(-1,1).astype('float32')
        self.y = y
        self.activation = activation
        self.count = 0
    
    def predict(self, x=None):
        if x == None:
            self.y_pred = np.dot(self.x, self.W) + self.b
            self.y_pred = Activation(self.activation)(self.y_pred)
            self.count += 1
            print(f'predict =========>{self.count}')
        else:
            self.y_pred = np.dot(x, self.W) + self.b
            self.y_pred = Activation(self.activation)(self.y_pred)
            self.y_pred = np.where(self.y_pred > 0.5, 1, 0)
        return self.y_pred
    
    def loss(self):
        y_hat = self.predict()
        return -np.mean(self.y*np.log(y_hat) + (1-self.y)*np.log(1-y_hat))
    
    def grad(self):
        h = 1e-4
        self.grad_W =np.zeros_like(W)
        rows = range(W.shape[0])
        cols = range(W.shape[1])
        for row in rows:
            for col in cols:
                tmp = self.W[row, col] 
                fx = self.loss()
                self.W[row,col] += h
                fxh = self.loss()
                dW = (fxh - fx)/h
                self.grad_W[row,col] = dW
                self.W[row, col] = tmp 

        self.grad_b = np.zeros_like(self.b)
        for i in range(b.size):
            tmp = self.b[i]
            fx = self.loss()
            self.b[i] += h
            fxh = self.loss()
            db = (fxh-fx)/h
            self.grad_b[i] = db
            self.b[i] = tmp
        
        return self.grad_W, self.grad_b
    
    
    
    def gradient_descent(self, epochs, learning_rate):
        for _ in range(epochs):
            self.W -= learning_rate*self.grad()[0]
            self.b -= learning_rate*self.grad()[1][0]
        

In [251]:
class Net:
    def __init__(self, x, y):
        self.x = x
    
    def add(self, W):
        
    
    def predict(self, x=None):
        if x == None:
            self.y_pred = np.dot(self.x, self.W) + self.b
            self.y_pred = Activation(self.activation)(self.y_pred)
            self.count += 1
            print(f'predict =========>{self.count}')
        else:
            self.y_pred = np.dot(x, self.W) + self.b
            self.y_pred = Activation(self.activation)(self.y_pred)
            self.y_pred = np.where(self.y_pred > 0.5, 1, 0)
        return self.y_pred
    
    def loss(self):
        y_hat = self.predict()
        return -np.mean(self.y*np.log(y_hat) + (1-self.y)*np.log(1-y_hat))
    
    def grad(self):
        h = 1e-4
        self.grad_W =np.zeros_like(W)
        rows = range(W.shape[0])
        cols = range(W.shape[1])
        for row in rows:
            for col in cols:
                tmp = self.W[row, col] 
                fx = self.loss()
                self.W[row,col] += h
                fxh = self.loss()
                dW = (fxh - fx)/h
                self.grad_W[row,col] = dW
                self.W[row, col] = tmp 

        self.grad_b = np.zeros_like(self.b)
        for i in range(b.size):
            tmp = self.b[i]
            fx = self.loss()
            self.b[i] += h
            fxh = self.loss()
            db = (fxh-fx)/h
            self.grad_b[i] = db
            self.b[i] = tmp
        
        return self.grad_W, self.grad_b
    
    
    
    def gradient_descent(self, epochs, learning_rate):
        for _ in range(epochs):
            self.W -= learning_rate*self.grad()[0]
            self.b -= learning_rate*self.grad()[1][0]
        

In [252]:
network = Net(X, y_and, 'sigmoid')

In [253]:
network.W

array([[0.98132079],
       [0.51421884]])

In [254]:
network.loss()



1.9449121042777426

In [255]:
network.gradient_descent(1, 1e-2)



IndexError: index 1 is out of bounds for axis 1 with size 1

In [221]:
network.predict([[1,1]])

array([[1]])

In [None]:
isinstance([1], None)

In [176]:
network.predict(X)

array([[0.88079708],
       [0.98769013],
       [0.90602773],
       [0.99053868]])

In [243]:
np.random.seed(100)
w_nand = np.random.randn(2,1)
b_nand = 0
w_or = np.random.randn(2,1)
b_or = 0

s1 = np.dot(X, w_nand) + b_nand
s2 = np.dot(X, w_or) + b_or

In [245]:
np.random.seed(100)
w_nand = np.random.randn(2,1)
w_or = np.random.randn(2,1)
W = np.c_[w_nand, w_or]
s_ = np.dot(X,W) + np.array([b_nand, b_or])

In [248]:
s = np.hstack([s1,s2])

In [249]:
s

array([[ 0.        ,  0.        ],
       [ 0.3426804 , -0.25243604],
       [-1.74976547,  1.1530358 ],
       [-1.40708507,  0.90059977]])

In [250]:
s_

array([[ 0.        ,  0.        ],
       [ 0.3426804 , -0.25243604],
       [-1.74976547,  1.1530358 ],
       [-1.40708507,  0.90059977]])