In [619]:
import numpy as np

In [620]:
from sklearn.datasets import load_iris

In [621]:
X = load_iris()["data"]
y = load_iris()["target"]

In [622]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [623]:
def softmax(x):
    c = np.max(x)
    x = x-c
    return np.exp(x)/np.sum(np.exp(x))

In [624]:
# def softmax(a):
#     exp_a = np.exp(a)
#     sum_exp_a = np.sum(exp_a)
#     y = exp_a / sum_exp_a
    
#     return y

In [625]:
# np.sum(softmax(X))

1.0

In [626]:
def categorical_crossentropy(t,y):
    return np.mean(-t*np.log(y))

In [627]:
# def make_one(x):
#     result = np.zeros((x.size, np.unique(x).size))
#     for idx, val in enumerate(x):
#         result[idx][val] = 1
#     return result

In [628]:
def make_one(x):
    result = np.zeros((x.size, np.unique(x).size))
    for idx1,idx2 in enumerate(x):
        result[idx1,idx2] = 1
    return result

In [629]:
X.shape

(150, 4)

In [630]:
y = make_one(y)

In [631]:
input_shape = X.shape[1:]
output_shape = y.shape[1:]

In [632]:
W1 = np.random.randn(4,50)
b1 = np.zeros(50)
W2 = np.random.randn(50,3)
b2 = np.zeros(3)

In [633]:
def predict(x):
    W1 = np.random.randn(4,50)
    b1 = np.zeros(50)
    W2 = np.random.randn(50,3)
    b2 = np.zeros(3)
    layer1 = np.dot(x,W1) + b1
    z1 = sigmoid(layer1)
    layer2 = np.dot(z1,W2) + b2
    out = softmax(layer2)
    return out

In [634]:
y_hat = predict(X)
y_hat

array([[1.45210875e-05, 6.42060936e-03, 2.49654904e-06],
       [1.33967268e-05, 7.34885636e-03, 1.97171722e-06],
       [1.41248920e-05, 6.35091740e-03, 2.56029988e-06],
       [1.36897624e-05, 6.94310606e-03, 2.26066506e-06],
       [1.48413108e-05, 6.08359958e-03, 2.77390808e-06],
       [1.25757260e-05, 6.44258305e-03, 1.93102571e-06],
       [1.33453811e-05, 5.86791245e-03, 2.54988485e-06],
       [1.41544911e-05, 6.75304781e-03, 2.30702446e-06],
       [1.34881781e-05, 6.89155956e-03, 2.29549114e-06],
       [1.48150477e-05, 7.49133601e-03, 2.26323634e-06],
       [1.46782635e-05, 6.58033793e-03, 2.38554234e-06],
       [1.40818532e-05, 6.82756588e-03, 2.35766530e-06],
       [1.47820044e-05, 7.39768230e-03, 2.32732863e-06],
       [1.51561742e-05, 5.95249572e-03, 3.35577062e-06],
       [1.58516175e-05, 5.85113917e-03, 2.97102778e-06],
       [1.43795537e-05, 5.37357118e-03, 2.75171072e-06],
       [1.33927447e-05, 5.48865363e-03, 2.48001814e-06],
       [1.33385685e-05, 6.27864

In [635]:
categorical_crossentropy(y,y_hat)

3.409286154603345

In [636]:
def numerical_gradient(f,x):
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"],op_flags=["readwrite"])
    
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh = f(x)
        x[idx] = tmp_val
        fx = f(x)
        grad[idx] = (fxh-fx)/h
        it.iternext()
    return grad

In [637]:
# def gradient_descent(x,t):
#     y = predict(x)
#     W_loss = lambda W: categorical_crossentropy(y,t)
#     dW1 = numerical_gradient(W_loss,W1)
#     db1 = numerical_gradient(W_loss,b1)
#     dW2 = numerical_gradient(W_loss,W2)
#     db2 = numerical_gradient(W_loss,b2)
#     W1 -= dW1*lr
#     b1 -= db1*lr
#     W2 -= dW2*lr
#     b2 -= db2*lr

In [638]:
W1 = np.random.randn(4,50)
b1 = np.zeros(50)
W2 = np.random.randn(50,3)
b2 = np.zeros(3)

In [639]:
layer1 = np.dot(X,W1) + b1
z1 = sigmoid(layer1)
layer2 = np.dot(z1,W2) + b2
out = softmax(layer2)
lr = 1e-4
W_loss = lambda W: categorical_crossentropy(y,out)
dW1 = numerical_gradient(W_loss,W1)
db1 = numerical_gradient(W_loss,b1)
dW2 = numerical_gradient(W_loss,W2)
db2 = numerical_gradient(W_loss,b2)
W1 -= dW1*lr
b1 -= db1*lr
W2 -= dW2*lr
b2 -= db2*lr
categorical_crossentropy(y,out)

4.080036621501369

In [640]:
dW1

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]])

In [641]:
y_hat = predict(X)
categorical_crossentropy(y_hat,y)

  return np.mean(-t*np.log(y))


inf

# Class 생성

In [658]:
class Network:
    def __init__(self):
        self.W1 = np.random.randn(4,50)
        self.b1 = np.zeros(50)
        self.W2 = np.random.randn(50,3)
        self.b2 = np.zeros(3)
        self.err = None
        
    def predict(self,x):
        y = np.dot(x, self.W1) + self.b1
        y = sigmoid(y)
        y = np.dot(y, self.W2) + self.b2
        y = softmax(y)
        return y
        # return np.argmax(y, axis=1)
    
    def loss(self,x,t):
        y = self.predict(x)
        err = categorical_crossentropy(t,y)
        self.err = err
        return self.err
    
    def accuracy(self,x,t):
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        t = np.argmax(t,axis=1)
        acc = np.sum(y==t)/t.size
        return acc
    
    def gradient(self,x,t):
        # self.loss(x,t)
        lr = 1e-4
        W_loss = lambda W : self.loss(x,t)
        dW1 = numerical_gradient(W_loss,self.W1)
        db1 = numerical_gradient(W_loss,self.b1)
        dW2 = numerical_gradient(W_loss,self.W2)
        db2 = numerical_gradient(W_loss,self.b2)
        self.W1 -= dW1*lr
        self.b1 -= db1*lr
        self.W2 -= dW2*lr
        self.b2 -= db2*lr
        return self.err

In [659]:
model = Network()

In [660]:
model.predict(X[0:1])

array([[9.99648698e-01, 3.42113802e-04, 9.18827257e-06]])

In [661]:
np.argmax(model.predict(X[0:1]))

0

In [662]:
model.loss(X,y)

3.6580091652471114

In [663]:
model.gradient(X,y)

3.6580091652471114

In [664]:
# epochs = 10000
# for epoch in range(epochs):
#     model.gradient(X,y)
#     if epoch % 100 == 0:
#         print(model.err)

3.6577326456271577


In [665]:
model.accuracy(X,y)

0.3333333333333333

In [667]:
epochs = 10000
for epoch in range(epochs):
    model.gradient(X,y)
    if epoch % 100 == 0:
        print(f'loss : {model.err} === accuracy : {model.accuracy(X,y)}')

loss : 3.6301139564151543 === accuracy : 0.3333333333333333
loss : 3.603351746922548 === accuracy : 0.3333333333333333
loss : 3.5771378522477475 === accuracy : 0.3333333333333333
loss : 3.551440901574507 === accuracy : 0.3333333333333333
loss : 3.526230304722645 === accuracy : 0.3333333333333333
loss : 3.5014765879919882 === accuracy : 0.3333333333333333
loss : 3.4771516367446584 === accuracy : 0.3333333333333333
loss : 3.453228860217893 === accuracy : 0.3333333333333333
loss : 3.429683292889241 === accuracy : 0.3333333333333333
loss : 3.4064916451245724 === accuracy : 0.3333333333333333
loss : 3.383632314089135 === accuracy : 0.3333333333333333
loss : 3.3610853641661547 === accuracy : 0.3333333333333333
loss : 3.3388324845141444 === accuracy : 0.3333333333333333
loss : 3.3168569299585497 === accuracy : 0.3333333333333333
loss : 3.295143450178737 === accuracy : 0.3333333333333333
loss : 3.2736782111169163 === accuracy : 0.3333333333333333
loss : 3.2524487116873533 === accuracy : 0.3333

KeyboardInterrupt: 