<a href="https://colab.research.google.com/github/ByeonJaeseong/DeepLearningProject/blob/main/loss_function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
from sklearn.datasets import  fetch_openml
import numpy as np
from PIL import Image
import matplotlib as mpl
import matplotlib.pyplot as plt
import pickle
def sum_squares_error(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Args:
        y: Predicted values (array-like).
        t: Target values, broadcast-compatible with ``y``.

    Returns:
        The scalar ``0.5 * sum((y - t) ** 2)``.
    """
    # Fixed typo: ``np,sum`` (comma) evaluated ``0.5 * np`` and raised
    # TypeError instead of calling ``np.sum``.
    diff = np.asarray(y) - np.asarray(t)
    return 0.5 * np.sum(diff ** 2)


def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions and targets.

    Args:
        y: Predicted values; either a single 1-D sample or an array whose
            first axis indexes the samples of a batch.
        t: Targets multiplied element-wise with ``log(y)`` (e.g. one-hot
            vectors laid out like ``y``).

    Returns:
        ``-sum(t * log(y + 1e-7))`` divided by the number of samples.
    """
    # Small offset added before the logarithm so that y == 0 does not give -inf.
    eps = 1e-7

    # Promote a single sample to a batch containing one row.
    if y.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    n_samples = y.shape[0]
    total = np.sum(t * np.log(y + eps))
    return -total / n_samples

def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Args:
        f: Callable taking one argument.
        x: Point at which to differentiate.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.
    """
    step = 1e-4
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)

def function_1(x):
    """Evaluate the sample function ``0.01*x**2 + 0.1*x``."""
    quadratic_term = 0.01 * x ** 2
    linear_term = 0.1 * x
    return quadratic_term + linear_term

def function_2(x):
    """Return the sum of squares of the first two elements of ``x``."""
    first, second = x[0], x[1]
    return first ** 2 + second ** 2

def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed by ``+h`` and ``-h`` (``h = 1e-4``)
    in turn, and ``f`` is evaluated on the whole array each time.

    Args:
        f: Callable taking an array shaped like ``x`` and returning a scalar.
        x: Point at which to evaluate the gradient; any number of
            dimensions. A floating-point array is perturbed in place and
            restored, so an ``f`` that reads the same array through a
            closure observes the perturbation. Any other input is first
            converted to a float copy and is left untouched.

    Returns:
        An array shaped like ``x`` holding the partial derivative estimates.
    """
    h = 1e-4

    # An integer array cannot store x +/- h (the step is truncated away) and
    # would also truncate the gradient, so compute on a float copy instead.
    if not np.issubdtype(np.asarray(x).dtype, np.inexact):
        x = np.asarray(x, dtype=float)

    grad = np.zeros_like(x)

    # np.ndindex visits every element regardless of dimensionality, so
    # weight matrices work as well as flat vectors.
    for idx in np.ndindex(x.shape):
        saved = x[idx]
        try:
            x[idx] = saved + h
            fxh1 = f(x)

            x[idx] = saved - h
            fxh2 = f(x)
        finally:
            # Always put the original value back, even if f raises.
            x[idx] = saved

        grad[idx] = (fxh1 - fxh2) / (2 * h)

    return grad

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise ``f`` by plain gradient descent with numerical gradients.

    Args:
        f: Callable taking an array and returning a scalar.
        init_x: Starting point (array-like). It is not modified.
        lr: Learning rate that scales each gradient step.
        step_num: Number of update steps to perform.

    Returns:
        A new float array holding the position after ``step_num`` updates.
    """
    # Work on a private float copy. The original aliased ``init_x`` and
    # updated it in place, which overwrote the caller's array and raised a
    # casting error when ``init_x`` had an integer dtype.
    x = np.array(init_x, dtype=float)

    for _ in range(step_num):
        x -= lr * numerical_gradient(f, x)
    return x

def softmax(a):
    """Return the softmax of ``a`` computed along its last axis.

    A 1-D input is treated as a single score vector. For inputs with two or
    more dimensions each slice along the last axis is normalised
    independently, so every row of a batch sums to 1.

    Args:
        a: Scores (array-like).

    Returns:
        An array shaped like ``a`` holding the softmax probabilities.
    """
    a = np.asarray(a)

    # The original reduced over the whole array, which normalised an entire
    # batch jointly. Reduce per row for batches and keep the global
    # reduction for 0-D / 1-D input, where it is the same thing.
    axis = -1 if a.ndim > 1 else None

    # Subtracting the maximum leaves the result unchanged but keeps np.exp
    # from overflowing on large scores.
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)

    return exp_a / sum_exp_a


class simpleNet:
    """Toy single-layer network mapping 2 inputs to 3 class scores (no bias)."""

    def __init__(self):
        # 2x3 weight matrix sampled from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw scores, i.e. the dot product of ``x`` and ``W``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return the cross-entropy of ``softmax(predict(x))`` against ``t``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, t)

In [7]:
# Fetch the 'mnist_784' dataset (version 1) without building a DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist["data"]
y = mnist["target"]

# The first 60000 rows are used for training and the remainder for testing.
x_train, x_test = X[:60000], X[60000:]
t_train, t_test = y[:60000], y[60000:]


In [8]:
# Show the shapes of the training inputs and labels, one per line.
print(x_train.shape, t_train.shape, sep="\n")

# Draw a random mini-batch of 10 row indices (np.random.choice samples with
# replacement by default) and select the matching inputs and labels.
train_size, batch_size = x_train.shape[0], 10
batch_mask = np.random.choice(train_size, batch_size)
x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

# Final expression of the cell: a sample of what such an index draw looks like.
np.random.choice(60000, 10)

(60000, 784)
(60000,)


array([29231, 28086,  2844, 51285, 43598, 10720, 17483, 28754, 59390,
       14631])

In [26]:
# Numerical gradient of function_2 at three sample points.
for point in ([3.0, 4.0], [0.0, 2.0], [3.0, 0.0]):
    print(numerical_gradient(function_2, np.array(point)))


[6. 8.]
[0. 4.]
[6. 0.]


In [30]:
# Same start point and step count, three different learning rates:
#   0.1   -> ends very close to the origin
#   10    -> too large: the iterates diverge
#   1e-10 -> too small: the point barely moves, so it fails to converge
for lr in (0.1, 10, 1e-10):
    print(gradient_descent(function_2, init_x=np.array([-3.0, 4.0]), lr=lr, step_num=100))

[-6.11110793e-10  8.14814391e-10]
[-2.58983747e+13 -1.29524862e+12]
[-2.99999994  3.99999992]


In [55]:
# Build the toy network and print its randomly initialised weight matrix.
net = simpleNet()
print(net.W)
# A single two-feature input and the raw class scores the network gives it.
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)

# Index of the highest score. This is a bare expression in the middle of the
# cell, so its value is computed but neither stored nor printed.
np.argmax(p)

# One-hot target marking class index 2 as the correct class.
t = np.array([0,0,1])

# Prints an empty line.
print()
# Softmax of the scores; again a bare expression whose result is discarded.
softmax(net.predict(x))

# Cross-entropy loss of the prediction against t; being the final expression,
# this is the value the notebook displays for the cell.
net.loss(x, t)


[[ 1.31738361  0.41058017  0.96290367]
 [-1.98094259 -0.62120651  1.00153851]]
[-0.99241817 -0.31273776  1.47912686]



0.2240257695289369