In [1]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

# Extend the module search path so the project-local imports in the next cell
# resolve: the sibling "my_lib" directory and the parent directory itself.
# NOTE(review): presumably `utils` lives in ../my_lib and `dataset` in the
# parent directory — confirm against the repository layout.
sys.path.append(os.path.join(os.pardir, "my_lib"))
sys.path.append(os.pardir)

In [2]:
from utils import softmax, cross_entropy_error, numerical_gradient, sigmoid
from dataset.mnist import load_mnist

In [3]:
class simpleNet:
    """Minimal single-layer network holding one 2x3 weight matrix ``W``.

    ``predict`` returns the product of the input with ``W``; ``loss`` feeds
    those scores through ``softmax`` and ``cross_entropy_error``.
    """

    def __init__(self):
        # Weights are sampled from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw scores ``np.dot(x, W)`` for input ``x``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return ``cross_entropy_error(softmax(predict(x)), t)``.

        ``x`` is the input passed to ``predict``; ``t`` is the target handed
        unchanged to ``cross_entropy_error``.
        """
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, t)

In [4]:
# Build a simpleNet with freshly sampled weights and show them.
# No random seed is set, so the printed values differ on every run.
net = simpleNet()
print("Weight :\n", net.W)

# Score a single 2-feature input and report the index of the largest score.
x = np.array([0.6, 0.9])
p = net.predict(x)
print("Predict :", p)
print("Index of max value :", np.argmax(p))

# Target vector with the third entry set; used to evaluate the loss for `x`.
# `net`, `x` and `t` are reused by the gradient cell that follows.
t = np.array([0, 0, 1])
print("Loss :", net.loss(x, t))

Weight :
 [[-0.25598878 -0.3011204   1.09098519]
 [-0.67852713  0.91358687 -1.35167827]]
Predict : [-0.76426769  0.64155594 -0.56191933]
Index of max value : 1
Loss : 1.6387021075664976


In [5]:
# Numerically estimate the gradient of the loss with respect to net.W.
# The lambda ignores its argument `w` and always evaluates net.loss(x, t), so
# this only yields a meaningful gradient if numerical_gradient perturbs the
# array it is given (net.W) in place while calling the function.
# NOTE(review): presumably it does — confirm in utils.numerical_gradient.
dW = numerical_gradient(lambda w: net.loss(x, t), net.W)
print(dW)

[[ 0.09519031  0.38827032 -0.48346063]
 [ 0.14278547  0.58240548 -0.72519094]]


In [6]:
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    Parameters are stored in ``self.params`` under the keys ``"w1"``, ``"b1"``,
    ``"w2"`` and ``"b2"``.
    """

    def __init__(self, input_size, hidden_size,
                 output_size, weight_init_std=0.01):
        """Initialise weights with scaled Gaussian noise and biases with zeros.

        Args:
            input_size: Number of input features (rows of ``w1``).
            hidden_size: Number of hidden units (columns of ``w1``, rows of ``w2``).
            output_size: Number of output classes (columns of ``w2``).
            weight_init_std: Factor multiplied into the standard-normal samples
                used for the weight matrices.
        """
        self.params = {}
        self.params["w1"] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["w2"] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        w1, w2 = self.params["w1"], self.params["w2"]
        b1, b2 = self.params["b1"], self.params["b2"]
        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, w2) + b2
        return softmax(a2)

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between ``predict(x)`` and target ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows in ``x`` whose predicted class matches ``t``.

        Args:
            x: 2-D batch of inputs; ``x.shape[0]`` is used as the sample count.
            t: Targets, either one-hot encoded (2-D, reduced with ``argmax``
                along axis 1) or already given as a 1-D array of class indices.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = np.asarray(t)
        # One-hot targets are reduced to class indices; 1-D targets are taken
        # as class indices directly (argmax over axis 1 would raise on them).
        if labels.ndim != 1:
            labels = np.argmax(labels, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return a dict of numerically estimated loss gradients per parameter.

        The returned dict has the same keys as ``self.params``.
        """
        # The callable ignores its argument and re-evaluates the loss on the
        # current parameters, so this relies on the module-level
        # numerical_gradient perturbing the passed array in place.
        # NOTE(review): presumably it does — confirm in utils.
        loss_w = lambda w: self.loss(x, t)
        grads = {}
        for key in ("w1", "b1", "w2", "b2"):
            # Inside the method body this name resolves to the module-level
            # helper, not to this method.
            grads[key] = numerical_gradient(loss_w, self.params[key])
        return grads

In [7]:
# Instantiate a 784-100-10 network and print the shape of each parameter array
# to confirm the layout created by TwoLayerNet.__init__.
# Note: this rebinds the notebook-global `net` used by the earlier cells.
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params["w1"].shape)
print(net.params["b1"].shape)
print(net.params["w2"].shape)
print(net.params["b2"].shape)

(784, 100)
(100,)
(100, 10)
(10,)


In [8]:
# Load the MNIST train/test splits via the project-local loader.
# NOTE(review): presumably normalize=True scales pixel values and
# one_hot_label=True returns labels as one-hot vectors — confirm in
# dataset.mnist. Note that `y_*` holds the targets here, not predictions.
(x_train, y_train), (x_test, y_test) = load_mnist(normalize=True, 
                                                  one_hot_label=True)

In [None]:
# Train a 784-50-10 TwoLayerNet with mini-batch gradient descent, using the
# numerically estimated gradient, and record loss/accuracy over time.
net = TwoLayerNet(784, 50, 10)

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Hyper parameters
iters_num = 3000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Number of iterations per epoch. Integer division keeps the modulo test below
# exact even when train_size is not a multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch (np.random.choice samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    # NOTE(review): numerical differentiation presumably re-evaluates the loss
    # for every single parameter, so each iteration is very slow — confirm
    # before running the full 3000 iterations.
    grad = net.numerical_gradient(x_batch, y_batch)

    # Gradient-descent step, applied in place to each parameter array.
    for key in ("w1", "b1", "w2", "b2"):
        net.params[key] -= learning_rate * grad[key]

    loss = net.loss(x_batch, y_batch)
    train_loss_list.append(loss)

    # Evaluate once per epoch and on the final iteration. The final-iteration
    # check must compare the loop index `i`, not the constant iter_per_epoch.
    if i % iter_per_epoch == 0 or i == iters_num - 1:
        train_acc = net.accuracy(x_train, y_train)
        test_acc = net.accuracy(x_test, y_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(f"train_acc : {train_acc}, test_acc: {test_acc}")
print("Done.")

train_acc : 0.10441666666666667, test_acc: 0.1028
train_acc : 0.7745833333333333, test_acc: 0.7849
train_acc : 0.88095, test_acc: 0.8856
