In [14]:
import numpy as np

In [15]:
# Activation functions
def sigmoid(x):
    '''
    Logistic sigmoid activation, applied element-wise.

    x : scalar or array of any dimension

    return same shape as x, each value equal to 1 / (1 + e^(-x))
    '''
    decay = np.exp(-x)
    return 1 / (1 + decay)

def softmax(x):
    '''
    Numerically stable softmax over the last axis.

    x : (batch_size, classes_num), or (classes_num, ) for a single sample

    return array with the same shape as x; values along the last axis are
           non-negative and sum to 1
    '''
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.inexact):
        # Promote integer/bool scores so the shift below cannot wrap around.
        x = x.astype(np.float64)
    if x.size == 0:
        # Nothing to normalise; also avoids max() over an empty axis.
        return x

    # Subtracting the per-row maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (inf / inf would give nan).
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    # keepdims=True keeps the sum broadcastable against exps for any rank.
    return exps / np.sum(exps, axis=-1, keepdims=True)

# Loss function
def cross_entropy(y, t):
    '''
    Cross-entropy loss, summed over the batch.

    y : (batch_size, classes_num) predicted class probabilities
    t : (batch_size, ) integer class index of each sample

    return scalar, the sum (not the mean) of -log(y[i, t[i]] + 1e-7)
    '''
    tiny = 1e-7  # keeps log() away from log(0) = -inf
    rows = np.arange(len(t))
    picked = y[rows, t]  # probability given to the labelled class, per sample
    return -np.sum(np.log(picked + tiny))
    
# Gradient by numerical differentiation (central difference)
def numerical_gradient(f, x):
    '''
    Estimate the gradient of f at x with central differences.

    f : callable that takes x and returns a scalar
    x : array of any dimension; perturbed in place one element at a time,
        each element being restored before moving on to the next

    return array shaped like x holding (f(x + h) - f(x - h)) / (2h) per element
    '''
    step = 1e-4
    gradient = np.zeros_like(x)

    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in walker:
        pos = walker.multi_index
        original = x[pos]
        base = float(original)

        # Evaluate f with this single element nudged up, then down.
        x[pos] = base + step
        loss_plus = f(x)

        x[pos] = base - step
        loss_minus = f(x)

        gradient[pos] = (loss_plus - loss_minus) / (2 * step)

        # Put the original value back so the next element starts from x.
        x[pos] = original
    return gradient

In [16]:
class network:
    '''
    Two-layer fully connected classifier: affine -> sigmoid -> affine -> softmax.

    Parameters are stored in self.params:
        W1 : (input_size, hidden_size)
        b1 : (1, hidden_size)
        W2 : (hidden_size, output_size)
        b2 : (1, output_size)
    '''

    def __init__(self, input_size=784, hidden_size=100, output_size=10, *, intput_size=None):
        '''
        input_size  : number of input features
        hidden_size : number of hidden units
        output_size : number of classes
        intput_size : deprecated misspelling of input_size, still accepted as a
                      keyword so existing calls keep working
        '''
        if intput_size is not None:
            input_size = intput_size

        # NOTE(review): weights are uniform in [0, 1) and biases are ones, as in
        # the original; with large positive inputs this may saturate the
        # sigmoid -- confirm the initialisation is intended.
        self.params = {}
        self.params['W1'] = np.random.rand(input_size, hidden_size)
        self.params['W2'] = np.random.rand(hidden_size, output_size)
        # np.ones takes the shape as ONE tuple; a second positional argument
        # would be interpreted as the dtype.
        self.params['b1'] = np.ones((1, hidden_size))
        self.params['b2'] = np.ones((1, output_size))

    def forward(self, x):
        '''
        x : (batch_size, input_size), or (input_size, ) for a single sample

        return (batch_size, output_size) class probabilities
        '''
        if x.ndim == 1:
            # Treat a single sample as a batch of one.
            x = x.reshape(1, x.size)
        x = np.dot(x, self.params['W1']) + self.params['b1']
        x = sigmoid(x)
        x = np.dot(x, self.params['W2']) + self.params['b2']
        return softmax(x)

    def loss(self, x, t):
        '''
        x : (batch_size, input_size)
        t : (batch_size, ) integer class labels

        return scalar cross-entropy of the predictions against t
        '''
        y = self.forward(x)
        return cross_entropy(y, t)

    def accuracy(self, x, t):
        '''
        x : (batch_size, input_size), or (input_size, ) for a single sample
        t : (batch_size, ) integer class labels

        return fraction of samples whose arg-max prediction equals t
        '''
        y = self.forward(x)
        predicted = np.argmax(y, axis=1)
        corrects = (t == predicted).astype(int).sum()
        # Count prediction rows rather than x.shape[0]: forward() turns a 1-D x
        # into a batch of one, where x.shape[0] would be the feature count.
        total = predicted.shape[0]
        return corrects / total

    def numerical_gradient(self, x, t):
        '''
        Gradient of the loss on (x, t) with respect to every parameter,
        estimated by finite differences.

        return dict with keys 'W1', 'b1', 'W2', 'b2', each shaped like the
               matching entry of self.params
        '''
        # The argument W is ignored on purpose: the module-level
        # numerical_gradient perturbs the parameter array in place, and
        # self.loss reads that same array through self.params.
        loss_W = lambda W: self.loss(x, t)

        # Inside a method the bare name resolves to the module-level function,
        # not to this method.
        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ('W1', 'b1', 'W2', 'b2')
        }
        

In [22]:
import os
import pandas as pd

# The data set is expected in a "digit-recognizer" directory that sits next to
# the current working directory.
parent_dir = os.path.dirname(os.path.abspath(os.getcwd()))
data_dir = os.path.join(parent_dir, "digit-recognizer")

# uint8 rather than int8: int8 only covers -128..127, so any value in 128..255
# would be wrapped or rejected on load.
# NOTE(review): assumes train.csv holds non-negative values no larger than 255
# (labels plus 0..255 pixel intensities) -- confirm against the file.
df = pd.read_csv(os.path.join(data_dir, "train.csv"), dtype=np.uint8)