In [1]:
import numpy as np
import os
import torch
import os
import struct
import matplotlib.pyplot as plt
from collections import OrderedDict

In [2]:
def softmax(a):
    """Compute a numerically stable softmax over the class axis.

    Args:
        a: Array-like of scores. Either a single 1-D score vector or an
            array with at least two dimensions whose axis 1 indexes classes
            (e.g. ``(batch, classes)``).

    Returns:
        ndarray with the same shape as ``a``; the entries along the class
        axis are non-negative and sum to 1.
    """
    a = np.asarray(a)
    # A lone sample has only axis 0; batched input keeps the original axis=1
    # so existing 2-D (and higher) callers get exactly the same result.
    # cross_entropy already accepts 1-D input, so softmax should as well.
    axis = 0 if a.ndim == 1 else 1
    # Subtracting the per-row max does not change the result but keeps
    # np.exp from overflowing on large scores.
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    y = exp_a / sum_exp_a

    return y

def cross_entropy(y, t):
    """Mean cross-entropy between predictions ``y`` and one-hot targets ``t``.

    Args:
        y: ndarray of predicted probabilities, 1-D for a single sample or
            2-D with one row per sample.
        t: ndarray of targets with the same number of elements per sample
            as ``y`` (multiplied element-wise with ``log(y)``).

    Returns:
        The summed ``-t * log(y + 1e-7)`` divided by the number of rows.
    """
    if y.ndim == 1:
        # Treat a lone sample as a batch containing one row.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    eps = 1e-7  # keeps log() finite when a probability is exactly 0
    n_rows = y.shape[0]
    weighted_log = t * np.log(y + eps)
    return -np.sum(weighted_log) / n_rows

In [3]:
# Sanity check of cross_entropy on one hand-written sample.
# One-hot target: the true class is index 2.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# Predicted probabilities; the true class (index 2) receives 0.6.
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
# Only the index-2 term survives the multiplication by t, so the result is
# -log(0.6 + 1e-7), roughly 0.5108.
cross_entropy(np.array(y), np.array(t))

0.510825457099338

In [4]:
class MNIST:
    """Loader for MNIST-style IDX ubyte files.

    Reads the ``train-*`` or ``t10k-*`` label/image files found under
    ``path``, flattens every image into one row, one-hot encodes the labels,
    optionally divides pixel values by 255, and optionally sets aside a
    shuffled validation subset.
    """

    def __init__(self, path, is_train=True, normalize=True, split=0.0):
        """Load the data set from disk.

        Args:
            path: Directory containing the IDX files.
            is_train: If True read the ``train-*`` files, otherwise ``t10k-*``.
            normalize: If True divide the pixel values by 255.
            split: Fraction of the samples to hold out for validation;
                ``0.0`` disables the split.
        """
        self.path = path
        self.is_train = is_train
        self.split = split
        self.images, self.labels = self._load_mnist()
        self.labels = self._one_hot(self.labels)
        if normalize:
            self.images = self.images / 255
        self.train_idx, self.valid_idx = self._split_validation(self.split)

    def _load_mnist(self):
        """Read the label and image files selected by ``is_train``.

        Returns:
            ``(images, labels)`` where ``images`` is a uint8 array with one
            flattened image per row and ``labels`` is a 1-D uint8 array.

        Raises:
            ValueError: (from ``reshape``) if the pixel data does not hold
                exactly ``len(labels) * rows * cols`` bytes.
        """
        prefix_str = "train" if self.is_train else "t10k"
        labels_path = os.path.join(self.path, f'{prefix_str}-labels-idx1-ubyte')
        images_path = os.path.join(self.path, f'{prefix_str}-images-idx3-ubyte')
        with open(labels_path, 'rb') as lbpath:
            # 8-byte big-endian header: magic number, item count.
            _magic, _n = struct.unpack('>II', lbpath.read(8))
            labels = np.fromfile(lbpath, dtype=np.uint8)
        with open(images_path, 'rb') as imgpath:
            # 16-byte big-endian header: magic number, image count, rows, cols.
            _magic, _num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
            # Use the image size stored in the header rather than assuming
            # 28 * 28 = 784 pixels per image.
            images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), rows * cols)

        return images, labels

    def _split_validation(self, split):
        """Choose shuffled index arrays for the train/validation subsets.

        Args:
            split: Fraction of samples assigned to the validation subset.

        Returns:
            ``(train_idx, valid_idx)`` index arrays, or ``(None, None)`` when
            ``split`` is ``0.0``.
        """
        if split == 0.0:
            return None, None
        data_len = len(self.images)
        idx_full = np.arange(data_len)
        # NOTE: this reseeds NumPy's *global* generator. It makes the split
        # identical on every run, and every later np.random call in the
        # process continues from this seed as a side effect.
        np.random.seed(0)
        np.random.shuffle(idx_full)
        len_valid = int(data_len * split)
        valid_idx = idx_full[0:len_valid]
        train_idx = idx_full[len_valid:]

        return train_idx, valid_idx

    def _one_hot(self, label, num_classes=10):
        """Convert integer class labels into one-hot rows.

        Args:
            label: Array-like of integer class indices.
            num_classes: Width of each one-hot row.

        Returns:
            Float array of shape ``(number of labels, num_classes)`` with a
            single 1 per row.
        """
        # Encode the argument that was passed in, not self.labels, so the
        # method does not depend on the assignment order inside __init__.
        label = np.asarray(label).flatten()
        one_hot_label = np.zeros((label.size, num_classes))
        one_hot_label[np.arange(label.size), label] = 1

        return one_hot_label

    def get_data_set(self):
        """Return ``(train_x, train_t, valid_x, valid_t)``.

        When no validation split was requested the full data is returned as
        the training pair and both validation entries are ``None``.
        """
        if self.train_idx is not None:
            return self.images[self.train_idx], self.labels[self.train_idx], self.images[self.valid_idx], self.labels[self.valid_idx]
        return self.images, self.labels, None, None

In [5]:
class ReLu:
    """Rectified linear unit layer with a cached mask for the backward pass."""

    def __init__(self):
        # Boolean array marking the inputs that were <= 0 in the last forward().
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every non-positive entry set to 0.

        Args:
            x: ndarray of layer inputs (left unmodified).

        Returns:
            ndarray of the same shape as ``x``.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Propagate the upstream gradient through the layer.

        Args:
            dout: Upstream gradient with the same shape as the input given
                to the last ``forward`` call.

        Returns:
            New ndarray equal to ``dout`` with the positions that were
            non-positive in the forward pass set to 0.
        """
        # Work on a copy so the caller's gradient array is not overwritten.
        dx = np.array(dout, copy=True)
        dx[self.mask] = 0

        return dx

In [6]:
class Affine:
    """Fully connected layer computing ``x . W + b``."""

    def __init__(self, W, b):
        """Keep references to the weight matrix ``W`` and bias ``b``."""
        self.W, self.b = W, b
        # Input cached by forward() for use in backward().
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Store the parameter gradients and return the input gradient.

        Args:
            dout: Upstream gradient of the layer output.

        Returns:
            ``np.dot(dout, W.T)``, the gradient with respect to the input
            of the last ``forward`` call.
        """
        grad_input = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is added to every row, so its gradient sums over axis 0.
        self.db = np.sum(dout, axis=0)
        return grad_input

In [7]:
class SoftmaxWithLoss:
    """Softmax activation combined with cross-entropy loss."""

    def __init__(self):
        self.loss = None  # loss value from the last forward()
        self.y = None     # softmax output from the last forward()
        self.t = None     # targets passed to the last forward()

    def forward(self, x, t):
        """Apply softmax to the scores ``x`` and return the cross-entropy vs ``t``.

        Args:
            x: Score array passed to ``softmax``.
            t: Target array passed to ``cross_entropy`` together with the
                softmax output.

        Returns:
            The loss value, which is also stored in ``self.loss``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy(self.y, self.t)

        return self.loss

    def backward(self, d_out=1):
        """Return the gradient of the loss with respect to the scores.

        Args:
            d_out: Upstream gradient of the loss; it scales the result.
                The default of 1 gives the plain ``(y - t) / batch_size``.

        Returns:
            ndarray with the same shape as the cached softmax output.
        """
        # cross_entropy treats a 1-D prediction as a batch of one sample, so
        # the same batch size has to be used here.
        batch_size = self.y.shape[0] if self.y.ndim > 1 else 1
        # Honour the upstream gradient instead of silently ignoring it;
        # multiplying by the default of 1 leaves the values unchanged.
        dx = d_out * (self.y - self.t) / batch_size

        return dx

In [14]:
class MLP:
    """Two-layer perceptron: Affine -> ReLu -> Affine, with a softmax loss."""

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters and wire up the layers.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Scale applied to the standard-normal initial
                weights; biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # The layers keep references to the arrays stored in self.params, so
        # in-place updates of those arrays are seen by the layers as well.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['ReLu1'] = ReLu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the class scores."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the softmax cross-entropy loss of inputs ``x`` against ``t``."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, y, t):
        """Return the fraction of rows whose arg-max class matches the target.

        Args:
            y: Class scores with one row per sample (e.g. the output of
                ``predict``), not raw inputs.
            t: Targets, either 1-D class indices or one-hot rows.

        Returns:
            Accuracy as a float between 0 and 1.
        """
        predicted = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        # Divide by the number of scored rows. The previous code read an
        # undefined name `x` here and only worked if a global `x` existed.
        accuracy = np.sum(predicted == t) / float(predicted.shape[0])
        return accuracy

    def train(self, x, t):
        """Run one forward/backward pass without updating the parameters.

        Args:
            x: Batch of inputs.
            t: Batch of targets.

        Returns:
            ``(grads, loss, accuracy)`` where ``grads`` maps ``'W1'``,
            ``'b1'``, ``'W2'`` and ``'b2'`` to their gradients, and ``loss``
            and ``accuracy`` are measured on this batch.
        """
        # forward
        y = self.predict(x)
        loss = self.lastLayer.forward(y, t)
        accuracy = self.accuracy(y, t)

        # backward: start at the loss layer, then visit the layers in reverse
        d_out = 1
        d_out = self.lastLayer.backward(d_out)
        for layer in reversed(list(self.layers.values())):
            d_out = layer.backward(d_out)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads, loss, accuracy

In [15]:
# Directory holding the raw MNIST IDX files.
path = "../minist/data/MNIST/raw/"
# is_train=True reads the train-* files, and split=0.1 holds out 10% of them.
# get_data_set() returns (train x, train t, validation x, validation t), so
# x_test / t_test are a validation split of the training files, not the
# t10k test set.
# NOTE(review): `t_tarin` looks like a typo for `t_train`; later cells use
# the same spelling, so it has to be renamed everywhere at once.
x_train, t_tarin, x_test, t_test = MNIST(path, is_train=True, split=0.1).get_data_set()

In [16]:
# 784 inputs matches the flattened image rows produced by the MNIST loader;
# 10 outputs matches the width of its one-hot labels.
network = MLP(input_size=784, hidden_size=50, output_size=10)
# Number of mini-batch updates performed by the training loop.
iters_num = 10000
# Number of samples in the training split.
train_size = x_train.shape[0]
# Number of samples drawn for each update.
batch_size = 100
# Step size multiplied with each gradient in the parameter update.
learning_rate = 0.1

In [17]:
# Number of iterations that make up one pass over the training split.
# Floor division keeps this an int, so `i % iter_per_epoch == 0` still fires
# once per epoch when train_size is not an exact multiple of batch_size
# (true division would give a fractional value that almost never divides i).
# max() guards against a training split smaller than one batch.
iter_per_epoch = max(train_size // batch_size, 1)

In [12]:
train_loss_list = []  # loss on the current batch after each update
train_acc_list = []   # accuracy on the training split, once per epoch
test_acc_list = []    # accuracy on the held-out split, once per epoch
for i in range(iters_num):
    # Draw a random mini-batch (np.random.choice samples with replacement
    # by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_tarin[batch_mask]

    # MLP.train returns (grads, loss, accuracy); only the gradient dict is
    # needed for the update, so take the first element instead of indexing
    # the tuple by parameter name.
    grad = network.train(x_batch, t_batch)[0]

    # Update in place: the Affine layers hold references to these same
    # arrays, so rebinding network.params[key] would leave them stale.
    for key in ('W1', 'W2', 'b1', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this batch as measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        # MLP.accuracy expects class scores, so run the inputs through
        # predict() first instead of passing raw images.
        train_acc = network.accuracy(network.predict(x_train), t_tarin)
        test_acc = network.accuracy(network.predict(x_test), t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
        

0.08598148148148148 0.08633333333333333
0.9007592592592593 0.9061666666666667
0.9204629629629629 0.9218333333333333
0.9271666666666667 0.9255
0.9413148148148148 0.9395
0.9477777777777778 0.9461666666666667
0.9525925925925925 0.9508333333333333
0.9577962962962963 0.9558333333333333
0.9615555555555556 0.9575
0.9644814814814815 0.9608333333333333
0.967 0.9618333333333333
0.9702777777777778 0.9658333333333333
0.9715185185185186 0.9655
0.9733888888888889 0.9673333333333334
0.9752037037037037 0.9656666666666667
0.9759444444444444 0.968
0.9773518518518518 0.968
0.9791111111111112 0.9681666666666666
0.9802407407407407 0.9708333333333333


In [13]:
# Display the recorded losses; the list holds one entry per training
# iteration, so the output is long.
train_loss_list

[2.299775227104365,
 2.3008773534876847,
 2.300551802992315,
 2.299784696076758,
 2.2975468378468245,
 2.2985986277821033,
 2.297988368808091,
 2.2971282088505443,
 2.295373430842981,
 2.2907490328463083,
 2.2955559560854386,
 2.2930730764197484,
 2.291578469208485,
 2.2919420125429237,
 2.2917730388234934,
 2.28931226945037,
 2.28780842958247,
 2.2905676539222997,
 2.286042824075033,
 2.2873016128842165,
 2.2804371769301923,
 2.27612600489863,
 2.27689769616783,
 2.2798130704800204,
 2.2713246984009907,
 2.2811394272934113,
 2.2634669309704845,
 2.275087076091219,
 2.258998109387375,
 2.271864221002486,
 2.259458762460555,
 2.258341272808191,
 2.226198378741421,
 2.2506631223484077,
 2.2380360078361936,
 2.2517116539000246,
 2.231586331971702,
 2.2332159136884133,
 2.233047673152857,
 2.2092342591390963,
 2.1994985619238903,
 2.184409059335242,
 2.2047483294922356,
 2.164626226600684,
 2.1724631801337395,
 2.133350360025378,
 2.142328403947999,
 2.1364843456934643,
 2.160265865335583,