In [1]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the digits data as a (features, integer labels) pair.
features, labels = load_digits(return_X_y=True)
# One-hot encode the integer labels into 10 float32 columns.
y = np.eye(10, dtype=np.float32)[labels]
# Standardise the features with a freshly fitted StandardScaler.
X = StandardScaler().fit_transform(features)


In [3]:
# Work on copies and insert a singleton axis at position 1, turning the
# 2-D arrays into the rank-3 layout that the layers' forward() asserts.
x_n = np.expand_dims(X.copy(), axis=1)
t_n = np.expand_dims(y.copy(), axis=1)

In [4]:
class Linear:
    """Affine layer ``y = x @ w + b`` with manually implemented backprop.

    ``forward`` expects a rank-3 input whose last axis has length ``n_in``
    and caches it; ``backward`` must be called afterwards with the upstream
    gradient, and ``step`` applies a plain gradient-descent update.

    Attributes:
        w: weight matrix of shape ``(n_in, n_out)``.
        b: bias vector of shape ``(n_out,)``.
        dw, db: gradients w.r.t. ``w`` and ``b`` (set by ``backward``).
        x: input cached by the last ``forward`` call.
        out_shape: shape of the last ``forward`` output.
    """

    def __init__(self, n_in, n_out):
        # Small random normal weights (global NumPy RNG), zero bias.
        self.w = np.random.randn(n_in, n_out) * 0.1
        self.b = np.zeros([n_out])

        # Gradients, filled in by backward().
        self.dw = None
        self.db = None

        # Values cached by forward() for use in backward().
        self.x = None
        self.out_shape = None

    def forward(self, x):
        """Return ``x @ w + b`` and cache ``x`` and the output shape.

        Args:
            x: rank-3 array; the last axis is multiplied against ``w``.

        Returns:
            Array with the same leading axes as ``x`` and last axis ``n_out``.
        """
        assert len(x.shape) == 3
        assert len(self.w.shape) == 2
        assert len(self.b.shape) == 1

        self.x = x
        y = x @ self.w + self.b
        self.out_shape = y.shape
        return y

    def backward(self, d):
        """Back-propagate the upstream gradient ``d`` through the layer.

        Stores ``self.dw`` and ``self.db`` and returns the gradient with
        respect to the cached input.

        Args:
            d: gradient w.r.t. the layer output; must have exactly the shape
                returned by the preceding ``forward`` call.

        Returns:
            Gradient w.r.t. the input, same shape as the cached ``x``.
        """
        assert d.shape == self.out_shape, (d.shape, self.out_shape)

        # The bias is broadcast over the first two axes, so its gradient
        # is the sum of d over those axes.
        self.db = np.sum(d, axis=(0, 1))
        assert self.db.shape == self.b.shape

        # dw[k, l] = sum_{b, i} x[b, i, k] * d[b, i, l].  Contracting the two
        # leading axes directly avoids materialising the dense
        # (B, S, n_out, n_in, n_out) Jacobian.
        self.dw = np.tensordot(self.x, d, axes=([0, 1], [0, 1]))
        assert self.dw.shape == self.w.shape

        # d_in[b, i, k] = sum_j d[b, i, j] * w[k, j].  A single batched matmul
        # replaces the (B, B, S, n_in) intermediate that was summed over and
        # then divided by the batch size.
        d_in = d @ self.w.T
        assert d_in.shape == self.x.shape
        return d_in

    def step(self, lr):
        """Gradient-descent update: subtract ``lr`` times the stored gradients."""
        self.w = self.w - lr * self.dw
        self.b = self.b - lr * self.db

In [5]:
class Softmax:
    """Softmax over the last axis of a rank-3 array, with backprop.

    Attributes:
        a: output of the most recent ``forward`` call (``None`` before that).
    """

    def __init__(self):
        self.a = None

    def forward(self, x):
        """Return softmax of ``x`` along the last axis and cache the result.

        Args:
            x: rank-3 array of scores.

        Returns:
            Array of the same shape whose last axis sums to 1.
        """
        assert len(x.shape) == 3
        # Subtracting the per-row maximum keeps exp() from overflowing and
        # does not change the result.
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        self.a = exps / np.sum(exps, keepdims=True, axis=-1)
        return self.a

    def backward(self, d):
        """Multiply the upstream gradient by the softmax Jacobian.

        For one row with output ``a`` the Jacobian is
        ``diag(a) - outer(a, a)``, so ``d @ J`` equals
        ``a * (d - sum(d * a))``.  Evaluating that closed form per row handles
        every position along axis 1 (not only index 0) and avoids building a
        Jacobian matrix for each sample.

        Args:
            d: gradient w.r.t. the softmax output, same shape as ``self.a``.

        Returns:
            Gradient w.r.t. the softmax input, same shape as ``d``.
        """
        a = self.a
        return a * (d - np.sum(d * a, axis=-1, keepdims=True))

In [6]:
class CrossEntropy:
    """Cross-entropy loss between predicted probabilities and targets.

    Predicted probabilities are floored at ``eps`` before taking the log or
    dividing, so a probability that is exactly 0 yields a finite loss and
    gradient instead of NaN/inf.

    Attributes:
        eps: lower bound applied to the predicted probabilities.
        y_: predictions cached by the last ``forward`` call.
        y: targets cached by the last ``forward`` call.
    """

    def __init__(self, eps=1e-12):
        self.eps = eps
        self.y_ = None
        self.y = None

    def forward(self, y_, y):
        """Compute the loss and cache both inputs for ``backward``.

        Args:
            y_: predicted probabilities.
            y: targets, same shape as ``y_``.

        Returns:
            Tuple ``(y_, loss)``: the predictions, unchanged, and
            ``-sum(y * log(y_))`` divided by the length of the first axis.
        """
        self.y_ = y_
        self.y = y

        assert y_.shape == y.shape

        # Floor the probabilities so log(0) = -inf (and 0 * -inf = nan)
        # cannot occur.
        safe = np.maximum(y_, self.eps)
        l = - np.sum(y * np.log(safe))
        l /= len(y)
        return y_, l

    def backward(self):
        """Return the gradient of the loss w.r.t. the cached predictions.

        Returns:
            ``(-y / y_) / len(y_)`` with ``y_`` floored at ``eps``; same shape
            as the cached predictions.
        """
        y_ = self.y_
        y = self.y

        assert y_.shape == y.shape
        # Same floor as in forward(): avoids division by zero.
        d = (- y / np.maximum(y_, self.eps)) / len(y_)
        return d

In [7]:
class LogRes:
    """Linear layer followed by softmax, trained with cross-entropy loss.

    Args:
        n_in: size of the last axis of the input (default 64).
        n_out: number of output classes (default 10).
    """

    def __init__(self, n_in=64, n_out=10):
        self.linear1 = Linear(n_in, n_out)
        self.softmax = Softmax()
        self.loss = CrossEntropy()

    def forward(self, x, y):
        """Run the forward pass.

        Args:
            x: input batch passed to the linear layer.
            y: targets passed to the loss.

        Returns:
            Whatever ``CrossEntropy.forward`` returns: the tuple
            ``(predicted probabilities, loss)``.
        """
        logits = self.linear1.forward(x)
        probs = self.softmax.forward(logits)
        return self.loss.forward(probs, y)

    def backward(self):
        """Back-propagate from the loss through softmax and the linear layer.

        Parameter gradients are stored on ``self.linear1``.

        Returns:
            The gradient w.r.t. the model input, as returned by
            ``Linear.backward``.
        """
        d = self.loss.backward()
        d = self.softmax.backward(d)
        return self.linear1.backward(d)

    def step(self, lr):
        """Apply one gradient-descent update with learning rate ``lr``."""
        self.linear1.step(lr)

In [8]:
# Seed NumPy's global RNG before building the model: the linear layer draws
# its initial weights from np.random, so without a fixed seed every
# Restart & Run All would start from different weights.
np.random.seed(0)
logres = LogRes()

In [9]:
# Mini-batch size: the training loop slices x_n / t_n in chunks of this many rows.
batch_size = 128

In [10]:
# 16 passes over the data in consecutive (unshuffled) mini-batches.
for epoch in range(16):
    for start in range(0, len(x_n), batch_size):
        stop = start + batch_size
        x_batch, t_batch = x_n[start:stop], t_n[start:stop]

        # Forward, backward, then one gradient-descent update per mini-batch.
        y_batch, loss = logres.forward(x_batch, t_batch)
        logres.backward()
        logres.step(lr=0.1)

        # Report once per epoch, using the first mini-batch only.
        if start != 0:
            continue
        predicted = np.argmax(y_batch, axis=-1)
        target = np.argmax(t_batch, axis=-1)
        print('Loss', loss)
        print('Accuracy:', (predicted == target).sum() / len(y_batch))

Loss 2.3302958458204133
Accuracy: 0.203125
Loss 1.073794456521439
Accuracy: 0.7578125
Loss 0.7385588104587009
Accuracy: 0.8046875
Loss 0.5932505963822458
Accuracy: 0.8515625
Loss 0.5090887035252033
Accuracy: 0.859375
Loss 0.4517124208825424
Accuracy: 0.859375
Loss 0.40880502030840693
Accuracy: 0.8671875
Loss 0.37494774953357685
Accuracy: 0.875
Loss 0.34734898134110387
Accuracy: 0.8828125
Loss 0.32436836741969566
Accuracy: 0.921875
Loss 0.3049368510900545
Accuracy: 0.921875
Loss 0.28830406221782023
Accuracy: 0.921875
Loss 0.2739169655905265
Accuracy: 0.9296875
Loss 0.2613552531902281
Accuracy: 0.9453125
Loss 0.2502932033904006
Accuracy: 0.9453125
Loss 0.24047501919723024
Accuracy: 0.9453125
