In [153]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

In [154]:
# Load the scikit-learn digits dataset. With return_X_y=True the loader returns
# a (data, target) tuple, unpacked here as the feature matrix X and labels y.
X, y = load_digits(return_X_y=True)

In [155]:
# One-hot encode the integer class labels: row k of the 10x10 identity matrix
# is the one-hot vector for class k, so indexing np.eye(10) with the label
# vector produces one row per sample.
# The ndim guard keeps this cell idempotent: re-running it on labels that are
# already one-hot (2-D float array) would otherwise fail, because np.eye cannot
# be indexed with a float array.
if y.ndim == 1:
    y = np.eye(10)[y]

In [156]:
# Fit a StandardScaler on X and overwrite X with the scaled features.
# NOTE(review): the fitted scaler object is discarded, so the same scaling
# cannot be re-applied to new data later, and it is fit on every sample in X
# (no held-out split is visible in this notebook).
X = StandardScaler().fit_transform(X)


In [157]:
class Linear:
    """Fully connected (affine) layer computing ``x @ w + b``.

    ``forward`` caches its input so that ``backward`` can form the parameter
    gradients; ``step`` then applies a plain gradient-descent update.

    Attributes:
        w: weight matrix of shape ``(in_dim, out_dim)``.
        b: bias vector of shape ``(out_dim,)``.
        dw, db: gradients of the loss w.r.t. ``w`` and ``b`` (``None`` until
            ``backward`` has run).
        x: input of the most recent ``forward`` call (``None`` before that).
    """

    def __init__(self, in_dim, out_dim):
        # Weights come from a standard normal drawn from NumPy's global RNG;
        # the bias starts at zero.
        self.w = np.random.randn(in_dim, out_dim)
        self.b = np.zeros([out_dim])
        # Populated later by backward() (dw, db) and forward() (x).
        self.dw = self.db = self.x = None

    def forward(self, x):
        """Return ``x @ w + b`` and remember ``x`` for the backward pass.

        Args:
            x: 2-D array of shape ``(batch, in_dim)``.
        """
        self.x = x
        return x @ self.w + self.b

    def backward(self, d):
        """Store parameter gradients and return the gradient w.r.t. the input.

        Args:
            d: upstream gradient w.r.t. this layer's output, shape
                ``(batch, out_dim)``.

        Returns:
            Gradient w.r.t. the layer input, shape ``(batch, in_dim)``.
        """
        cached_input = self.x
        self.dw = cached_input.T @ d
        self.db = np.sum(d, axis=0)  # bias is broadcast over the batch axis
        # Sanity checks: each gradient must match its parameter's shape.
        assert self.dw.shape == self.w.shape
        assert self.db.shape == self.b.shape

        return np.matmul(d, self.w.T)

    def step(self, lr):
        """Apply one gradient-descent update with learning rate ``lr``.

        New arrays are bound to ``w`` and ``b`` (no in-place mutation).
        """
        self.w = self.w - self.dw * lr
        self.b = self.b - self.db * lr

In [158]:
class Softmax:
    """Row-wise softmax activation with a manual backward pass.

    Attributes:
        a: softmax output of the most recent ``forward`` call, shape
            ``(batch, classes)``; ``None`` until ``forward`` has run.
    """

    def __init__(self):
        self.a = None

    def forward(self, x):
        """Return the softmax of each row of the 2-D array ``x``.

        The row maximum is subtracted first so that ``np.exp`` cannot overflow;
        this does not change the result because softmax is shift-invariant.
        """
        assert len(x.shape) == 2
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)  # computed once and reused for the normaliser
        self.a = exps / np.sum(exps, axis=-1, keepdims=True)
        return self.a

    def backward(self, d):
        """Back-propagate ``d`` (gradient w.r.t. the softmax output).

        For one row the softmax Jacobian is ``J[i, j] = a_i * (delta_ij - a_j)``,
        so ``(J @ d)_i = a_i * (d_i - sum_j a_j * d_j)``. Using that closed form
        avoids building the ``(batch, classes, classes)`` Jacobian explicitly.

        Args:
            d: array with the same shape as the cached output ``a``.

        Returns:
            Gradient w.r.t. the softmax input, same shape as ``d``.
        """
        assert self.a is not None, "forward() must be called before backward()"
        a = self.a
        # Guard against silent broadcasting of a mis-shaped upstream gradient.
        assert np.shape(d) == a.shape
        weighted_sum = np.sum(a * d, axis=-1, keepdims=True)
        return a * (d - weighted_sum)


In [159]:
class CrossEntropy:
    """Cross-entropy loss between predicted probabilities and targets.

    Args:
        average: if true, the loss and its gradient are divided by ``len(y)``
            (the size of the first axis of the targets).
        eps: small constant added to the predictions before taking the log or
            dividing, so that a predicted probability of exactly 0 does not
            yield ``inf``/``nan``. Defaults to the previously hard-coded 1e-9
            and is shared by ``forward`` and ``backward``.
    """

    def __init__(self, average=True, eps=1e-9):
        self.average = average
        self.eps = eps

    def forward(self, y_, y):
        """Compute the loss.

        Args:
            y_: predicted probabilities.
            y: targets (one-hot rows in this notebook).

        Returns:
            Tuple ``(y_, loss)``: the predictions passed through unchanged and
            the scalar loss ``-sum(y * log(y_ + eps))``, divided by ``len(y)``
            when ``average`` is set.
        """
        loss = -np.sum(y * np.log(y_ + self.eps))
        if self.average:
            loss = loss / len(y)
        return y_, loss

    def backward(self, y_, y):
        """Return the gradient of the loss w.r.t. ``y_`` (same shape as ``y_``)."""
        assert y_.shape == y.shape
        d = -y / (y_ + self.eps)
        if self.average:
            d = d / len(y)
        return d

In [160]:
class LogisticRegression:
    """Multinomial logistic regression: Linear -> Softmax -> CrossEntropy.

    Args:
        in_dim: number of input features. Defaults to 64, the value that was
            previously hard-coded.
        out_dim: number of classes. Defaults to 10, the value that was
            previously hard-coded.
    """

    def __init__(self, in_dim=64, out_dim=10):
        self.linear = Linear(in_dim, out_dim)
        self.softmax = Softmax()
        self.loss = CrossEntropy()

    def forward(self, x, y):
        """Run the forward pass.

        Returns:
            Whatever ``CrossEntropy.forward`` returns, i.e. the tuple
            ``(y_, loss)`` of predicted probabilities and scalar loss.
        """
        logits = self.linear.forward(x)
        probs = self.softmax.forward(logits)
        return self.loss.forward(probs, y)

    def backward(self, x, y_, y):
        """Back-propagate through loss, softmax and linear layer.

        The gradients end up stored on ``self.linear`` (``dw``/``db``); nothing
        is returned. ``x`` is not used -- the linear layer works from the input
        it cached during ``forward`` -- but is kept in the signature for
        compatibility with existing callers.
        """
        d = self.loss.backward(y_, y)
        d = self.softmax.backward(d)
        self.linear.backward(d)

    def step(self, lr):
        """Apply one gradient-descent update with learning rate ``lr``."""
        self.linear.step(lr)

        

In [161]:
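# Optional 2-sample, 2-class toy dataset for debugging (left disabled).
# Note: uncommenting this replaces the digits data loaded above, and
# LogisticRegression builds a 64-input / 10-output linear layer, so the model
# dimensions would have to be changed to 2 -> 2 to match this data.
# X = np.array([[2, 1], [1, 2]])
# y = np.array([[0, 1], [1, 0]])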


In [162]:
# Linear draws its initial weights from NumPy's global RNG (np.random.randn),
# so seed it here to make the initialisation -- and therefore the whole
# training run -- reproducible under Restart & Run All.
SEED = 0
np.random.seed(SEED)
logreg = LogisticRegression()

In [163]:
# Full-batch gradient descent: every step uses all of X / y.
N_STEPS = 10000       # number of gradient updates
LEARNING_RATE = 0.1   # step size passed to logreg.step
LOG_EVERY = 100       # print loss / accuracy every this many steps

for i in range(N_STEPS):
    y_, loss = logreg.forward(X, y)
    logreg.backward(X, y_, y)
    logreg.step(LEARNING_RATE)
    if i % LOG_EVERY == 0:
        print(loss, 'loss')
        # Fraction of samples whose arg-max prediction matches the arg-max of
        # the one-hot target. This is accuracy on the same data the model is
        # being trained on, not a held-out estimate.
        print(np.mean(np.argmax(y_, axis=-1) == np.argmax(y, axis=-1)))

10.736324423454974 loss
0.07122982749026155
1.6944195903444617 loss
0.66110183639399
0.869685744662302 loss
0.8107957707289928
0.6055544076819385 loss
0.8580968280467446
0.47360765645736497 loss
0.889259877573734
0.3911171241791025 loss
0.907623817473567
0.33480955338928864 loss
0.9193099610461881
0.294011762150846 loss
0.9282136894824707
0.26390494318298247 loss
0.9371174179187535
0.2419629791438179 loss
0.9421257651641625
0.22492487048933418 loss
0.9476905954368392
0.2110209941185868 loss
0.9510294936004452
0.1993603699884053 loss
0.9532554257095158
0.18938460356197648 loss
0.9532554257095158
0.1807091269597363 loss
0.9554813578185866
0.17305996627284764 loss
0.9588202559821926
0.16624016661964125 loss
0.9627156371730662
0.16010745813945165 loss
0.9632721202003339
0.15455769868718086 loss
0.9643850862548692


KeyboardInterrupt: 