In [28]:
import numpy as np

In [29]:
class Sigmoid:
    """Element-wise logistic sigmoid layer.

    The layer has no trainable parameters, so ``params`` and ``grads`` are
    empty lists. ``out`` caches the most recent forward result because the
    backward pass is expressed in terms of it.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` and cache it on ``self.out``."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Scale the upstream gradient by the sigmoid derivative ``y * (1 - y)``.

        Uses the output cached by the latest ``forward`` call.
        """
        y = self.out
        return dout * (1 - y) * y
         

    

In [30]:
class Affine:
    """Fully connected layer: ``x @ W + b``.

    The matrix product is delegated to an internal ``MatMul`` layer built
    around the same ``W`` object; this layer adds the bias and keeps its own
    gradient buffers for ``W`` and ``b`` in ``self.grads``.
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        self.matmul_layer = MatMul(W)
        self.x = None

    def forward(self, x):
        """Return the affine transform of ``x`` and remember ``x``."""
        bias = self.params[1]
        result = self.matmul_layer.forward(x) + bias
        self.x = x
        return result

    def backward(self, dout):
        """Back-propagate ``dout``; fill ``self.grads`` in place and return dx.

        The weight gradient is taken from the inner MatMul layer after its
        backward pass; the bias gradient is ``dout`` summed over axis 0.
        """
        dx = self.matmul_layer.backward(dout)
        weight_grad = self.matmul_layer.grads[0]
        bias_grad = np.sum(dout, axis=0)

        # Write through the existing buffers so that anyone holding a
        # reference to these arrays sees the new values.
        grad_W, grad_b = self.grads
        grad_W[...] = weight_grad
        grad_b[...] = bias_grad
        return dx

In [31]:
class TwoLayerNet:
    """Two-layer network: Affine -> Sigmoid -> Affine, with a SoftmaxWithLoss head.

    ``params`` and ``grads`` are flat lists gathered from every layer in
    order, holding references to the layers' own objects.
    """

    def __init__(self, input_size, hidden_size, output_size):
        n_in, n_hidden, n_out = input_size, hidden_size, output_size

        # Small Gaussian weights, zero biases.
        W1 = 0.01 * np.random.randn(n_in, n_hidden)
        b1 = np.zeros(n_hidden)
        W2 = 0.01 * np.random.randn(n_hidden, n_out)
        b2 = np.zeros(n_out)

        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2),
        ]
        self.loss_layer = SoftmaxWithLoss()

        # Flatten per-layer parameters and gradients into single lists.
        self.params = [p for layer in self.layers for p in layer.params]
        self.grads = [g for layer in self.layers for g in layer.grads]

    def forward(self, x, t):
        """Return the loss for inputs ``x`` against targets ``t``."""
        return self.loss_layer.forward(self.predict(x), t)

    def predict(self, x):
        """Pass ``x`` through every layer in order and return the scores."""
        activation = x
        for layer in self.layers:
            activation = layer.forward(activation)
        return activation

    def backward(self, dout=1):
        """Back-propagate from the loss layer through the layers in reverse.

        Returns the gradient produced by the first layer's ``backward``.
        """
        grad = self.loss_layer.backward(dout)
        for layer in self.layers[::-1]:
            grad = layer.backward(grad)
        return grad
        

In [32]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, fused into one layer.

    NOTE(review): ``softmax`` and ``cross_entropy_error`` are not defined in
    the visible cells; they must already exist in the notebook namespace
    before ``forward`` is called -- confirm where they come from.
    """

    def __init__(self):
        self.params = []
        self.grads = []
        self.y = None  # softmax output
        self.t = None  # supervision labels

    def forward(self, x, t):
        """Store softmax(x) and the labels, and return the loss."""
        self.t = t
        self.y = softmax(x)

        # Labels with as many elements as the softmax output are treated as
        # one-hot and reduced to class indices.
        labels_are_one_hot = self.t.size == self.y.size
        if labels_are_one_hot:
            self.t = self.t.argmax(axis=1)

        return cross_entropy_error(self.y, self.t)

    def backward(self, dout=1):
        """Return ``(y - onehot(t)) * dout / batch_size``.

        ``self.y`` is left unmodified; the computation runs on a copy.
        """
        n_samples = self.t.shape[0]
        sample_rows = np.arange(n_samples)

        dx = self.y.copy()
        dx[sample_rows, self.t] -= 1
        dx *= dout
        return dx / n_samples
        

In [None]:
class SGD:
    """Plain stochastic gradient descent: ``param -= lr * grad``."""

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        """Apply one update step to every entry of ``params``.

        ``params`` and ``grads`` are indexed in parallel; each entry of
        ``params`` is updated with augmented assignment.
        """
        for index, _ in enumerate(params):
            params[index] -= self.lr * grads[index]
        

In [33]:
class MatMul:
    """Matrix-multiplication layer computing ``np.dot(x, W)``.

    ``params`` holds the single weight matrix ``W``; ``grads`` holds one
    buffer of the same shape that ``backward`` fills in place.
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None  # input of the latest forward pass, needed by backward

    def forward(self, x):
        """Return ``np.dot(x, W)`` and remember ``x`` for the backward pass."""
        W, = self.params
        out = np.dot(x, W)
        self.x = x
        return out

    def backward(self, dout):
        """Back-propagate ``dout``.

        Stores ``np.dot(x.T, dout)`` into ``self.grads[0]`` and returns
        ``np.dot(dout, W.T)``, where ``x`` is the input cached by ``forward``.
        """
        W, = self.params
        dx = np.dot(dout, W.T)
        # Use the input cached by forward(), not a same-named notebook
        # global, so the weight gradient matches the data that was
        # actually propagated through this layer.
        dW = np.dot(self.x.T, dout)
        self.grads[0][...] = dW
        return dx

# Repeat node: forward copies one row N times; backward sums the N
# upstream gradient rows back into a single row.
N, D = 8, 7

# forward: (1, D) -> (N, D)
x = np.random.randn(1, D)
y = x.repeat(N, axis=0)

# backward: (N, D) -> (1, D)
dy = np.random.randn(N, D)
dx = dy.sum(axis=0, keepdims=True)

# Sum node: the mirror image of the repeat node. Forward sums N rows into
# one; backward copies the single upstream gradient row N times.
D, N = 8, 7

# forward: (N, D) -> (1, D)
x = np.random.randn(N, D)
y = x.sum(axis=0, keepdims=True)

# backward: (1, D) -> (N, D)
dy = np.random.randn(1, D)
dx = dy.repeat(N, axis=0)

In [36]:
from dataset import spiral
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Hyperparameters.
    max_epoch, batch_size = 300, 30
    hidden_size, learning_rate = 10, 1.0

    # Data, model (2 inputs, 3 output classes) and optimizer.
    x, t = spiral.load_data()
    model = TwoLayerNet(2, hidden_size, 3)
    optimizer = SGD(lr=learning_rate)

    data_size = len(x)
    max_iters = data_size // batch_size

    # Running loss accumulators; reset every time an average is reported.
    total_loss, loss_count = 0, 0
    loss_list = []

    for epoch in range(max_epoch):
        # Reshuffle the whole data set at the start of each epoch.
        idx = np.random.permutation(data_size)
        x, t = x[idx], t[idx]

        for iters in range(max_iters):
            batch_x = x[iters * batch_size:(iters + 1) * batch_size]
            batch_t = t[iters * batch_size:(iters + 1) * batch_size]

            # One optimisation step: forward, backward, parameter update.
            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)

            total_loss += loss
            loss_count += 1

            # Report and record the mean loss every 10 iterations.
            if (iters + 1) % 10 != 0:
                continue
            avg_loss = total_loss / loss_count
            print("| epoch %d | iter %d / %d | loss %.2f" %
                  (epoch + 1, iters + 1, max_iters, avg_loss))
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0
    

[[ 1.79934572  2.08683229  0.00549843]
 [ 1.54092278  1.91033097  0.04106582]
 [ 1.2027725   1.68299629  0.26930903]
 [ 1.43382449  1.89089849 -0.14378329]
 [ 1.25249114  2.04999854  0.78202866]
 [ 1.66262722  1.98393094  0.07497477]
 [ 1.9227948   2.18187713 -0.11623625]
 [ 1.54225802  1.91246698  0.03230856]
 [ 1.84078965  2.11472244 -0.10299872]
 [ 1.36997154  1.84615331 -0.08717629]]
