In [1]:
import numpy as np

In [2]:
class Node:
    """Base class for a vertex of the computation graph.

    A node keeps the nodes it reads from (``inputs``), the nodes that read
    from it (``outputs``), its current ``value`` and a ``gradients`` dict.
    Subclasses are expected to override ``forward_partial`` and
    ``backward_partial``.

    Args:
        inputs: list of nodes this node depends on; ``None`` (the default)
            means the node has no inputs.
    """
    def __init__(self,inputs=None):
        # A literal ``[]`` default would be one list shared by every node
        # built without arguments, so a fresh list is created per instance.
        self.inputs = [] if inputs is None else inputs
        self.outputs = []

        # Register this node as a consumer of each of its inputs.
        for n in self.inputs:
            n.outputs.append(self)

        self.value = None
        # Maps a node to a gradient array; filled in by backward_partial of
        # the subclasses (e.g. a linear node stores one entry per input node).
        self.gradients = {}

    def forward_partial(self):
        """Compute ``self.value`` from the input values (subclass hook)."""
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produces a TypeError instead of the intended error.
        raise NotImplementedError

    def backward_partial(self):
        """Fill ``self.gradients`` from the consumers' gradients (subclass hook)."""
        raise NotImplementedError

In [3]:
class Input(Node):
    """Leaf node of the graph: it has no inputs and simply holds a value.

    Forward propagation just hands the stored value on to the consumers.
    """
    def __init__(self):
        Node.__init__(self)

    def forward_partial(self,value=None):
        """Optionally overwrite the stored value; ``None`` leaves it untouched."""
        if value is not None:
            self.value = value

    def backward_partial(self):
        """Collect the gradient of the cost with respect to this node.

        The result is stored under ``self.gradients[self]``.
        """
        self.gradients = {self:0}
        for n in self.outputs:
            grad_cost = n.gradients[self]
            # Sum over all consumers: plain assignment would keep only the
            # contribution of the last consumer when this node feeds several.
            self.gradients[self] = self.gradients[self] + grad_cost
    
    

In [4]:
class Linear(Node):
    """Affine layer node computing ``np.dot(inputs, weights) + bias``.

    Args:
        nodes: node providing the layer input.
        weights: node providing the weight matrix.
        bias: node providing the bias.
    """
    def __init__(self,nodes,weights,bias):
        Node.__init__(self,[nodes,weights,bias])

    def forward_partial(self):
        """Store the affine transform of the current input values in ``self.value``."""
        inputs = self.inputs[0].value
        weights = self.inputs[1].value
        bias = self.inputs[2].value

        self.value = np.dot(inputs,weights)+bias

    def backward_partial(self):
        """Compute the gradients for the input, weight and bias nodes.

        Each entry of ``self.gradients`` starts at zero and receives one
        contribution per consumer of this node.
        """
        self.gradients = {n:np.zeros_like(n.value) for n in self.inputs}
        x_node, w_node, b_node = self.inputs

        for n in self.outputs:
            grad_cost = n.gradients[self]
            # Accumulate instead of assigning so that a node with several
            # consumers keeps every contribution, not only the last one.
            self.gradients[x_node] = self.gradients[x_node] + np.dot(grad_cost, w_node.value.T)
            self.gradients[w_node] = self.gradients[w_node] + np.dot(x_node.value.T, grad_cost)
            # The bias is added to every row, so its gradient sums over axis 0.
            self.gradients[b_node] = self.gradients[b_node] + np.sum(grad_cost,axis=0,keepdims=False)

In [5]:
class Sigmoid(Node):
    """Element-wise logistic activation node.

    Args:
        node: the node whose value is passed through the sigmoid.
    """
    def __init__(self,node):
        Node.__init__(self,[node])

    def _sigmoid(self,x):
        """Return ``1 / (1 + exp(-x))`` element-wise."""
        return 1./(1+np.exp(-1*x))

    def forward_partial(self):
        """Cache the input in ``self.x`` and store its sigmoid in ``self.value``."""
        self.x=self.inputs[0].value
        self.value = self._sigmoid(self.x)

    def backward_partial(self):
        """Compute the gradient for the input node.

        The local derivative ``s * (1 - s)`` is kept in ``self.partial``.
        """
        # Evaluate the sigmoid once instead of twice for the derivative.
        activation = self._sigmoid(self.x)
        self.partial = activation*(1-activation)

        self.gradients = {n:np.zeros_like(n.value) for n in self.inputs}
        source = self.inputs[0]

        for n in self.outputs:
            grad_cost = n.gradients[self]
            # Accumulate so that every consumer contributes, not only the last.
            self.gradients[source] = self.gradients[source] + grad_cost*self.partial

In [6]:
class LOSS(Node):
    """Mean squared error between a target node and a prediction node.

    Args:
        y_true: node holding the target values.
        y_hat: node holding the predicted values.
    """
    def __init__(self, y_true, y_hat):
        Node.__init__(self,[y_true,y_hat])

    def forward_partial(self):
        """Store the mean of the squared differences in ``self.value``."""
        # Flatten both arrays into columns so that subtracting a 1-D target
        # from a column of predictions does not broadcast into a matrix.
        y_true = self.inputs[0].value.reshape(-1,1)
        y_hat = self.inputs[1].value.reshape(-1,1)

        self.diff = y_true-y_hat
        # Number of elements the mean below averages over. Using the element
        # count (rather than shape[0] of the target) keeps the gradient scale
        # consistent with np.mean for any input shape.
        self.m = self.diff.size

        self.value = np.mean(self.diff **2)

    def backward_partial(self):
        """Store the derivative of the mean squared error for both inputs."""
        self.gradients[self.inputs[0]] = (2/self.m)*self.diff
        self.gradients[self.inputs[1]] = (-2/self.m)*self.diff

In [7]:
def run_one_epoch(output_node,graph):
    """Run one forward sweep followed by one backward sweep over ``graph``.

    Args:
        output_node: not used by this function.
        graph: sequence of nodes; the forward sweep visits them in order and
            the backward sweep visits them in reverse order.
    """
    # Forward: every node computes its value, first to last.
    for node in graph:
        node.forward_partial()

    # Backward: every node computes its gradients, last to first.
    for node in reversed(graph):
        node.backward_partial()

In [8]:
# Starting from the nodes with in-degree 0, repeatedly extract nodes to order
# the whole graph (Kahn's algorithm).
def toplogical_sort(graph):
    """Return the nodes reachable from ``graph`` in topological order.

    Args:
        graph: dict whose keys are the starting nodes and whose values are
            the data to store in them. For keys that are ``Input`` instances
            the value is assigned to ``node.value``.

    Returns:
        A list of nodes in which every node appears after the nodes it
        depends on. For a given dict insertion order the result is
        deterministic.
    """
    from collections import deque

    input_nodes = list(graph.keys())

    # Pass 1: walk the graph breadth-first and record the edges of each node.
    G = {}
    pending = deque(input_nodes)
    expanded = set()
    while pending:
        n = pending.popleft()
        # Expand each node once; re-expanding it for every path that reaches
        # it adds no new edges and grows quickly on graphs with many paths.
        if n in expanded:
            continue
        expanded.add(n)
        G.setdefault(n, {'in':set(),'out':set()})
        for m in n.outputs:
            G.setdefault(m, {'in':set(),'out':set()})
            G[n]['out'].add(m)
            G[m]['in'].add(n)
            pending.append(m)

    # Pass 2: emit nodes whose incoming edges have all been removed.
    L = []
    ready = deque(input_nodes)
    while ready:
        n = ready.popleft()
        if isinstance(n,Input):
            n.value = graph[n]

        L.append(n)
        for m in n.outputs:
            # The same consumer may be listed twice in n.outputs; the edge is
            # stored once, so skip it after it has been removed.
            if m not in G[n]['out']:
                continue
            G[n]['out'].remove(m)
            G[m]['in'].remove(n)

            if len(G[m]['in'])==0:
                ready.append(m)

    return L

In [9]:
def gradient_descent_update(trainable_nodes,learning_rate = 1e-3):
    """Move every trainable node one step against its gradient.

    Args:
        trainable_nodes: iterable of nodes; each must expose ``value`` and a
            ``gradients`` dict containing an entry keyed by the node itself.
        learning_rate: factor applied to the gradient before subtracting it.
    """
    for param in trainable_nodes:
        step = learning_rate * param.gradients[param]
        param.value -= step

In [10]:
from sklearn.datasets import load_boston

In [11]:
# Load the Boston housing dataset bundle used by the cells below.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs an older scikit-learn — confirm the
# environment or switch to another data source.
data = load_boston()

In [12]:
# Pull the feature matrix and the target vector out of the loaded bundle.
X_, y_ = data['data'], data['target']

# Normalize: center each feature column and scale it by its standard deviation.
col_mean = np.mean(X_, axis=0)
col_std = np.std(X_, axis=0)
X_ = (X_ - col_mean) / col_std

# Layer sizes: one input unit per feature column, ten hidden units.
n_features = X_.shape[1]
n_hidden = 10

# Initial parameters: random-normal weights, zero biases.
W1_ = np.random.randn(n_features, n_hidden)
b1_ = np.zeros(n_hidden)
W2_ = np.random.randn(n_hidden, 1)
b2_ = np.zeros(1)


In [13]:
# Placeholder nodes, created in this order: data (X, y), then the parameters
# of the first layer (W1, b1) and of the second layer (W2, b2).
X, y, W1, b1, W2, b2 = (Input() for _ in range(6))

In [14]:
# Wire up the network. Each constructor registers the new node in the
# `outputs` list of its inputs, so the statements must run in this order.
# First affine layer: np.dot(X, W1) + b1.
linear1 = Linear(X,W1,b1)
# Element-wise sigmoid on the first layer's value.
sigmoid_1 = Sigmoid(linear1)
# Second affine layer on the activations.
linear2 = Linear(sigmoid_1,W2,b2)
# Mean squared difference between the targets y and the second layer's value.
cost = LOSS(y,linear2)

In [15]:
from sklearn.utils import resample,shuffle

In [16]:
# Bind every placeholder node to its initial array and order all reachable
# nodes so that each one comes after the nodes it depends on.
graph = toplogical_sort({
    X: X_,
    y: y_,
    W1: W1_,
    b1: b1_,
    W2: W2_,
    b2: b2_,
})

In [17]:
# Training hyperparameters.
epochs, batch_size = 1000, 16
rate = 1e-2

# Mini-batches drawn per epoch (integer division drops the remainder).
batch_num = X_.shape[0] // batch_size

# Parameter nodes that gradient descent updates.
trainables = [W1, b1, W2, b2]

# Loss history filled in by the training loop.
losses = []
  

In [18]:
# Mini-batch gradient descent: every epoch draws `batch_num` random batches,
# runs a forward/backward sweep on each and updates the trainable nodes.
for epoch in range(epochs):
    loss = 0

    for batch in range(batch_num):
        # Draw a random mini-batch of batch_size rows.
        X_batch,y_batch = resample(X_,y_,n_samples = batch_size)

        X.value = X_batch
        y.value = y_batch

        # The first argument is not used by run_one_epoch.
        run_one_epoch(None,graph)

        gradient_descent_update(trainables,rate)

        # graph[-1] is expected to be the cost node (last in the sorted order).
        loss +=graph[-1].value

    if epoch % 20 == 0:
        mean_loss = loss/batch_num
        print("Epoch: {}, Loss: {:.3f}".format(epoch, mean_loss))
        # Record the same per-batch mean that is printed; the raw sum would
        # be scaled by batch_num and disagree with the log.
        losses.append(mean_loss)
    

Epoch: 0, Loss: 155.889
Epoch: 20, Loss: 10.336
Epoch: 40, Loss: 8.618
Epoch: 60, Loss: 9.435
Epoch: 80, Loss: 7.583
Epoch: 100, Loss: 7.697
Epoch: 120, Loss: 6.365
Epoch: 140, Loss: 5.333
Epoch: 160, Loss: 5.240
Epoch: 180, Loss: 6.969
Epoch: 200, Loss: 5.739
Epoch: 220, Loss: 5.893
Epoch: 240, Loss: 6.061
Epoch: 260, Loss: 5.245
Epoch: 280, Loss: 5.053
Epoch: 300, Loss: 4.273
Epoch: 320, Loss: 4.790
Epoch: 340, Loss: 4.950
Epoch: 360, Loss: 4.519
Epoch: 380, Loss: 4.503
Epoch: 400, Loss: 4.537
Epoch: 420, Loss: 4.361
Epoch: 440, Loss: 3.963
Epoch: 460, Loss: 3.420
Epoch: 480, Loss: 4.350
Epoch: 500, Loss: 3.801
Epoch: 520, Loss: 4.447
Epoch: 540, Loss: 4.102
Epoch: 560, Loss: 4.088
Epoch: 580, Loss: 3.717
Epoch: 600, Loss: 3.325
Epoch: 620, Loss: 3.673
Epoch: 640, Loss: 3.624
Epoch: 660, Loss: 3.827
Epoch: 680, Loss: 3.823
Epoch: 700, Loss: 3.254
Epoch: 720, Loss: 3.744
Epoch: 740, Loss: 3.166
Epoch: 760, Loss: 3.811
Epoch: 780, Loss: 3.444
Epoch: 800, Loss: 3.201
Epoch: 820, Loss: 3