### 在神经网络中，每个节点基本都包括输入，输出，向前计算，向后计算，导数

#### 构建一个节点对象

In [3]:
class Node:
    """Base class for a vertex of the computation graph.

    Attributes:
        inputs: nodes whose values this node consumes.
        value: result of the most recent forward pass (``None`` until set).
        outputs: nodes that consume this node; filled in automatically when
            a consumer is constructed with this node among its inputs.
        gradients: dictionary meant to be filled by ``backward``
            implementations, keyed by node.
    """

    def __init__(self, inputs=None):
        # Build a fresh list per instance: a literal ``[]`` default would be
        # one shared object reused by every Node created without arguments.
        self.inputs = [] if inputs is None else inputs
        self.value = None
        self.outputs = []
        self.gradients = {}
        # Link this node to its predecessors by registering it as a consumer.
        for node in self.inputs:
            node.outputs.append(self)

    def forward(self):
        """Forward computation; subclasses must override.

        Raises:
            NotImplementedError: always, on the base class.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raising it produces a confusing TypeError.
        raise NotImplementedError

    def backward(self):
        """Backward (gradient) computation; subclasses must override.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError

In [4]:
# Input继承Node
class Input(Node):
    """Graph node with no predecessors whose value is supplied from outside.

    Args:
        name: label used only by ``__repr__``.
    """

    def __init__(self, name=''):
        Node.__init__(self, inputs=[])
        self.name = name

    def forward(self, value=None):
        """Optionally replace the stored value.

        Args:
            value: new value for the node; ``None`` leaves ``self.value``
                untouched.
        """
        if value is not None:
            self.value = value

    def backward(self):
        """Store the gradient with respect to this node under key ``self``.

        The contributions of all consumers are summed, so a node that feeds
        several consumers receives the total gradient rather than only the
        last consumer's share. With no consumers the gradient is ``0``.
        """
        self.gradients = {self: 0}
        for n in self.outputs:
            # Non in-place addition: never mutate the consumer's own array.
            self.gradients[self] = self.gradients[self] + n.gradients[self]

    def __repr__(self):
        return 'Input Node:{}'.format(self.name)

In [5]:
import numpy as np

In [26]:
# 构造线性连接
class Linear(Node):
    """Affine node computing ``np.dot(x, w) + b``.

    Args:
        nodes: node providing the input ``x``.
        weights: node providing the weight matrix ``w``.
        bias: node providing the bias ``b``.
    """

    def __init__(self, nodes, weights, bias):
        self.w_node = weights
        self.x_node = nodes
        self.b_node = bias
        Node.__init__(self, inputs=[nodes, weights, bias])

    def forward(self):
        """Compute ``np.dot(x, w) + b`` with NumPy and store it in ``self.value``."""
        self.value = np.dot(self.x_node.value, self.w_node.value) + self.b_node.value

    def backward(self):
        """Compute gradients with respect to the weights, bias and input.

        For the incoming gradient ``g`` of each consumer:
        ``dw = x.T . g``, ``db = sum of g over axis 0``, ``dx = g . w.T``.
        Contributions of all consumers are summed; with no consumers each
        gradient is ``0``.
        """
        x = self.x_node.value
        w = self.w_node.value
        grad_w = grad_b = grad_x = 0
        for node in self.outputs:
            grad_cost = node.gradients[self]
            grad_w = grad_w + np.dot(x.T, grad_cost)
            # The bias is added to every row, so its gradient is the sum of
            # the incoming gradient over rows (np.dot takes no axis argument).
            grad_b = grad_b + np.sum(grad_cost, axis=0, keepdims=False)
            grad_x = grad_x + np.dot(grad_cost, w.T)
        self.gradients[self.w_node] = grad_w
        self.gradients[self.b_node] = grad_b
        self.gradients[self.x_node] = grad_x

In [7]:
class Sigmoid(Node):
    """Node applying the logistic function to the value of a single input node."""

    def __init__(self, node):
        Node.__init__(self, [node])
        self.x_node = node

    def _sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))`` computed with NumPy."""
        denominator = 1. + np.exp(-1 * x)
        return 1. / denominator

    def forward(self):
        """Store the sigmoid of the input node's value in ``self.value``."""
        self.value = self._sigmoid(self.x_node.value)

    def backward(self):
        """Store the gradient with respect to the input node.

        The local derivative ``y * (1 - y)`` is kept in ``self.partial``.
        Each consumer's incoming gradient is multiplied by it and written to
        ``self.gradients[self.x_node]``; every consumer overwrites the
        previous entry, so the last one in ``self.outputs`` wins.
        """
        activation = self.value
        self.partial = activation * (1 - activation)
        for consumer in self.outputs:
            upstream = consumer.gradients[self]
            self.gradients[self.x_node] = upstream * self.partial

In [8]:
class MSE(Node):
    """Mean-squared-error node comparing a target node with a prediction node.

    Args:
        y_true: node holding the target values.
        y_hat: node holding the predicted values.
    """

    def __init__(self, y_true, y_hat):
        self.y_true_node = y_true
        self.y_hat_node = y_hat
        Node.__init__(self, inputs=[y_true, y_hat])

    def forward(self):
        """Store the mean of the squared differences in ``self.value``.

        Both operands are reshaped to a single column first so that they
        subtract element by element; the difference is kept in ``self.diff``
        for the backward pass.
        """
        target_column = self.y_true_node.value.reshape(-1, 1)
        predicted_column = self.y_hat_node.value.reshape(-1, 1)
        self.diff = target_column - predicted_column
        self.value = np.mean(np.square(self.diff))

    def backward(self):
        """Store ``(2/n) * diff`` for the target and ``(-2/n) * diff`` for the prediction.

        ``n`` is the first dimension of the prediction node's value.
        """
        n = self.y_hat_node.value.shape[0]
        scale = 2 / n
        self.gradients[self.y_true_node] = scale * self.diff
        self.gradients[self.y_hat_node] = -scale * self.diff

In [9]:
def training_one_batch(topological_sorted_graph):
    """Run one forward sweep followed by one backward sweep over the graph.

    Args:
        topological_sorted_graph: sequence of nodes; ``forward()`` is called
            on each in the given order, then ``backward()`` on each in the
            reverse order.
    """
    ordered_nodes = topological_sorted_graph
    for current in ordered_nodes:
        current.forward()
    for current in reversed(ordered_nodes):
        current.backward()

In [35]:
def topological_sorted(data_with_value):
    """Return the graph's nodes in a topological order (Kahn's algorithm).

    Args:
        data_with_value: dictionary mapping each starting node to its value.
            The keys seed the traversal; every ``Input`` node popped during
            sorting gets ``value`` set from this dictionary.

    Returns:
        A list of all nodes reachable from the keys through ``outputs``,
        ordered so that a node appears only after every discovered
        predecessor. The relative order of independent nodes is unspecified
        because candidates are drawn from a set.
    """
    feed_dict = data_with_value
    input_nodes = list(feed_dict)

    # Phase 1: discover the graph and record incoming/outgoing edges.
    G = {}
    pending = list(input_nodes)
    expanded = set()
    while pending:
        n = pending.pop()
        # Expand each node once; re-walking shared sub-graphs only repeats
        # work without adding any edge.
        if n in expanded:
            continue
        expanded.add(n)
        G.setdefault(n, {'in': set(), 'out': set()})
        for m in n.outputs:
            G.setdefault(m, {'in': set(), 'out': set()})
            G[n]['out'].add(m)
            G[m]['in'].add(n)
            pending.append(m)

    # Phase 2: repeatedly emit a node with no remaining incoming edges.
    L = []
    S = set(input_nodes)
    while S:
        n = S.pop()
        if isinstance(n, Input):
            n.value = feed_dict[n]
        L.append(n)
        for m in n.outputs:
            # A consumer listed twice in ``outputs`` is a single edge in G;
            # skip the repeat instead of removing a missing set member.
            if m not in G[n]['out']:
                continue
            G[n]['out'].remove(m)
            G[m]['in'].remove(n)
            if not G[m]['in']:
                S.add(m)
    return L

In [11]:
def sgd_update(trainable_nodes, learning_rate=1e-2):
    """Apply one gradient-descent step to every trainable node.

    Args:
        trainable_nodes: iterable of nodes exposing ``value`` and a
            ``gradients`` dictionary holding the node's own gradient under
            the node itself as key.
        learning_rate: step size multiplied with the gradient.

    Raises:
        KeyError: if a node has no gradient stored for itself.
    """
    for t in trainable_nodes:
        # Augmented assignment keeps the update in place for array values, so
        # other references to the same array see the new value too.
        t.value += -1 * learning_rate * t.gradients[t]

In [12]:
from sklearn.datasets import load_boston

In [13]:
data=load_boston()

In [14]:
X_=data['data']

In [15]:
y_=data['target']

In [16]:
X_[0]

array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
       6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
       4.980e+00])

In [17]:
y_[0]

24.0

In [18]:
n_hidden=10

In [19]:
n_hidden_2=10

In [20]:
# Standardize every feature column: subtract the column mean and divide by
# the column standard deviation (both taken along axis 0).
X_=(X_-np.mean(X_,axis=0))/np.std(X_, axis=0)

In [21]:
n_features=X_.shape[1]

In [22]:
# First-layer parameters: standard-normal weights of shape
# (n_features, n_hidden) and an all-zero bias of length n_hidden.
W1_,b1_=np.random.randn(n_features, n_hidden), np.zeros(n_hidden)

In [23]:
# Second-layer parameters: standard-normal weights of shape (n_hidden, 1)
# and a single zero bias.
W2_,b2_=np.random.randn(n_hidden,1),np.zeros(1)

In [24]:
# Input nodes for the features and the targets; they carry no value yet.
X,y=Input(name='X'), Input(name='y')
# Input nodes for the weights and bias of the first Linear node.
W1,b1=Input(name='W1'), Input(name='b1')
# Input nodes for the weights and bias of the second Linear node.
W2,b2=Input(name='W2'), Input(name='b2')

In [27]:
# First Linear node: np.dot(X, W1) + b1.
linear_output=Linear(X,W1,b1)

In [28]:
# Sigmoid activation applied to the output of the first Linear node.
sigmoid_output=Sigmoid(linear_output)

In [29]:
# Second Linear node producing the prediction: np.dot(sigmoid_output, W2) + b2.
yhat=Linear(sigmoid_output, W2, b2)

In [30]:
# Mean-squared-error node comparing the targets y with the prediction yhat.
loss=MSE(y,yhat)

In [32]:
# Feed dictionary: the initial value of every Input node, keyed by the node
# object itself.
input_node_with_value={
    X:X_,
    y:y_,
    W1:W1_,
    b1:b1_,
    W2:W2_,
    b2:b2_
}

In [36]:
graph=topological_sorted(input_node_with_value)

In [37]:
graph

[Input Node:W2,
 Input Node:b2,
 Input Node:y,
 Input Node:X,
 Input Node:b1,
 Input Node:W1,
 <__main__.Linear at 0x1b6143c2ba8>,
 <__main__.Sigmoid at 0x1b6143da5c0>,
 <__main__.Linear at 0x1b6147a26a0>,
 <__main__.MSE at 0x1b6147b1080>]

In [38]:
from sklearn.utils import resample

In [39]:
np.random.choice(range(100),size=10, replace=True)

array([29, 47, 40, 22, 36, 35,  8, 83, 58, 43])

In [40]:
def run(dictionary):
    """Return the nodes reachable from ``dictionary``'s keys in topological order.

    Thin wrapper that forwards ``dictionary`` unchanged to
    ``topological_sorted`` and returns its result.
    """
    ordered_nodes = topological_sorted(dictionary)
    return ordered_nodes

In [None]:
losses=[]
epochs=100

batch_size=64
steps_per_epoch=X_.shape[0]//batch_size

for i in range(epochs):
    loss=0
    for batch in range(steps_per_epoch):
        X_batch,y_batch=resample(X_,y_,n_samples=batch_size)
        X.value=X_batch
        y.value=y_batch