In [1]:
class MulLayer:
    def __init__(self):
        self.x = None
        self.y = None
    def forward(self,x,y):
        self.x=x
        self.y=y
        out = x*y
        return out
    def backward(self,dout):
        dx=dout*self.y
        dy=dout*self.x
        return dx,dy

In [2]:
apple = 100
apple_num=2
tax=1.1

mul_apple_layer=MulLayer()
mul_tax_layer=MulLayer()

#forward
apple_price = mul_apple_layer.forward(apple,apple_num)
price=mul_tax_layer.forward(apple_price,tax)

print(price)

220.00000000000003


In [3]:
dprice=1
dapple_price,dtax = mul_tax_layer.backward(dprice)
dapple,dapple_num=mul_apple_layer.backward(dapple_price)
print(dapple,dapple_num,dtax)

2.2 110.00000000000001 200


In [5]:
class AddLayer:
    def __init__(self):
        pass
    def forward(self,x,y):
        out=x+y
        return out
    def backward(self,dout):
        dx=dout*1
        dy=dout*1
        return dx,dy

In [7]:
#根据书上138页的计算图写出反向传播的代码
apple=100
apple_num=2
orange=150
orange_num=3
tax=1.1

mul_apple_layer=MulLayer()
mul_orange_layer=MulLayer()
add_apple_orange_layer=AddLayer()
mul_tax_layer=MulLayer()

#forward
apple_price=mul_apple_layer.forward(apple,apple_num)
orange_price=mul_orange_layer.forward(orange,orange_num)
all_price=add_apple_orange_layer.forward(apple_price,orange_price)
price = mul_tax_layer.forward(all_price,tax)

#backward
dprice=1
dall_price,dtax=mul_tax_layer.backward(dprice)
dapple_price,dorange_price=add_apple_orange_layer.backward(dall_price)
dapple,dapple_num=mul_apple_layer.backward(dapple_price)
dorange,dorange_num=mul_orange_layer.backward(dorange_price)

print(price)
print(dapple_num,dapple,dorange,dorange_num,dtax)


715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


这边值得注意的一点是：dapple,dapple_num=...。这边的顺序需要和之前forward里面的顺序保持一致

In [9]:
class Relu:
    def __init__(self):
        self.mask=None
    def forward(self,x):
        self.mask = (x<=0)
        out=x.copy()
        out[self.mask]=0
        return out
    def backward(self,dout):
        dout[self.mask]=0
        dx=dout
        return dx

In [10]:
import numpy as np
x=np.array([[1.,-0.5],[-2.,3.]])
print(x)

[[ 1.  -0.5]
 [-2.   3. ]]


In [11]:
mask = (x<=0)
print(mask)

[[False  True]
 [ True False]]


In [1]:
class Sigmoid:
    def __init__(self):
        self.out = None
    def forward(self,x):
        out=1/(1+np.exp(-x))
        self.out=out
        return out
    def backward(self,dout):
        dx=dout*(1.0-self.out)*self.out
        return dx

In [3]:
import numpy as np
X=np.random.rand(2)
W=np.random.rand(2,3)
B=np.random.rand(3)
print(X.shape)
print(W.shape)
print(B.shape)

(2,)
(2, 3)
(3,)


In [4]:
Y=np.dot(X,W)+B

In [5]:
print(X)

[0.30807289 0.66669631]


In [6]:
X_dot_W=np.array([[0,0,0],[10,10,10]])
B=np.array([1,2,3])
print(X_dot_W)

[[ 0  0  0]
 [10 10 10]]


In [7]:
X_dot_W+B

array([[ 1,  2,  3],
       [11, 12, 13]])

In [8]:
dY = np.array([[1,2,3],[4,5,6]])
dY

array([[1, 2, 3],
       [4, 5, 6]])

In [9]:
print(dY)

[[1 2 3]
 [4 5 6]]


In [10]:
dB = np.sum(dY,axis=0)
dB

array([5, 7, 9])

In [11]:
class Affine:
    def __init__(self,W,b):
        self.W=W
        self.b=b
        self.x=None
        self.dW=None
        self.db=None
    def forward(self,x):
        self.x=x
        out=np.dot(x,self.W)+self.b
        return out
    def backward(self,dout):
        dx=np.dot(dout,self.W.T)
        self.dW=np.dot(self.x.T,dout)
        self.db=np.sum(dout,axis=0)
        return dx

In [12]:
def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y=exp_a/sum_exp_a
    return y
def cross_entropy_error(y,t):
    delta=1e-7
    return -np.sum(t*np.log(y+delta))

In [None]:
#因为从load_digits里面获取得到的标签都不是one-hot格式的，所以导致在计算cross entropy erorr的时候，出现了形状的错误
#t的形状是(3,)而不是(3,10)这显然不是我们想要的
def cross_entropy_error_new(y,t):
    delta=1e-7
    batch_size=y.shape[0]
    #如果t标签是一维数组而不是one-hot的情况
    #也就是load_digits里数据的情况
    if t.ndim == 1:
        t_onehot = np.zeros_like(y)
        t_onehot[np.arange(batch_size),t]=1
        t=t_onehot
    return -np.sum(t*np.log(y+delta))/batch_size
#这个cross_entropy_error_new是要在SoftmaxWithLoss的forward方法里面使用
    


In [39]:
def backward_f(y,t):
    batch_size=y.shape[0]
    if t.ndim == 1:
        t_onehot = np.zeros_like(y)
        t_onehot[np.arange(batch_size),t]=1
        t=t_onehot
    return (y-t)/batch_size


In [40]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss=None
        self.y=None
        self.t=None
    def forward(self,x,t):
        self.t= t
        self.y=softmax(x)
        self.loss=cross_entropy_error_new(self.y,self.t)
        return self.loss
    def backward(self,dout=1):
        #batch_size = self.t.shape[0]
        #dx=(self.y-self.t) / batch_size
        dx=backward_f(self.y,self.t)
    

In [14]:
class Relu:
    def __init__(self):
        self.mask=None
    def forward(self,x):
        self.mask = (x<=0)
        out=x.copy()
        out[self.mask]=0
        return out
    def backward(self,dout):
        dout[self.mask]=0
        dx=dout
        return dx

In [25]:
def numerical_gradient_edited(f,x):
    h=1e-4
    grad=np.zeros_like(x)
    it = np.nditer(x,flags=["multi_index"],op_flags=["readwrite"])
    while not it.finished:
        idx=it.multi_index
        original_value = x[idx]
        x[idx] = original_value + h
        fxh1=f(x)
        x[idx] = original_value - h
        fxh2=f(x)
        grad[idx] = (fxh1-fxh2)/(2*h)
        x[idx] = original_value
        it.iternext()
    return grad

In [None]:
class OrderedDict:
    def __init__(self):
        self.keys=[]
        self._values_list=[] #这边self的属性不可以和后续的values方法同一个名字，否则会覆盖掉
        self._dict={}  #内部字典用于快速查找
    
    def __setitem__(self,key,value):
        if key not in self._dict:
            self.keys.append(key)
            self._values_list.append(value)
        self._dict[key]=value
    
    def __getitem__(self,key):
        return self._dict[key]
    
    def values(self):
        return self._values_list.copy()
    
    def __contains__(self,key):
        return key in self._dict

In [41]:
class TwoLayerNet:
    def __init__(self,input_size,hidden_size,output_size,weight_init_std=0.01):
        self.params = {}
        self.params['W1']=weight_init_std*np.random.randn(input_size,hidden_size)
        self.params['b1']=np.zeros(hidden_size)
        self.params['W2']=weight_init_std*np.random.rand(hidden_size,output_size)
        self.params['b2']=np.zeros(output_size)

        #生成层
        self.layers=OrderedDict()
        self.layers['Affine1']=Affine(self.params['W1'],self.params['b1'])
        self.layers['Relu1']=Relu()
        self.layers['Affine2']=Affine(self.params['W2'],self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self,x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x
    
    def loss(self,x,t):
        y=self.predict(x)
        return self.lastLayer.forward(y,t)

    def accuracy(self,x,t):
        y=self.predict(x)
        y=np.argmax(y,axis=1)
        if t.ndim != 1 : t = np.argmax(t,axis=1)
        accuracy = np.sum(y==t)/float(x.shape[0])
        return accuracy
    
    
    def numerical_gradient(self,x,t):
        loss_W = lambda W : self.loss(x,t)
        grads={}
        global numerical_gradient_edited #设置为全局变量
        grads['W1']=numerical_gradient_edited(loss_W,self.params['W1'])
        grads['b1']=numerical_gradient_edited(loss_W,self.params['b1'])
        grads['W2']=numerical_gradient_edited(loss_W,self.params['W2'])
        grads['b2']=numerical_gradient_edited(loss_W,self.params['b2'])

        return grads
    
    def gradient(self,x,t):
        #forward
        self.loss(x,t)

        #backward
        dout=1
        dout=self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        
        grads={}
        grads['W1']=self.layers['Affine1'].dW
        grads['b1']=self.layers['Affine1'].db
        grads['W2']=self.layers['Affine2'].dW
        grads['b2']=self.layers['Affine2'].db

        return grads


In [18]:
#导入数据
from sklearn.datasets import load_digits
digits = load_digits()
x_train, x_test = digits.data[600:],digits.data[:600]
y_train,y_test = digits.target[600:],digits.target[:600]
# 注意训练集和测试集的比例最好 7：3， 8：2
print(x_train.shape)
print(x_test.shape) 
print(y_train.shape)
print(y_test.shape)

(1197, 64)
(600, 64)
(1197,)
(600,)


In [42]:
#误差反向传播的梯度确认
network=TwoLayerNet(input_size=64,hidden_size=50,output_size=10)
x_batch =x_train[:3]
y_batch=y_train[:3]
grad_numerical= network.numerical_gradient(x_batch,y_batch)
grad_backprop=network.gradient(x_batch,y_batch)
#求各个权重的绝对误差的平均值
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key]-grad_numerical[key]))
    print(key+':'+str(diff))


TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'