# 問題1 二次元畳み込み層の実装

# 問題2 2次元畳み込み後の出力サイズ

In [20]:
import numpy as np

class Conv2d:
    def __init__(self, filter_num,filter_h, filter_w,C,initializer,optimizer,
                 padding=0,stride=1):
        self.filter_num = filter_num
        self.C = C
        self.initializer = initializer
        self.optimizer = optimizer
        self.filter_h = filter_h
        self.filter_w = filter_w
        self.w = self.initializer.W(self.filter_num, self.C,
                          self.filter_h, self.filter_w)#(FN,C,FH,FW)
        print('wshape:', self.w.shape)
        self.b = self.initializer.B()#(FN,)
#         print('bの形状：', self.b.shape)
        self.params = {}
        self.grads = {}
        self.params['W'] = self.w
        self.params['B'] = self.b
        self.padding = padding
        self.stride = stride
        
    def forward(self,X):
        
#         print('forward処理')
        #out_sizeの計算
        self.N,self.C,self.H,self.W = X.shape
         
        self.oh = self.calc_out_size(self.H, self.filter_h, self.padding, 
                                       self.stride)
        self.ow = self.calc_out_size(self.W, self.filter_w, self.padding, 
                                       self.stride)
        
        #padding処理
        X = np.pad(X, [(0,0),(0,0),(self.padding, self.padding),
                      (self.padding, self.padding)], 'constant')
        
        
        self.X = X
        
        
        #箱を生成
        x_6d = np.zeros((self.N, self.oh, self.ow, self.C,
                         self.filter_h, self.filter_w))
        
        
        #xの値をx_6dに格納していく
        for i in range(self.oh):
            for j in range(self.ow):
                #print('out番号：%d : %d' % (i, j))
                x_6d[:, i, j, :, :, :] =\
                X[:, :, i * self.stride : i * self.stride + self.filter_h,
                  j * self.stride : j * self.stride + self.filter_w]
                    
        #print('x_6d: ', x_6d)
        
        #x_colの生成
        self.x_col = x_6d.reshape(self.N * self.oh * self.ow, self.C * \
                                  self.filter_h * self.filter_w)
        
        #w_colの生成
        self.w_col = self.w.reshape(self.filter_num, -1)#(FN, C * FH * FW)
        #print('行列積shape:', np.dot(self.x_col, self.w_col.T).shape)
#         print('biasshape:', self.b.shape)

        out_col = np.dot(self.x_col, self.w_col.T) + self.b#(N * oh * ow, FN)
        out = out_col.reshape(self.N, self.oh, self.ow, self.filter_num)
        #(N, oh, ow, FN) →(N, FN, oh, ow)
        out = out.transpose(0,3,1,2)
#         print('out_shape: ', out.shape)
        
        return out
    
    
    def backward(self, dout):
        #dout(N,FN,oh,ow)
        dout = dout.transpose(0,2,3,1)
        dout_col = dout.reshape(-1, self.filter_num)#(N * oh * ow, FN)
#         print('dout_col.shape: ', dout_col.shape)
        #dWの生成
        
        #dout_col(N * oh * ow, FN)
        #self.x_col(N * self.oh * self.ow, C * self.filter_h * self.filter_w)
        #self.w_col(FN, C * FH * FW)
        self.dW_col = np.dot(dout_col.T, self.x_col)#(FN, C*fh*fw)
        self.dW = self.dW_col.reshape(self.filter_num, self.C, 
                                      self.filter_h, self.filter_w)
        
        #dXの生成
        #(N * oh * ow, C * FH * FW)
        self.dX_col = np.dot(dout_col, self.w_col)
        self.dX_6d = self.dX_col.reshape(self.N, self.oh, self.ow, 
                                        self.C, self.filter_h,self.filter_w)
#         print('dX_6dshape(N,oh,ow,C,FH,FW): ', self.dX_6d.shape)
        
        #dX_6dをdXに入れ込む
        #(N,C,oh,ow,fh,fw)に変換
        self.dX_6d = self.dX_6d.transpose(0,3,1,2,4,5)
        #dXの箱を用意
        dX = np.zeros_like(self.X)#(N,C,H,W)
        for i in range(self.filter_h):
            for j in range(self.filter_w):
                
#                 print('抽出したdx_6dの形状：',
#                       self.dX_6d[:, :, :, :, i, j].shape)
                
                h_max = i + self.stride * self.oh
                w_max = j + self.stride * self.ow
                
#                 print('抽出したxの形状：', 
#                      self.dX[:, :, i : h_max : self.stride,
#                         j : w_max : self.stride].shape)
                
                #print('filter_number: %d, %d' % (i,j) )
                #print('Xの適用index(H)：{}'.format(
#                     np.arange(i, h_max, self.stride)))
#                 print('Xの適用index(W)：{}'.format(
                #    np.arange(j, w_max, self.stride)))
                
                
                dX[:, :, i : h_max : self.stride,
                        j : w_max : self.stride] +=self.dX_6d[:, :, :, :, i, j]
                
                
                
#         print('dX途中経過：', dX.shape)
        #dBの生成
        self.dB = np.sum(dout_col,axis=0)#(FN,)
#         print('dXの値:', dX)
        self.grads['W'] = self.dW
        self.grads['B'] = self.dB
        
        self.optimizer.update(self)
        
        return dX
    
        
        
        
        
        
        
    
    

        
        
        
    def calc_out_size(self, D, filter_length, padding, stride):
        out_size = (D - filter_length + 2 * padding)// stride + 1
        return out_size

In [27]:
X = np.arange(1,17).reshape(1,1,4,4).astype(np.float64)
filter_num = 4
filter_h = 2
filter_w = 2


conv = Conv2d(filter_num=filter_num,filter_h=filter_h,C=1,
              initializer = HeInitializer(0.01), optimizer=SGD(0.01),
              filter_w=filter_w, padding=1,stride=2)
out = conv.forward(X)
print('out: ', out)

dout = conv.backward(out)
print('dout: ', dout)



W形状： (4, 1, 2, 2)
wshape: (4, 1, 2, 2)
out:  [[[[  0.81479487  -3.29323371  -9.21040486]
   [  7.02100653 -18.39820853 -32.94925059]
   [  9.06308363   2.6665363   -8.37124022]]

  [[  0.86416695   2.31505472  -3.09964626]
   [ 22.37955033  18.73173979 -13.78701578]
   [ 26.87019681  21.44691079 -11.52090804]]

  [[  2.79750485   1.91362598  -2.32566051]
   [  6.93698682  -0.1873003   -7.0225915 ]
   [ -1.06941921   5.80157689  10.54089669]]

  [[  0.22795603   1.53112317   1.72349831]
   [ 10.4483462   20.16121921   7.28830676]
   [ 21.95791733  29.0616289    4.26461178]]]]
dout:  [[[[  0.48776654   2.05687436   1.63686343   4.80960371   6.44865861
     -9.14626557]
   [ -5.54390615   3.40941515   4.98413155   3.61160778  27.58707262
     -8.76658454]
   [-12.84555048  67.72142544   2.52270798  61.35603198  25.9075723
    -36.82590559]
   [-34.69052681  40.2466297   41.1813256   23.41717844  99.47577364
    -33.94674721]
   [-18.20949317  99.93521715  -5.44482755  94.58313514  19.2247

In [28]:
print(np.arange(1,11).reshape(2,5))

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]


In [29]:
test_arr = np.arange(1,5)
test_arr

array([1, 2, 3, 4])

In [30]:
stride = 2
test_arr[1 : len(test_arr) : stride]

array([2, 4])

# 問題3 最大プーリング層の作成

In [31]:
class MaxPooling:
    def __init__(self,pool_h, pool_w, stride, padding=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.padding = padding

    def forward(self, X):
        #X = (N,FN,H,W)
        self.N, self.C, self.H, self.W = X.shape
        
        self.X = X.astype(np.float32)
        self.out_h = self.calc_out_size(self.H, self.pool_h, 
                                       self.padding, self.stride)
        self.out_w = self.calc_out_size(self.W, self.pool_w, 
                                       self.padding, self.stride)
        
        #print('out_h: ', self.out_h)
        self.x_6d = np.zeros((self.N, self.C, self.out_h, self.out_w, 
                             self.pool_h, self.pool_w))
        
        for i in range(self.out_h):
            for j in range(self.out_w):
                self.x_6d[:, :, i, j, :, :] = \
                X[:, :, i * self.stride : i * self.stride + self.pool_h,
                 j * self.stride : j * self.stride + self.pool_w]
        
        
        #print('x_6d.shape', self.x_6d.shape)
        self.x_col = self.x_6d.reshape(self.N * self.C * self.out_h * self.out_w,
                               self.pool_h * self.pool_w)
        #print('x_col: ', self.x_col)
        self.arg_index = np.argmax(self.x_col, axis=1)
        self.x_max = np.max(self.x_col, axis=1)
        #print('x_max.shape: ', self.x_max.shape)
        out = self.x_max.reshape(self.N, self.C, self.out_h, self.out_w)
        #print('out.shape: ',out.shape)
        
        return out
    
    def backward(self,dout):
        self.dx_col = np.zeros_like(self.x_col)
        #print('arg_index: ', self.arg_index)
        self.dx_col[:, self.arg_index] = dout.reshape(-1,1)
        #print('dx_col: ', self.dx_col)
        self.dx_6d = self.dx_col.reshape(self.N, self.C, self.out_h,
                                         self.out_w, self.pool_h, self.pool_w)
        
        self.dx = np.zeros_like(self.X)
        for i in range(self.out_h):
            for j in range(self.out_w):
                
#                 print('部分dxの形状：',
#                      self.dx[:, :, i * self.stride : i * self.stride + self.pool_h,
#                         j * self.stride : j * self.stride + self.pool_w].shape)
                
                self.dx[:, :, i * self.stride : i * self.stride + self.pool_h,
                        j * self.stride : j * self.stride + self.pool_w] += \
                self.dx_6d[:, :, i, j, :, :]
        #print('self.dx:', self.dx)
        return self.dx
        
        
    def calc_out_size(self, D, filter_length, padding, stride):
        out_size = (D - filter_length + 2 * padding)// stride + 1
        return out_size

In [32]:
pool_arr = np.arange(1,49).reshape(1,3,4,4)
print('pool_arr: ', pool_arr)
mp = MaxPooling(pool_h=2, pool_w=2, stride=2)
out = mp.forward(pool_arr)
dout = mp.backward(out)
print(out)

pool_arr:  [[[[ 1  2  3  4]
   [ 5  6  7  8]
   [ 9 10 11 12]
   [13 14 15 16]]

  [[17 18 19 20]
   [21 22 23 24]
   [25 26 27 28]
   [29 30 31 32]]

  [[33 34 35 36]
   [37 38 39 40]
   [41 42 43 44]
   [45 46 47 48]]]]
[[[[ 6.  8.]
   [14. 16.]]

  [[22. 24.]
   [30. 32.]]

  [[38. 40.]
   [46. 48.]]]]


# 問題4 平均値プーリングの実装

In [33]:
class MeanPooling:
    
    def __init__(self, pool_h, pool_w, stride, padding=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.padding = padding
        
        
    def forward(self, X):
        self.N, self.C, self.H, self.W = X.shape
        self.oh = self.calc_out_size(self.H, self.pool_h, self.padding,
                                    self.stride)
        self.ow = self.calc_out_size(self.W, self.pool_w, self.padding,
                                    self.stride)
        
        self.x_6d = np.zeros((self.N, self.C, self.oh, self.ow,
                              self.pool_h, self.pool_w))
        print('x_6d.shape: ', self.x_6d.shape)
        for i in range(self.pool_h):
            for j in range(self.pool_w):
               
                h_max = i + self.stride * self.oh
                w_max = j + self.stride * self.ow
        
                self.x_6d[:, :, :, :, i, j] = X[:, :, i : h_max : self.stride,
                                               j : w_max : self.stride]
        
        self.x_col = X.reshape(self.N * self.C * self.oh * self.ow,
                              self.pool_h * self.pool_w)
        print('x_col.shape: ', self.x_col.shape)
        self.x_mean = np.mean(self.x_col, axis=1)
        self.out = self.x_mean.reshape(self.N, self.C, self.oh, self.ow)
        return self.out
    
    def backward(self, dout):
        #dout: shape=(N, C, oh, ow)
        dout = dout.reshape(-1,1)
        self.dx_col = dout.repeat(self.pool_h * self.pool_w, axis=1)
        self.dx_6d = self.dx_col.reshape(self.N, self.C, self.oh, self.ow,
                                        self.pool_h, self.pool_w)
        
        dX = np.zeros((self.N, self.C, self.H, self.W))
        
        for i in range(self.pool_h):
            for j in range(self.pool_w):
                
                h_max = i + self.stride * self.oh
                w_max = j + self.stride * self.ow
                
                dX[:, :, i : h_max : self.stride, j : w_max : self.stride] += \
                self.dx_6d[:, :, :, :, i, j]
                
        return dX
                
        
        
        
        
        
        
    def calc_out_size(self, D, filter_length, padding, stride):
        out_size = (D - filter_length + 2 * padding)// stride + 1
        return out_size
        

In [34]:
pool_arr = np.arange(1,49).reshape(1,3,4,4)
print('pool_arr: ', pool_arr)
mp = MeanPooling(pool_h=2, pool_w=2, stride=2)
out = mp.forward(pool_arr)
dout = mp.backward(out)
print(out)

pool_arr:  [[[[ 1  2  3  4]
   [ 5  6  7  8]
   [ 9 10 11 12]
   [13 14 15 16]]

  [[17 18 19 20]
   [21 22 23 24]
   [25 26 27 28]
   [29 30 31 32]]

  [[33 34 35 36]
   [37 38 39 40]
   [41 42 43 44]
   [45 46 47 48]]]]
x_6d.shape:  (1, 3, 2, 2, 2, 2)
x_col.shape:  (12, 4)
[[[[ 2.5  6.5]
   [10.5 14.5]]

  [[18.5 22.5]
   [26.5 30.5]]

  [[34.5 38.5]
   [42.5 46.5]]]]


# 問題5 平滑化

In [35]:
class Flatten:
    def __init__(self):
        pass
    
    def forward(self,X):
        self.N, self.C, self.H, self.W = X.shape
        flatten_X = X.reshape(self.N, -1)
        return flatten_X
    
    def backward(self, dout):
        dout = dout.reshape(-1, self.C, self.H, self.W)
        return dout
    
        

# 問題7 LeNet

In [36]:
import numpy as np
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split


(X_train, y_train), (X_test, y_test) = mnist.load_data()


X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255
print(X_train.max())
print(X_train.min())
print(X_train.shape)
print(y_train.shape)


y_train_one_hot = to_categorical(y_train, num_classes=10)
print(y_train.shape)
print(y_train_one_hot.shape) # (60000, 10)
print(y_train_one_hot.dtype) # float64
print(type(y_train_one_hot))


X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot,
                                                 test_size=0.2)

Using TensorFlow backend.


1.0
0.0
(60000, 28, 28)
(60000,)
(60000,)
(60000, 10)
float32
<class 'numpy.ndarray'>


In [39]:
Conv1_params = {'filter_num' : 6, 'filter_h' : 5, 'filter_w' : 5,'C' : 1,
                'initializer' : HeInitializer(0.01),'optimizer' : SGD(0.01),
                'padding' : 2}

Conv2_params = {'filter_num' : 16,'filter_h' : 5,'filter_w' : 5,'C' : 6,
               'initializer' : HeInitializer(0.01),'optimizer' : SGD(0.01)}

Conv3_params = {'filter_num' : 120,'filter_h' : 5,'filter_w' : 5,'C' : 16,
               'initializer' : HeInitializer(0.01),'optimizer' : SGD(0.01)}

conv1 = Conv2d(**Conv1_params)
X_train_batch = X_train[:10,np.newaxis, :, :]
out = conv1.forward(X_train_batch)

mp1 = MaxPooling(pool_h=2, pool_w=2, stride=2)
out = mp1.forward(out)

conv2 = Conv2d(**Conv2_params)
out = conv2.forward(out)

mp2 = MaxPooling(pool_h=2, pool_w=2, stride=2)
out = mp2.forward(out)

conv3 = Conv2d(**Conv3_params)
out = conv3.forward(out)



#pool

W形状： (6, 1, 5, 5)
wshape: (6, 1, 5, 5)
W形状： (16, 6, 5, 5)
wshape: (16, 6, 5, 5)
W形状： (120, 16, 5, 5)
wshape: (120, 16, 5, 5)


In [395]:
#32*32→conv1→28*28→pool1→14*14→conv2→10*10→pool2→5*5
#→conv3→1*1→flatten→FC→84→FC→last_layer→10

# 問題6 学習と推定

In [None]:
Conv1_params = {'filter_num' : 6, 'filter_h' : 5, 'filter_w' : 5,'C' : 1,
                'padding' : 2}
MaxPool1_params = {'pool_h' : 2, 'pool_w' : 2, 'stride' : 2}
Conv2_params = {'filter_num' : 16,'filter_h' : 5,'filter_w' : 5,'C' : 6}
MaxPool2_params = {'pool_h' : 2, 'pool_w' : 2, 'stride' : 2}
Conv3_params = {'filter_num' : 120,'filter_h' : 5,'filter_w' : 5,'C' : 16}
n_nodes1 = 120
n_nodes2=84
n_classes=10

network = LeNet(Conv1_params=Conv1_params,
                MaxPool1_params=MaxPool1_params,
                Conv2_params=Conv2_params,
                MaxPool2_params=MaxPool2_params,
                Conv3_params=Conv3_params,
                n_nodes1=n_nodes1, n_nodes2=n_nodes2,
                 n_classes=n_classes,
                initializer=HeInitializer, optimizer=SGD,
                sigma=0.01, opt_params={'lr' : 0.01})

trainer = Trainer(model=network, n_epochs=40, batch=10)

X_train_1000 = X_train[:1000,np.newaxis,:,:]
trainer.fit(X_train_1000,y_train[:1000])


network.accuracy(X_train_1000,y_train[:1000])

W形状： (6, 1, 5, 5)
wshape: (6, 1, 5, 5)
W形状： (16, 6, 5, 5)
wshape: (16, 6, 5, 5)
W形状： (120, 16, 5, 5)
wshape: (120, 16, 5, 5)
W形状： (120, 84)
W形状： (84, 10)
*********1エポック*********
loss: 2.449403084054159
*********2エポック*********
loss: 2.3387537091489516
*********3エポック*********
loss: 2.3302676856882356
*********4エポック*********
loss: 2.3057153384794566
*********5エポック*********
loss: 2.305324008528191
*********6エポック*********
loss: 2.307878187493806
*********7エポック*********
loss: 2.3058301688452936
*********8エポック*********
loss: 2.3043458953105613
*********9エポック*********
loss: 2.3049165884925835
*********10エポック*********
loss: 2.3023771127577204
*********11エポック*********
loss: 2.29675400599669
*********12エポック*********
loss: 2.2995135496437826
*********13エポック*********
loss: 2.297605902838801
*********14エポック*********
loss: 2.2959795315763967
*********15エポック*********
loss: 2.29377217524258
*********16エポック*********
loss: 2.312725813873443
*********17エポック*********
loss: 2.2929387221314785
*********18エポッ

In [349]:
import keras

In [363]:
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D
from keras.optimizers import SGD
from keras.layers import Activation, Flatten, Dense

model = Sequential()
model.add(Conv2D(6, kernel_size=5, input_shape=(28,28,1),
                padding='same', activation='relu',
                 kernel_initializer='he_normal'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(16, kernel_size=5, padding='valid',
                 activation = 'relu', 
                kernel_initializer='he_normal'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(16, kernel_size=5, padding='valid',
                activation='relu',
                kernel_initializer='he_normal'))
model.add(Flatten())
model.add(Dense(84,activation='relu',
               kernel_initializer='he_normal'))
model.add(Dense(10, activation='relu',
               kernel_initializer='he_normal'))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', 
             optimizer=SGD(lr=0.01), metrics=['accuracy'])

X_train_1000 = X_train[:1000,:,:, np.newaxis]
X_train_all = X_train[:, :, :, np.newaxis]
history = model.fit(X_train_1000, y_train[:1000],
                   batch_size=10, epochs=10, verbose=1,
                   validation_data=(X_train_1000, y_train[:1000]))

score = model.evaluate(X_train_1000, y_train[:1000],
                      verbose=0)
print('train loss:', score[0])
print('train accuracy:', score[1])

Train on 1000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train loss: 0.6298545203208923
train accuracy: 0.8550000190734863


In [355]:
params = {'de' : 2}

In [10]:
class FC:
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    def __init__(self, n_nodes1, n_nodes2, initializer, optimizer):
        self.optimizer = optimizer
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.initializer = initializer
        self.optimizer = optimizer
        
        
        # 初期化
        # initializerのメソッドを使い、self.Wとself.Bを初期化する
        self.params = {}
        self.grads = {}
        self.W = self.initializer.W(self.n_nodes1, self.n_nodes2)
        self.B = self.initializer.B()
        self.params['W'] = self.W
        self.params['B'] = self.B
        self.X = None
        self.dW = np.zeros_like(self.W)
        self.dB = np.zeros_like(self.B)
        self.grads['W'] = self.dW
        self.grads['B'] = self.dB
        
        
    def forward(self, X):
        """
        フォワード
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """        
        self.X = X
        A = np.dot(X,self.W) + self.B
        return A
    def backward(self, dA):#x=(N,D) w=(D,H),A=(N,H))
#         print('dAの値：', dA)
#         print('Xの値：', self.X)
        dZ = np.dot(dA, self.W.T)
        self.grads['W'][...] = np.dot(self.X.T, dA)
        self.grads['B'][...] = np.sum(dA,axis=0)
        """
        バックワード
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
        """
        # 更新
        self.optimizer.update(self)
        return dZ

In [11]:
class SimpleInitializer:
    """
    ガウス分布によるシンプルな初期化
    Parameters
    ----------
    sigma : float
      ガウス分布の標準偏差
    """
    def __init__(self, sigma):
        self.sigma = sigma
    def W(self, n_nodes1, n_nodes2,n_nodes3=None,
          n_nodes4=None):
        
        
        """
        重みの初期化
        Parameters
        ----------
        n_nodes1 : int
          前の層のノード数
        n_nodes2 : int
          後の層のノード数

        Returns
        ----------
        W :
        """
        if n_nodes3 is None:
            W = np.random.randn(n_nodes1, n_nodes2) * self.sigma
            self.B_nodes = n_nodes2
        else:
            #(FN, C, FH, FW)
            W = np.random.randn(n_nodes1, n_nodes2,
                                n_nodes3, n_nodes4) * self.sigma
            self.B_nodes = n_nodes1
        
        return W
    def B(self):
        """
        バイアスの初期化
        Parameters
        ----------
        n_nodes2 : int
          後の層のノード数

        Returns
        ----------
        B :
        """
        #(b or FN)
        B = np.random.randn(self.B_nodes,)
        return B

In [12]:
class SGD:
    """
    確率的勾配降下法
    Parameters
    ----------
    lr : 学習率
    """
    def __init__(self, lr):
        self.lr = lr
    def update(self, layer):
        grads = layer.grads
        for key in layer.params.keys():
            #print('勾配名：', key)
            #print('勾配形状：', grads[key].shape)
            layer.params[key] -= self.lr * grads[key]
            
    
        """
        ある層の重みやバイアスの更新
        Parameters
        ----------
        layer : 更新前の層のインスタンス
        """
        

In [13]:
class Sigmoid:
    
    def __init__(self):
        self.z = None
        
    def forward(self,x):
        z = 1 / (1 + np.exp(-x))
        self.z = z
        return z
    
    def backward(self,dout):
        return self.z * (1 - self.z) * dout
    

In [14]:
class SoftmaxWithLoss:
    def init(self):
        self.y = None
        self.t = None
    
    def forward(self,a,t):
        self.t = t
        a_max = np.max(a,axis=1).reshape(-1,1)
        a_exp = np.exp(a - a_max)
        a_sum = np.sum(a_exp,axis=1)
        y = a_exp / a_sum.reshape(-1,1)
        self.y = y
        
        loss = self.mean_cross_entropy(y,t)
        return loss
    
    def backward(self,dout=1):
        return (self.y - self.t) / len(self.t)
    
    def mean_cross_entropy(self,y,t,eps=1e-7):
        return - np.sum(t * np.log(y + eps)) / len(t)
        

In [15]:
class Trainer:
    
    def __init__(self,model, n_epochs, batch):
        self.model = model
        self.n_epochs = n_epochs
        self.batch = batch
        self.loss_list = []
        
        
        
    def fit(self,X,y):
        
        for i in range(self.n_epochs):
            print('*********' + str(i+1) + 'エポック*********')
            get_mini_batch = GetMiniBatch(X, y, batch_size=20)
            count = 0
            for x_mini, y_mini in get_mini_batch:
                count += 1
                loss = self.model.forward(x_mini,y_mini)
                
                self.model.backward()
                if count == len(get_mini_batch):
                    self.loss_list.append(loss)
                    print('loss:',loss)
        

In [16]:
class GetMiniBatch:
    def __init__(self, X, y, batch_size=20, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self._X = X[shuffle_index]
        self._y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0] / self.batch_size).astype(np.int)
        #self.stopは作成するバッチサイズ数。１エポック分作成する
        
        
    def __len__(self):
        return self._stop
    
    #指定したバッチ番号を取ってきてくれる
    def __getitem__(self, item):
        p0 = item * self.batch_size
        p1 = item*self.batch_size + self.batch_size
        return self._X[p0 : p1], self._y[p0 : p1]

    
    
    #batchカウンターを初期化する
    def __iter__(self):
        self._counter = 0
        return self
    
    #batchを前から一つずつ取ってくる
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter * self.batch_size
        p1 = self._counter * self.batch_size + self.batch_size
        self._counter += 1
        return self._X[p0 : p1], self._y[p0 : p1]

In [17]:
class HeInitializer:
    """
    ガウス分布によるシンプルな初期化
    Parameters
    ----------
    sigma : float
      ガウス分布の標準偏差
    """
    def __init__(self,sigma):
        self.sigma = sigma
        self.n_nodes1 = None
        self.n_nodes2 = None
        self.n_nodes3 = None
        self.n_nodes4 = None
        
    
    def W(self, n_nodes1, n_nodes2, n_nodes3=None, n_nodes4=None):
        
        if n_nodes3 is None:
            self.n_nodes1 = n_nodes1
            self.n_nodes2 = n_nodes2
            self.n_nodes3 = n_nodes3
            self.n_nodes4 = n_nodes4
            """
            重みの初期化
            Parameters
            ----------
            n_nodes1 : int
              前の層のノード数
            n_nodes2 : int
              後の層のノード数

            Returns
            ----------
            W :
            """
            W = np.random.randn(n_nodes1, n_nodes2) * \
            np.sqrt(2 / n_nodes1)
            
        else:
            
            self.n_nodes1 = n_nodes1
            self.n_nodes2 = n_nodes2
            self.n_nodes3 = n_nodes3
            self.n_nodes4 = n_nodes4
            
            W = np.random.randn(n_nodes1, n_nodes2, n_nodes3,
                               n_nodes4) * np.sqrt(2 / n_nodes2)
            
        print('W形状：', W.shape)
        return W
    def B(self):
        """
        バイアスの初期化
        Parameters
        ----------
        n_nodes2 : int
          後の層のノード数

        Returns
        ----------
        B :
        """
        if self.n_nodes3 is None:
            B = np.random.randn(self.n_nodes2,) * \
            np.sqrt(2 / self.n_nodes1)
        else:
            B = np.random.randn(self.n_nodes1,) * \
            np.sqrt(2 / self.n_nodes2)
            
        return B

In [18]:
class ReLU:
    
    def __init__(self):
        self.X = None
        self.X_mask = None#0以下ならTrue
        
    def forward(self, X):
        self.X = X
        self.X_mask = self.X <= 0
        self.X[self.X_mask] = 0
        return self.X
    
    def backward(self,dout):
        dX = np.zeros_like(self.X)
        dX[~self.X_mask] = 1
        return dX * dout

In [19]:
import torch