In [None]:
!git clone https://github.com/oreilly-japan/deep-learning-from-scratch.git

Cloning into 'deep-learning-from-scratch'...
remote: Enumerating objects: 453, done.[K
remote: Total 453 (delta 0), reused 0 (delta 0), pack-reused 453[K
Receiving objects: 100% (453/453), 5.52 MiB | 31.92 MiB/s, done.
Resolving deltas: 100% (235/235), done.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mnist 

多次元配列における数値微分

In [None]:
def numerical_grad(f,x):
  """Estimate the gradient of ``f`` at ``x`` with central differences.

  Every element of ``x`` is nudged to ``+h`` and ``-h`` in place, ``f(x)`` is
  evaluated for both, and the element is put back afterwards. Because the
  perturbation happens inside ``x`` itself, ``f`` may ignore its argument and
  read the same array through another reference.

  Args:
    f: Callable invoked as ``f(x)``; its result is used as a scalar.
    x: NumPy array of any rank (0-d included). It is modified temporarily and
       restored element by element. It should have a floating dtype: with an
       integer dtype the ``+h`` / ``-h`` values are truncated when stored.

  Returns:
    An array made with ``np.zeros_like(x)`` whose entries are
    ``(f(x + h) - f(x - h)) / (2 * h)`` for the corresponding element.
  """
  h = 1e-4
  grad = np.zeros_like(x)

  # np.ndindex yields every full index tuple, so arrays of any rank share one
  # code path and x[idx] is always a scalar copy (never a view that would be
  # altered by the perturbation below).
  for idx in np.ndindex(x.shape):
    tmp_val = x[idx]

    x[idx] = tmp_val + h
    fx1 = f(x)

    x[idx] = tmp_val - h
    fx2 = f(x)

    grad[idx] = (fx1 - fx2) / (2*h)
    # Restore the original value before moving to the next element
    x[idx] = tmp_val

  return grad

    
  

In [None]:
def circle(x):
  """Return the sum of the squared elements of ``x``."""
  squared = x ** 2
  return np.sum(squared)

# 2x2 sample point at which the gradient is evaluated
x = np.array([[1.0, 2.0], [3.0, 4.0]])

# For f = sum(x**2) the analytic gradient is 2*x, so the printed values
# should be close to [[2, 4], [6, 8]].
gradient_at_x = numerical_grad(circle, x)
print(gradient_at_x)



[[2. 4.]
 [6. 8.]]


クラス定義

In [None]:
# Try building a class like the following
class Hoge:
  """Keeps the highest score seen so far and comments on each new score."""

  def __init__(self):
    # -1 marks "no score recorded yet"
    self.hoge = -1

  def progress(self,recent_score):
    """Print a message for ``recent_score`` and update the stored best.

    The message depends on the stored value: first participation when it is
    still -1, a new-best notice when ``recent_score`` exceeds it, and an
    encouragement message otherwise.
    """
    best_so_far = self.hoge

    if best_so_far == -1:
      message = '初参加です'
    elif recent_score > best_so_far:
      message = 'Highestを更新しました'
    else:
      message = '精進が足りないようです'
    print(message)

    # Keep whichever of the previous best and the new score is larger
    self.hoge = max(best_so_far, recent_score)


In [None]:
# Feed three scores in order: a first entry, a higher score, then a lower one
hoge = Hoge()
for recent_score in (100, 300, 200):
  hoge.progress(recent_score)

初参加です
Highestを更新しました
精進が足りないようです


In [None]:
# A very simple version of gradient descent
# Gradient computation
def numerical_gradient_val(f,x,h = 1e-4):
  """Central-difference estimate of the derivative of ``f`` at ``x``.

  Args:
    f: Callable taking one value.
    x: Point at which to differentiate.
    h: Step used on each side of ``x``.

  Returns:
    ``(f(x + h) - f(x - h)) / (2 * h)``.
  """
  ahead = f(x + h)
  behind = f(x - h)
  return (ahead - behind) / (2 * h)

# Stand-in for a loss function
# Note: this reuses the name of the one-argument `circle` defined in an earlier
# cell; from this cell on, `circle` takes two arguments.
def circle(x,y):
  """Return ``x**2 + y**2``."""
  x_squared = x ** 2
  y_squared = y ** 2
  return x_squared + y_squared


# Gradient descent settings
lr = 0.1
iters = 1000

# Initial values
x, y = 10, 10

# Training
for _ in range(iters):
  # Partial derivative with respect to x (y held at its current value)
  dx = numerical_gradient_val(lambda x_val: circle(x_val, y), x)

  # Partial derivative with respect to y (x held at its current value)
  dy = numerical_gradient_val(lambda y_val: circle(x, y_val), y)

  # Parameter update: both gradients are computed before either value moves
  x -= lr * dx
  y -= lr * dy

print(x, y)


6.6836193494284636e-21 6.6836193494284636e-21


In [None]:
# Version that keeps its settings on an instance
# Let's turn SGD into a class
class SGD:
  """Plain gradient-descent optimizer holding a single learning rate."""

  # Store the hyperparameter
  def __init__(self,learning_rate):
    self.lr = learning_rate

  # Update the parameters
  def update(self,params,grads):
    """Subtract ``lr * grads[i]`` from ``params[i]`` for every index of ``params``.

    ``params`` is modified in place; nothing is returned.
    """
    for idx, _ in enumerate(params):
      params[idx] -= self.lr * grads[idx]


In [None]:
# Minimise circle(p0, p1) = p0**2 + p1**2 with the SGD class.
# `iters`, `circle` and `numerical_gradient_val` come from the earlier
# gradient-descent cell.
params = [10,10]
optimizer = SGD(learning_rate=0.1)
for _ in range(iters):
  grads = []
  for idx, current in enumerate(params):
    # Gradient computation: vary only params[idx] and keep the other entries
    # of `params` at their current values. The loss no longer depends on the
    # `x` / `y` globals left behind by the previous cell.
    def loss_along_idx(value):
      point = list(params)
      point[idx] = value
      return circle(*point)

    grads.append(numerical_gradient_val(loss_along_idx, current))

  # Parameter update
  optimizer.update(params,grads)

print(params)
  

[6.696854239229312e-21, 6.696854239229312e-21]


ニューラルネットワークのクラス実装

In [None]:
def sigmoid(x):
  """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated with NumPy."""
  denominator = 1 + np.exp(-x)
  return 1 / denominator
def softmax(x):
  """Softmax over the last axis of ``x``.

  The maximum along the last axis is subtracted before exponentiating so that
  large inputs do not overflow; the input array itself is not modified.
  """
  shifted = x - np.max(x, axis=-1, keepdims=True)
  exponentials = np.exp(shifted)
  return exponentials / np.sum(exponentials, axis=-1, keepdims=True)
def cross_entropy_error(y, t):
  """Mean cross-entropy between predictions ``y`` and targets ``t``.

  Args:
    y: Predicted values; a 1-D array is treated as a batch of one sample.
    t: Targets. When ``t`` has as many elements as ``y`` it is reduced to
       class indices with ``argmax(axis=1)``; otherwise it is used directly
       as the class index of each sample.

  Returns:
    ``-sum(log(y[i, t[i]] + 1e-7)) / batch_size``.
  """
  # Promote a single sample to a batch of size one
  if y.ndim == 1:
    t = t.reshape(1, t.size)
    y = y.reshape(1, y.size)

  # Same element count as y: convert each row to its argmax index
  if t.size == y.size:
    t = t.argmax(axis=1)

  n_samples = y.shape[0]
  sample_rows = np.arange(n_samples)
  # 1e-7 keeps log() away from zero
  log_likelihood = np.log(y[sample_rows, t] + 1e-7)
  return -np.sum(log_likelihood) / n_samples


In [None]:
# Load the dataset with normalisation, flattening and one-hot labels requested.
# NOTE(review): presumably `mnist` is the loader module from the repository
# cloned at the top of the notebook - confirm it is importable under this name.
train_data, test_data = mnist.load_mnist(normalize=True, flatten=True, one_hot_label=True)
x_train, t_train = train_data
x_test, t_test = test_data


In [None]:
# Implement a 2-layer NN for MNIST image analysis with 50 hidden-layer nodes.
# Where a name has already been specified, follow it; otherwise choose names that
# are as readable as possible (competitive-programming-style code will make me cry)

class TwoLayerNet:
  """Fully connected two-layer network: affine -> sigmoid -> affine -> softmax.

  Attributes:
    weight_init_std: Standard deviation used for the initial weights.
    input_size, middle_size, output_size: Layer widths.
    params: dict with keys 'w1', 'b1', 'w2', 'b2' holding weights and biases.
  """

  def __init__(self,weight_init_std = 0.01,input_size = 784,middle_size=50,output_size=10):
    """Store the hyperparameters and create the initial parameters.

    Weights are drawn from a normal distribution scaled by
    ``weight_init_std``; biases start as zeros.
    """
    # ==== Hyperparameters kept on the instance ====
    self.weight_init_std = weight_init_std
    self.input_size = input_size
    self.middle_size = middle_size
    # Earlier misspelling kept as an alias so code reading it keeps working
    self.middele_size = middle_size
    self.output_size = output_size

    # ==== Parameter dict ====
    self.params = {}
    self.params['w1'] = weight_init_std*np.random.randn(input_size,middle_size)
    self.params['w2'] = weight_init_std*np.random.randn(middle_size,output_size)
    # Biases are plain zero vectors (scaling zeros by the std had no effect)
    self.params['b1'] = np.zeros(middle_size)
    self.params['b2'] = np.zeros(output_size)

  def predict(self,x):
    """Run the forward pass and return the softmax output for ``x``."""
    w1,b1,w2,b2 = self.params['w1'],self.params['b1'],self.params['w2'],self.params['b2']
    a1 = np.dot(x,w1)+b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1,w2)+b2
    # softmax is applied to the raw output scores; passing them through sigmoid
    # first squeezes them into (0, 1) and caps how confident the output can be
    y = softmax(a2)
    return y

  def loss(self,x,t):
    """Return the cross-entropy error of the prediction for ``x`` against ``t``."""
    y = self.predict(x)
    return cross_entropy_error(y,t)

  def numerical_grad(self,x,t):
    """Return a dict of numerical gradients of the loss for every parameter.

    The module-level ``numerical_grad`` perturbs each parameter array in
    place, so the loss callable ignores its argument and simply re-evaluates
    the network with the perturbed parameters.
    """
    loss = lambda w: self.loss(x,t)
    grads = {}
    for key in ('w1','w2','b1','b2'):
      grads[key] = numerical_grad(loss,self.params[key])

    return grads

  def accuracy(self,x,t):
    """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

    ``t`` may be 1-D class indices or a 2-D array that is reduced with
    ``argmax(axis=1)``.
    """
    y = self.predict(x)
    y = np.argmax(y, axis=1)
    if t.ndim != 1:
      t = np.argmax(t, axis=1)
    accuracy = np.sum(y == t) / x.shape[0]
    return accuracy


        




学習

In [None]:
epochs = 10
batch_size = 100
learning_rate = 0.1
NN = TwoLayerNet(input_size = 784,middle_size=50,output_size=10)

# ==== Data ====
# x_train / t_train / x_test / t_test are the arrays produced by the earlier
# load_mnist cell; the sample count is taken from the data instead of being
# hard-coded.
train_size = x_train.shape[0]

train_loss_list = []
train_acc_list = []
test_acc_list = []
iters_per_epoch = train_size//batch_size

# Training
# NOTE: the numerical gradient re-evaluates the loss twice for every single
# parameter element, so each step is very slow.
for epoch in range(epochs):
  for step in range(iters_per_epoch):
    # Draw a mini-batch
    batch_mask = np.random.choice(train_size,batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    loss = NN.loss(x_batch,t_batch)
    grads = NN.numerical_grad(x_batch,t_batch)

    # Gradient-descent update of every parameter
    for key in ('w1','b1','w2','b2'):
      NN.params[key] -= learning_rate*grads[key]

    # The step counter is an int, so it is converted before concatenation
    print('======= iters'+str(step)+'=======')
    print(loss)

  # ==== Record progress once per epoch ====
  train_loss_list.append(loss)

  train_acc = NN.accuracy(x_train,t_train)
  # Test accuracy is measured on the held-out test split
  test_acc = NN.accuracy(x_test,t_test)
  train_acc_list.append(train_acc)
  test_acc_list.append(test_acc)
  # Accuracies are printed only after they have been computed
  print(train_acc)
  print(test_acc)

# If these are printed properly, it worked
print('loss:',train_loss_list[-1])
print('train_acc:',train_acc_list[-1])
print('test_acc:',test_acc_list[-1])

NameError: ignored

可視化

In [None]:
# x-axis: 1-based epoch numbers, one per recorded value, so the plot still
# works when fewer epochs than planned were recorded
x = np.arange(1,len(train_loss_list)+1)

# Loss per epoch
plt.plot(x,train_loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

# Train vs. test accuracy per epoch
plt.plot(x,train_acc_list,'b',label='train')
plt.plot(x,test_acc_list,'r',label='test')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()
