# 学習

ステップ１で作った関数をもう一度実装

In [1]:
# numpyを導入
import numpy as np

# モデル作成
def create_model():    
    model = {
        # 荷重を -5 ~ 5 の乱数で初期化
        "weights" : np.random.uniform(-5, 5, 2),    
        # バイアスも！
        "bias"    : np.random.uniform(-5, 5, 1)}
    return model

# 推論
def predict(model, activation, x):
    
    # 足し算を計算し…
    y = model["weights"][0] * x[0] + model["weights"][1] * x[1] + model["bias"]
    
    # 活性化で処理し、その結果を返す
    y = activation(y)
    return y

# 線形活性化関数
def linear_activation(x):
    return x

# ステップ活性化関数
def step_activation(x):
    if x >= 0.5:
        return 1
    else:
        return 0    

## 誤差を計算

正しい答え（ラベル）と推論した答えの差分を用い、学習させる。

まず、「AND」のラベルを準備しよう：

In [2]:
# 入力
x_list = np.array([
    [0, 0], 
    [0, 1], 
    [1, 0], 
    [1, 1]
], dtype = float)
print(x_list.shape)

#期待してる出力（ラベル）
y_true = np.array([
    [0], 
    [0], 
    [0], 
    [1]
], dtype = float)
print(y_true.shape)

(4, 2)
(4, 1)


誤差（損失）関数を実装しよう。

課題により、適切な関数を使うべきが、今回の入門課題はただの「差分」にしよう。

In [3]:
# 損失関数
def error(y_true, y_pred):
    return y_true - y_pred

In [4]:
# 推論とラベルの誤差は：
model = create_model()

In [5]:
# 結果を表示する
def print_results(model, activation, x_list, y_true):
    
    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    for i in range(data_size):
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        err = error(y_t, y_p)
        print(x, y_t, "->", y_p, "err:", err)    

In [6]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> [-4.76588595] err: [4.76588595]
[0. 1.] [0.] -> [-6.4132334] err: [6.4132334]
[1. 0.] [0.] -> [-4.46310244] err: [4.46310244]
[1. 1.] [1.] -> [-6.1104499] err: [7.1104499]


In [7]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [0.] -> 0 err: [0.]
[1. 0.] [0.] -> 0 err: [0.]
[1. 1.] [1.] -> 0 err: [1.]


## 学習
誤差にて、荷重を調整しよう。ただ、「入力」は「０」であると、出力に影響がないため、入力は「１」のときだけに荷重を調整する、つまり：

$$ w_i' = w_i + x_i \cdot error(y_{true}, y_{pred}) $$ 


In [8]:
# 荷重を更新する関数
def update_weight(w, x, err):
    return w + x * err

# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    # 誤差の平均
    mse = 0
    
    # さて、１個ずつを処理しよう
    for i in range(data_size):
        
        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        
        # 誤差を計算
        err  = error(y_t, y_p)
        mse += err * err
            
        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]
        
        w0   = update_weight(w0, x[0], err)
        w1   = update_weight(w1, x[1], err)
        bias = update_weight(bias, 1 , err)
        
        model["weights"][0] = w0[0]
        model["weights"][1] = w1[0]
        model["bias"] = bias[0]
    
    #誤差（損失）としては、平均値を返す
    return mse / data_size        

In [9]:
loss = fit_single_step(model, linear_activation, x_list, y_true)
print("loss:", loss)

loss: [9.48342647]


In [10]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> 2.647347455187591 err: [-2.64734746]
[0. 1.] [0.] -> 5.597478412325592 err: [-5.59747841]
[1. 0.] [0.] -> 3.950130957138001 err: [-3.95013096]
[1. 1.] [1.] -> 6.900261914276001 err: [-5.90026191]


In [11]:
def fit(model, activation, x_list, y_true, epochs):
    
    for i in range(epochs):
        loss = fit_single_step(model, activation, x_list, y_true)
        print("epoch:", i, "loss:", loss)

In [12]:
fit(model, linear_activation, x_list, y_true, 100)

epoch: 0 loss: [4.71113339]
epoch: 1 loss: [7.13221918]
epoch: 2 loss: [4.71517769]
epoch: 3 loss: [12.23374189]
epoch: 4 loss: [6.71922199]
epoch: 5 loss: [19.3352646]
epoch: 6 loss: [10.72326628]
epoch: 7 loss: [28.4367873]
epoch: 8 loss: [16.72731058]
epoch: 9 loss: [39.53831001]
epoch: 10 loss: [24.73135487]
epoch: 11 loss: [52.63983272]
epoch: 12 loss: [34.73539917]
epoch: 13 loss: [67.74135543]
epoch: 14 loss: [46.73944347]
epoch: 15 loss: [84.84287814]
epoch: 16 loss: [60.74348776]
epoch: 17 loss: [103.94440085]
epoch: 18 loss: [76.74753206]
epoch: 19 loss: [125.04592355]
epoch: 20 loss: [94.75157635]
epoch: 21 loss: [148.14744626]
epoch: 22 loss: [114.75562065]
epoch: 23 loss: [173.24896897]
epoch: 24 loss: [136.75966494]
epoch: 25 loss: [200.35049168]
epoch: 26 loss: [160.76370924]
epoch: 27 loss: [229.45201439]
epoch: 28 loss: [186.76775354]
epoch: 29 loss: [260.55353709]
epoch: 30 loss: [214.77179783]
epoch: 31 loss: [293.6550598]
epoch: 32 loss: [244.77584213]
epoch: 33 los

## 学習率とは

上記のように、差分だけを直そうとすると、平均的な誤差がお大きくなってしまう。その理由は、確か、勾配の方向が正しいが、ステップが大きいすぎる。つまり、最適な数値から大幅に超えてしまい、段々離れてしまう。

「学習率」という係数で、ステップの大きさを小さくし、少しずつ最適な数値に近づくようにする。

In [13]:
# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    # 誤差の平均
    mse = 0
    
    # さて、１個ずつを処理しよう
    for i in range(data_size):
        
        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        
        # 誤差を計算
        err = error(y_t, y_p)
        mse = err * err
        
        # 学習率
        learning_rate = 0.01

        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]
        
        w0   = update_weight(w0, x[0], err * learning_rate)
        w1   = update_weight(w1, x[1], err * learning_rate)
        bias = update_weight(bias, 1 , err * learning_rate)
        
        model["weights"][0] = w0[0]
        model["weights"][1] = w1[0]
        model["bias"] = bias[0]
        
    return mse / data_size

In [14]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [2764.92505837]
epoch: 1 loss: [2569.84868712]
epoch: 2 loss: [2392.57119241]
epoch: 3 loss: [2231.28874461]
epoch: 4 loss: [2084.39244798]
epoch: 5 loss: [1950.4463416]
epoch: 6 loss: [1828.16795372]
epoch: 7 loss: [1716.4111078]
epoch: 8 loss: [1614.15071508]
epoch: 9 loss: [1520.46931931]
epoch: 10 loss: [1434.54518796]
epoch: 11 loss: [1355.64176824]
epoch: 12 loss: [1283.09834797]
epoch: 13 loss: [1216.32178044]
epoch: 14 loss: [1154.77914895]
epoch: 15 loss: [1097.99126161]
epoch: 16 loss: [1045.52687987]
epoch: 17 loss: [996.99759551]
epoch: 18 loss: [952.05328126]
epoch: 19 loss: [910.37804859]
epoch: 20 loss: [871.6866544]
epoch: 21 loss: [835.72130498]
epoch: 22 loss: [802.24881175]
epoch: 23 loss: [771.05805861]
epoch: 24 loss: [741.95774543]
epoch: 25 loss: [714.77437627]
epoch: 26 loss: [689.35046478]
epoch: 27 loss: [665.54293215]
epoch: 28 loss: [643.22167611]
epoch: 29 loss: [622.26829177]
epoch: 30 loss: [602.57492741]
epoch: 31 loss: [584.04326027]
epoc

In [15]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> -0.38349529449329667 err: [0.38349529]
[0. 1.] [0.] -> 0.2273757695265377 err: [-0.22737577]
[1. 0.] [0.] -> 0.23135084497067016 err: [-0.23135084]
[1. 1.] [1.] -> 0.8422219089905046 err: [0.15777809]


In [16]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [0.] -> 0 err: [0.]
[1. 0.] [0.] -> 0 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]


## 練習:「OR」を学習させる

In [17]:
#期待してる出力（ラベル）
y_true = np.array([
    [0], 
    [1], 
    [1], 
    [1]
], dtype = float)
print(y_true.shape)

(4, 1)


In [18]:
# 推論とラベルの誤差は：
model = create_model()

In [19]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [0.06263027]
epoch: 1 loss: [0.04721831]
epoch: 2 loss: [0.03480223]
epoch: 3 loss: [0.02492871]
epoch: 4 loss: [0.01720585]
epoch: 5 loss: [0.01129529]
epoch: 6 loss: [0.00690527]
epoch: 7 loss: [0.00378461]
epoch: 8 loss: [0.00171744]
epoch: 9 loss: [0.00051854]
epoch: 10 loss: [2.9366584e-05]
epoch: 11 loss: [0.00011446]
epoch: 12 loss: [0.00065843]
epoch: 13 loss: [0.00156321]
epoch: 14 loss: [0.0027458]
epoch: 15 loss: [0.00413612]
epoch: 16 loss: [0.00567533]
epoch: 17 loss: [0.00731422]
epoch: 18 loss: [0.00901189]
epoch: 19 loss: [0.01073454]
epoch: 20 loss: [0.01245453]
epoch: 21 loss: [0.0141494]
epoch: 22 loss: [0.01580118]
epoch: 23 loss: [0.01739569]
epoch: 24 loss: [0.01892196]
epoch: 25 loss: [0.02037175]
epoch: 26 loss: [0.0217391]
epoch: 27 loss: [0.02302]
epoch: 28 loss: [0.02421202]
epoch: 29 loss: [0.02531405]
epoch: 30 loss: [0.0263261]
epoch: 31 loss: [0.02724906]
epoch: 32 loss: [0.02808453]
epoch: 33 loss: [0.02883468]
epoch: 34 loss: [0.02950214]

In [22]:
# 生の出力（活性化なしで）
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> 0.25485312262597065 err: [-0.25485312]
[0. 1.] [1.] -> 0.7503139148327709 err: [0.24968609]
[1. 0.] [1.] -> 0.7479388771878912 err: [0.25206112]
[1. 1.] [1.] -> 1.2433996693946916 err: [-0.24339967]


In [23]:
# ステップ関数を適用した後：
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [1.] -> 1 err: [0.]
[1. 0.] [1.] -> 1 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]
