# 学習

ステップ１で作った関数をもう一度実装

In [1]:
# numpyを導入
import numpy as np

# モデル作成
def create_model():    
    model = {
        # 荷重を -5 ~ 5 の乱数で初期化
        "weights" : np.random.uniform(-5, 5, 2),    
        # バイアスも！
        "bias"    : np.random.uniform(-5, 5, 1)}
    return model

# 推論
def predict(model, activation, x):
    
    # 足し算を計算し…
    y = model["weights"][0] * x[0] + model["weights"][1] * x[1] + model["bias"]
    
    # 活性化で処理し、その結果を返す
    y = activation(y)
    return y

# 線形活性化関数
def linear_activation(x):
    return x

# ステップ活性化関数
def step_activation(x):
    if x >= 0.5:
        return 1
    else:
        return 0    

## 誤差を計算

正しい答え（ラベル）と推論した答えの差分を用い、学習させる。

まず、「AND」のラベルを準備しよう：

In [2]:
# 入力
x_list = np.array([
    [0, 0], 
    [0, 1], 
    [1, 0], 
    [1, 1]
], dtype = float)
print(x_list.shape)

#期待してる出力（ラベル）
y_true = np.array([
    [0], 
    [0], 
    [0], 
    [1]
], dtype = float)
print(y_true.shape)

(4, 2)
(4, 1)


誤差（損失）関数を実装しよう。

課題により、適切な関数を使うべきが、今回の入門課題はただの「差分」にしよう。

In [3]:
# 損失関数
def error(y_true, y_pred):
    return y_true - y_pred

In [4]:
# 推論とラベルの誤差は：
model = create_model()

In [5]:
# 結果を表示する
def print_results(model, activation, x_list, y_true):
    
    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    for i in range(data_size):
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        err = error(y_t, y_p)
        print(x, y_t, "->", y_p, "err:", err)    

In [6]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> [4.09281912] err: [-4.09281912]
[0. 1.] [0.] -> [2.88840141] err: [-2.88840141]
[1. 0.] [0.] -> [1.07864801] err: [-1.07864801]
[1. 1.] [1.] -> [-0.1257697] err: [1.1257697]


In [7]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 1 err: [-1.]
[0. 1.] [0.] -> 1 err: [-1.]
[1. 0.] [0.] -> 1 err: [-1.]
[1. 1.] [1.] -> 0 err: [1.]


## 学習
誤差にて、荷重を調整しよう。ただ、「入力」は「０」であると、出力に影響がないため、入力は「１」のときだけに荷重を調整する、つまり：

$$ w_i' = w_i + x_i \cdot error(y_{true}, y_{pred}) $$ 


In [8]:
# 荷重を更新する関数
def update_weight(w, x, err):
    return w + x * err

# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    # 誤差の平均
    mse = 0
    
    # さて、１個ずつを処理しよう
    for i in range(data_size):
        
        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        
        # 誤差を計算
        err = error(y_t, y_p)
        mse = err * err
            
        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]
        
        w0   = update_weight(w0, x[0], err)
        w1   = update_weight(w1, x[1], err)
        bias = update_weight(bias, 1 , err)
        
        model["weights"][0] = w0
        model["weights"][1] = w1
        model["bias"] = bias
    
    #誤差（損失）としては、平均値を返す
    return mse / data_size        

In [9]:
loss = fit_single_step(model, linear_activation, x_list, y_true)
print("loss:", loss)

loss: [0.16392515]


In [10]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> [2.20441771] err: [-2.20441771]
[0. 1.] [0.] -> [1.3946643] err: [-1.3946643]
[1. 0.] [0.] -> [0.19024659] err: [-0.19024659]
[1. 1.] [1.] -> [-0.61950681] err: [1.61950681]


In [11]:
def fit(model, activation, x_list, y_true, epochs):
    
    for i in range(epochs):
        loss = fit_single_step(model, activation, x_list, y_true)
        print("epoch:", i, "loss:", loss)

In [12]:
fit(model, linear_activation, x_list, y_true, 100)

epoch: 0 loss: [0.01044665]
epoch: 1 loss: [0.00904844]
epoch: 2 loss: [0.1582378]
epoch: 3 loss: [0.35417174]
epoch: 4 loss: [0.80602894]
epoch: 5 loss: [1.19929503]
epoch: 6 loss: [1.95382009]
epoch: 7 loss: [2.54441833]
epoch: 8 loss: [3.60161123]
epoch: 9 loss: [4.38954163]
epoch: 10 loss: [5.74940238]
epoch: 11 loss: [6.73466492]
epoch: 12 loss: [8.39719353]
epoch: 13 loss: [9.57978822]
epoch: 14 loss: [11.54498467]
epoch: 15 loss: [12.92491152]
epoch: 16 loss: [15.19277582]
epoch: 17 loss: [16.77003481]
epoch: 18 loss: [19.34056696]
epoch: 19 loss: [21.11515811]
epoch: 20 loss: [23.98835811]
epoch: 21 loss: [25.96028141]
epoch: 22 loss: [29.13614926]
epoch: 23 loss: [31.3054047]
epoch: 24 loss: [34.7839404]
epoch: 25 loss: [37.150528]
epoch: 26 loss: [40.93173155]
epoch: 27 loss: [43.4956513]
epoch: 28 loss: [47.5795227]
epoch: 29 loss: [50.34077459]
epoch: 30 loss: [54.72731384]
epoch: 31 loss: [57.68589789]
epoch: 32 loss: [62.37510499]
epoch: 33 loss: [65.53102118]
epoch: 34 l

## 学習率とは

上記のように、差分だけを直そうとすると、平均的な誤差がお大きくなってしまう。その理由は、確か、勾配の方向が正しいが、ステップが大きいすぎる。つまり、最適な数値から大幅に超えてしまい、段々離れてしまう。

「学習率」という係数で、ステップの大きさを小さくし、少しずつ最適な数値に近づくようにする。

In [13]:
# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    # 誤差の平均
    mse = 0
    
    # さて、１個ずつを処理しよう
    for i in range(data_size):
        
        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        
        # 誤差を計算
        err = error(y_t, y_p)
        mse = err * err
        
        # 学習率
        learning_rate = 0.01

        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]
        
        w0   = update_weight(w0, x[0], err * learning_rate)
        w1   = update_weight(w1, x[1], err * learning_rate)
        bias = update_weight(bias, 1 , err * learning_rate)
        
        model["weights"][0] = w0
        model["weights"][1] = w1
        model["bias"] = bias
        
    return mse / data_size

In [14]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [2391.60156233]
epoch: 1 loss: [2227.90434023]
epoch: 2 loss: [2078.91963666]
epoch: 3 loss: [1943.17141778]
epoch: 4 loss: [1819.34200535]
epoch: 5 loss: [1706.25429315]
epoch: 6 loss: [1602.85602104]
epoch: 7 loss: [1508.20586425]
epoch: 8 loss: [1421.46112409]
epoch: 9 loss: [1341.86683215]
epoch: 10 loss: [1268.74610183]
epoch: 11 loss: [1201.4915814]
epoch: 12 loss: [1139.55787968]
epoch: 13 loss: [1082.45485092]
epoch: 14 loss: [1029.74163872]
epoch: 15 loss: [981.02139101]
epoch: 16 loss: [935.93656807]
epoch: 17 loss: [894.16477513]
epoch: 18 loss: [855.41505902]
epoch: 19 loss: [819.42461539]
epoch: 20 loss: [785.9558595]
epoch: 21 loss: [754.79381886]
epoch: 22 loss: [725.74381111]
epoch: 23 loss: [698.62937459]
epoch: 24 loss: [673.29042301]
epoch: 25 loss: [649.58159896]
epoch: 26 loss: [627.37080371]
epoch: 27 loss: [606.53788375]
epoch: 28 loss: [586.97345631]
epoch: 29 loss: [568.57785865]
epoch: 30 loss: [551.26020721]
epoch: 31 loss: [534.93755461]
epoch

epoch: 284 loss: [15.13062477]
epoch: 285 loss: [14.93423818]
epoch: 286 loss: [14.74036039]
epoch: 287 loss: [14.54895962]
epoch: 288 loss: [14.36000447]
epoch: 289 loss: [14.17346397]
epoch: 290 loss: [13.98930752]
epoch: 291 loss: [13.8075049]
epoch: 292 loss: [13.62802629]
epoch: 293 loss: [13.45084223]
epoch: 294 loss: [13.27592365]
epoch: 295 loss: [13.10324183]
epoch: 296 loss: [12.93276842]
epoch: 297 loss: [12.76447542]
epoch: 298 loss: [12.5983352]
epoch: 299 loss: [12.43432046]
epoch: 300 loss: [12.27240425]
epoch: 301 loss: [12.11255997]
epoch: 302 loss: [11.95476134]
epoch: 303 loss: [11.79898243]
epoch: 304 loss: [11.64519762]
epoch: 305 loss: [11.49338162]
epoch: 306 loss: [11.34350947]
epoch: 307 loss: [11.19555651]
epoch: 308 loss: [11.04949839]
epoch: 309 loss: [10.90531108]
epoch: 310 loss: [10.76297084]
epoch: 311 loss: [10.62245425]
epoch: 312 loss: [10.48373816]
epoch: 313 loss: [10.34679973]
epoch: 314 loss: [10.2116164]
epoch: 315 loss: [10.0781659]
epoch: 316 l

epoch: 640 loss: [0.08756195]
epoch: 641 loss: [0.08599284]
epoch: 642 loss: [0.08444768]
epoch: 643 loss: [0.08292614]
epoch: 644 loss: [0.0814279]
epoch: 645 loss: [0.07995264]
epoch: 646 loss: [0.07850005]
epoch: 647 loss: [0.07706981]
epoch: 648 loss: [0.07566162]
epoch: 649 loss: [0.07427517]
epoch: 650 loss: [0.07291017]
epoch: 651 loss: [0.07156632]
epoch: 652 loss: [0.07024333]
epoch: 653 loss: [0.06894091]
epoch: 654 loss: [0.06765877]
epoch: 655 loss: [0.06639665]
epoch: 656 loss: [0.06515426]
epoch: 657 loss: [0.06393133]
epoch: 658 loss: [0.06272759]
epoch: 659 loss: [0.06154278]
epoch: 660 loss: [0.06037663]
epoch: 661 loss: [0.05922889]
epoch: 662 loss: [0.05809929]
epoch: 663 loss: [0.0569876]
epoch: 664 loss: [0.05589355]
epoch: 665 loss: [0.05481691]
epoch: 666 loss: [0.05375744]
epoch: 667 loss: [0.05271489]
epoch: 668 loss: [0.05168902]
epoch: 669 loss: [0.05067962]
epoch: 670 loss: [0.04968644]
epoch: 671 loss: [0.04870926]
epoch: 672 loss: [0.04774786]
epoch: 673 l

epoch: 924 loss: [0.00316951]
epoch: 925 loss: [0.00321941]
epoch: 926 loss: [0.00326937]
epoch: 927 loss: [0.00331941]
epoch: 928 loss: [0.0033695]
epoch: 929 loss: [0.00341965]
epoch: 930 loss: [0.00346986]
epoch: 931 loss: [0.0035201]
epoch: 932 loss: [0.00357039]
epoch: 933 loss: [0.00362071]
epoch: 934 loss: [0.00367107]
epoch: 935 loss: [0.00372145]
epoch: 936 loss: [0.00377185]
epoch: 937 loss: [0.00382228]
epoch: 938 loss: [0.00387271]
epoch: 939 loss: [0.00392316]
epoch: 940 loss: [0.00397361]
epoch: 941 loss: [0.00402406]
epoch: 942 loss: [0.0040745]
epoch: 943 loss: [0.00412495]
epoch: 944 loss: [0.00417538]
epoch: 945 loss: [0.00422579]
epoch: 946 loss: [0.00427619]
epoch: 947 loss: [0.00432656]
epoch: 948 loss: [0.00437691]
epoch: 949 loss: [0.00442723]
epoch: 950 loss: [0.00447752]
epoch: 951 loss: [0.00452777]
epoch: 952 loss: [0.00457798]
epoch: 953 loss: [0.00462815]
epoch: 954 loss: [0.00467828]
epoch: 955 loss: [0.00472835]
epoch: 956 loss: [0.00477837]
epoch: 957 lo

In [15]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> [-0.37938392] err: [0.37938392]
[0. 1.] [0.] -> [0.22804577] err: [-0.22804577]
[1. 0.] [0.] -> [0.23205556] err: [-0.23205556]
[1. 1.] [1.] -> [0.83948525] err: [0.16051475]


In [16]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [0.] -> 0 err: [0.]
[1. 0.] [0.] -> 0 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]


## 練習:「OR」を学習させる

In [17]:
#期待してる出力（ラベル）
y_true = np.array([
    [0], 
    [1], 
    [1], 
    [1]
], dtype = float)
print(y_true.shape)

(4, 1)


In [18]:
# 推論とラベルの誤差は：
model = create_model()

In [19]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [11.23987784]
epoch: 1 loss: [9.86187663]
epoch: 2 loss: [8.65155654]
epoch: 3 loss: [7.58858332]
epoch: 4 loss: [6.65508502]
epoch: 5 loss: [5.83535398]
epoch: 6 loss: [5.11558496]
epoch: 7 loss: [4.48364482]
epoch: 8 loss: [3.9288702]
epoch: 9 loss: [3.44188958]
epoch: 10 loss: [3.01446697]
epoch: 11 loss: [2.63936438]
epoch: 12 loss: [2.31022107]
epoch: 13 loss: [2.02144734]
epoch: 14 loss: [1.76813116]
epoch: 15 loss: [1.54595612]
epoch: 16 loss: [1.35112935]
epoch: 17 loss: [1.18031808]
epoch: 18 loss: [1.03059393]
epoch: 19 loss: [0.89938395]
epoch: 20 loss: [0.78442755]
epoch: 21 loss: [0.68373866]
epoch: 22 loss: [0.59557249]
epoch: 23 loss: [0.51839626]
epoch: 24 loss: [0.45086354]
epoch: 25 loss: [0.39179164]
epoch: 26 loss: [0.34014177]
epoch: 27 loss: [0.29500159]
epoch: 28 loss: [0.25556989]
epoch: 29 loss: [0.22114307]
epoch: 30 loss: [0.19110333]
epoch: 31 loss: [0.16490827]
epoch: 32 loss: [0.14208169]
epoch: 33 loss: [0.12220559]
epoch: 34 loss: [0.10491

epoch: 322 loss: [0.01327155]
epoch: 323 loss: [0.01328788]
epoch: 324 loss: [0.0133041]
epoch: 325 loss: [0.01332024]
epoch: 326 loss: [0.01333627]
epoch: 327 loss: [0.01335222]
epoch: 328 loss: [0.01336807]
epoch: 329 loss: [0.01338382]
epoch: 330 loss: [0.01339949]
epoch: 331 loss: [0.01341506]
epoch: 332 loss: [0.01343054]
epoch: 333 loss: [0.01344593]
epoch: 334 loss: [0.01346123]
epoch: 335 loss: [0.01347644]
epoch: 336 loss: [0.01349156]
epoch: 337 loss: [0.01350659]
epoch: 338 loss: [0.01352154]
epoch: 339 loss: [0.01353639]
epoch: 340 loss: [0.01355116]
epoch: 341 loss: [0.01356584]
epoch: 342 loss: [0.01358043]
epoch: 343 loss: [0.01359493]
epoch: 344 loss: [0.01360935]
epoch: 345 loss: [0.01362369]
epoch: 346 loss: [0.01363794]
epoch: 347 loss: [0.0136521]
epoch: 348 loss: [0.01366619]
epoch: 349 loss: [0.01368018]
epoch: 350 loss: [0.0136941]
epoch: 351 loss: [0.01370793]
epoch: 352 loss: [0.01372168]
epoch: 353 loss: [0.01373535]
epoch: 354 loss: [0.01374894]
epoch: 355 lo

epoch: 630 loss: [0.0155539]
epoch: 631 loss: [0.01555635]
epoch: 632 loss: [0.01555878]
epoch: 633 loss: [0.0155612]
epoch: 634 loss: [0.0155636]
epoch: 635 loss: [0.01556598]
epoch: 636 loss: [0.01556835]
epoch: 637 loss: [0.01557071]
epoch: 638 loss: [0.01557305]
epoch: 639 loss: [0.01557538]
epoch: 640 loss: [0.01557769]
epoch: 641 loss: [0.01557999]
epoch: 642 loss: [0.01558227]
epoch: 643 loss: [0.01558454]
epoch: 644 loss: [0.01558679]
epoch: 645 loss: [0.01558903]
epoch: 646 loss: [0.01559126]
epoch: 647 loss: [0.01559347]
epoch: 648 loss: [0.01559567]
epoch: 649 loss: [0.01559785]
epoch: 650 loss: [0.01560003]
epoch: 651 loss: [0.01560218]
epoch: 652 loss: [0.01560433]
epoch: 653 loss: [0.01560646]
epoch: 654 loss: [0.01560857]
epoch: 655 loss: [0.01561068]
epoch: 656 loss: [0.01561277]
epoch: 657 loss: [0.01561484]
epoch: 658 loss: [0.01561691]
epoch: 659 loss: [0.01561896]
epoch: 660 loss: [0.015621]
epoch: 661 loss: [0.01562302]
epoch: 662 loss: [0.01562503]
epoch: 663 loss

epoch: 936 loss: [0.01588628]
epoch: 937 loss: [0.01588663]
epoch: 938 loss: [0.01588698]
epoch: 939 loss: [0.01588733]
epoch: 940 loss: [0.01588768]
epoch: 941 loss: [0.01588802]
epoch: 942 loss: [0.01588836]
epoch: 943 loss: [0.01588871]
epoch: 944 loss: [0.01588904]
epoch: 945 loss: [0.01588938]
epoch: 946 loss: [0.01588971]
epoch: 947 loss: [0.01589004]
epoch: 948 loss: [0.01589037]
epoch: 949 loss: [0.0158907]
epoch: 950 loss: [0.01589103]
epoch: 951 loss: [0.01589135]
epoch: 952 loss: [0.01589167]
epoch: 953 loss: [0.01589199]
epoch: 954 loss: [0.01589231]
epoch: 955 loss: [0.01589262]
epoch: 956 loss: [0.01589294]
epoch: 957 loss: [0.01589325]
epoch: 958 loss: [0.01589356]
epoch: 959 loss: [0.01589387]
epoch: 960 loss: [0.01589417]
epoch: 961 loss: [0.01589447]
epoch: 962 loss: [0.01589478]
epoch: 963 loss: [0.01589508]
epoch: 964 loss: [0.01589537]
epoch: 965 loss: [0.01589567]
epoch: 966 loss: [0.01589596]
epoch: 967 loss: [0.01589626]
epoch: 968 loss: [0.01589655]
epoch: 969 

In [20]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> [0.25295813] err: [-0.25295813]
[0. 1.] [1.] -> [0.74995601] err: [0.25004399]
[1. 0.] [1.] -> [0.74766292] err: [0.25233708]
[1. 1.] [1.] -> [1.2446608] err: [-0.2446608]


In [21]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [1.] -> 1 err: [0.]
[1. 0.] [1.] -> 1 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]
