# 学習

ステップ１で作った関数をもう一度実装

In [1]:
# numpyを導入
import numpy as np

# モデル作成
def create_model():    
    model = {
        # 荷重を -5 ~ 5 の乱数で初期化
        "weights" : np.random.uniform(-5, 5, 2),    
        # バイアスも！
        "bias"    : np.random.uniform(-5, 5, 1)}
    return model

# 推論
def predict(model, activation, x):
    
    # 足し算を計算し…
    y = model["weights"][0] * x[0] + model["weights"][1] * x[1] + model["bias"]
    
    # 活性化で処理し、その結果を返す
    y = activation(y)
    return y

# 線形活性化関数
def linear_activation(x):
    return x

# ステップ活性化関数
def step_activation(x):
    if x >= 0.5:
        return 1
    else:
        return 0    

## 誤差を計算

正しい答え（ラベル）と推論した答えの差分を用い、学習させる。

まず、「AND」のラベルを準備しよう：

In [2]:
# 入力
x_list = np.array([
    [0, 0], 
    [0, 1], 
    [1, 0], 
    [1, 1]
], dtype = float)
print(x_list.shape)

#期待してる出力（ラベル）
y_true = np.array([
    [0], 
    [0], 
    [0], 
    [1]
], dtype = float)
print(y_true.shape)

(4, 2)
(4, 1)


誤差（損失）関数を実装しよう。

課題により、適切な関数を使うべきが、今回の入門課題はただの「差分」にしよう。

In [3]:
# 損失関数
def error(y_true, y_pred):
    return y_true - y_pred

In [4]:
# 推論とラベルの誤差は：
model = create_model()

In [5]:
# 結果を表示する
def print_results(model, activation, x_list, y_true):
    
    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    for i in range(data_size):
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        err = error(y_t, y_p)
        print(x, y_t, "->", y_p, "err:", err)    

In [6]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> [0.49003829] err: [-0.49003829]
[0. 1.] [0.] -> [-1.20464417] err: [1.20464417]
[1. 0.] [0.] -> [-0.29942395] err: [0.29942395]
[1. 1.] [1.] -> [-1.99410641] err: [2.99410641]


In [7]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [0.] -> 0 err: [0.]
[1. 0.] [0.] -> 0 err: [0.]
[1. 1.] [1.] -> 0 err: [1.]


## 学習
誤差にて、荷重を調整しよう。ただ、「入力」は「０」であると、出力に影響がないため、入力は「１」のときだけに荷重を調整する、つまり：

$$ w_i' = w_i + x_i \cdot error(y_{true}, y_{pred}) $$ 


In [8]:
# 荷重を更新する関数
def update_weight(w, x, err):
    return w + x * err

# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    # 誤差の平均
    mse = 0
    
    # さて、１個ずつを処理しよう
    for i in range(data_size):
        
        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        
        # 誤差を計算
        err  = error(y_t, y_p)
        mse += err * err
            
        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]
        
        w0   = update_weight(w0, x[0], err)
        w1   = update_weight(w1, x[1], err)
        bias = update_weight(bias, 1 , err)
        
        model["weights"][0] = w0[0]
        model["weights"][1] = w1[0]
        model["bias"] = bias[0]
    
    #誤差（損失）としては、平均値を返す
    return mse / data_size        

In [9]:
loss = fit_single_step(model, linear_activation, x_list, y_true)
print("loss:", loss)

loss: [1.89034347]


In [10]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> 2.6946824578983133 err: [-2.69468246]
[0. 1.] [0.] -> 4.599902677852856 err: [-4.59990268]
[1. 0.] [0.] -> 2.9052202199545425 err: [-2.90522022]
[1. 1.] [1.] -> 4.810440439909085 err: [-3.81044044]


In [11]:
def fit(model, activation, x_list, y_true, epochs):
    
    for i in range(epochs):
        loss = fit_single_step(model, activation, x_list, y_true)
        print("epoch:", i, "loss:", loss)

In [12]:
fit(model, linear_activation, x_list, y_true, 100)

epoch: 0 loss: [3.5614275]
epoch: 1 loss: [3.34304399]
epoch: 2 loss: [2.97201392]
epoch: 3 loss: [6.8535331]
epoch: 4 loss: [4.38260034]
epoch: 5 loss: [12.3640222]
epoch: 6 loss: [7.79318677]
epoch: 7 loss: [19.8745113]
epoch: 8 loss: [13.20377319]
epoch: 9 loss: [29.3850004]
epoch: 10 loss: [20.61435961]
epoch: 11 loss: [40.8954895]
epoch: 12 loss: [30.02494604]
epoch: 13 loss: [54.4059786]
epoch: 14 loss: [41.43553246]
epoch: 15 loss: [69.9164677]
epoch: 16 loss: [54.84611888]
epoch: 17 loss: [87.4269568]
epoch: 18 loss: [70.2567053]
epoch: 19 loss: [106.9374459]
epoch: 20 loss: [87.66729173]
epoch: 21 loss: [128.447935]
epoch: 22 loss: [107.07787815]
epoch: 23 loss: [151.95842411]
epoch: 24 loss: [128.48846457]
epoch: 25 loss: [177.46891321]
epoch: 26 loss: [151.899051]
epoch: 27 loss: [204.97940231]
epoch: 28 loss: [177.30963742]
epoch: 29 loss: [234.48989141]
epoch: 30 loss: [204.72022384]
epoch: 31 loss: [266.00038051]
epoch: 32 loss: [234.13081027]
epoch: 33 loss: [299.5108696

## 学習率とは

上記のように、差分だけを直そうとすると、平均的な誤差がお大きくなってしまう。その理由は、確か、勾配の方向が正しいが、ステップが大きいすぎる。つまり、最適な数値から大幅に超えてしまい、段々離れてしまう。

「学習率」という係数で、ステップの大きさを小さくし、少しずつ最適な数値に近づくようにする。

In [13]:
# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]
    
    # 誤差の平均
    mse = 0
    
    # さて、１個ずつを処理しよう
    for i in range(data_size):
        
        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        
        # 誤差を計算
        err = error(y_t, y_p)
        mse = err * err
        
        # 学習率
        learning_rate = 0.01

        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]
        
        w0   = update_weight(w0, x[0], err * learning_rate)
        w1   = update_weight(w1, x[1], err * learning_rate)
        bias = update_weight(bias, 1 , err * learning_rate)
        
        model["weights"][0] = w0[0]
        model["weights"][1] = w1[0]
        model["bias"] = bias[0]
        
    return mse / data_size

In [14]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [2658.20074063]
epoch: 1 loss: [2471.88919735]
epoch: 2 loss: [2302.52106815]
epoch: 3 loss: [2148.38300169]
epoch: 4 loss: [2007.94651139]
epoch: 5 loss: [1879.84713479]
epoch: 6 loss: [1762.86601016]
epoch: 7 loss: [1655.91358519]
epoch: 8 loss: [1558.0152062]
epoch: 9 loss: [1468.29836673]
epoch: 10 loss: [1385.98142038]
epoch: 11 loss: [1310.3635861]
epoch: 12 loss: [1240.81609463]
epoch: 13 loss: [1176.77434261]
epoch: 14 loss: [1117.73093676]
epoch: 15 loss: [1063.22952464]
epoch: 16 loss: [1012.85932031]
epoch: 17 loss: [966.25024461]
epoch: 18 loss: [923.06860877]
epoch: 19 loss: [883.01327875]
epoch: 20 loss: [845.81226495]
epoch: 21 loss: [811.2196885]
epoch: 22 loss: [779.01308094]
epoch: 23 loss: [748.99097943]
epoch: 24 loss: [720.97078362]
epoch: 25 loss: [694.78684479]
epoch: 26 loss: [670.28876076]
epoch: 27 loss: [647.33985359]
epoch: 28 loss: [625.81580947]
epoch: 29 loss: [605.6034627]
epoch: 30 loss: [586.59970771]
epoch: 31 loss: [568.71052497]
epoch

In [15]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> -0.38211971865603694 err: [0.38211972]
[0. 1.] [0.] -> 0.22760394660223576 err: [-0.22760395]
[1. 0.] [0.] -> 0.23158263753971936 err: [-0.23158264]
[1. 1.] [1.] -> 0.8413063027979921 err: [0.1586937]


In [16]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [0.] -> 0 err: [0.]
[1. 0.] [0.] -> 0 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]


## 練習:「OR」を学習させる

In [17]:
#期待してる出力（ラベル）
y_true = np.array([
    [0], 
    [1], 
    [1], 
    [1]
], dtype = float)
print(y_true.shape)

(4, 1)


In [18]:
# 推論とラベルの誤差は：
model = create_model()

In [19]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [1.50668551]
epoch: 1 loss: [1.28741126]
epoch: 2 loss: [1.09780564]
epoch: 3 loss: [0.93404651]
epoch: 4 loss: [0.79279078]
epoch: 5 loss: [0.67111526]
epoch: 6 loss: [0.56646477]
epoch: 7 loss: [0.47660662]
epoch: 8 loss: [0.39959064]
epoch: 9 loss: [0.3337141]
epoch: 10 loss: [0.277491]
epoch: 11 loss: [0.22962505]
epoch: 12 loss: [0.18898604]
epoch: 13 loss: [0.15458908]
epoch: 14 loss: [0.12557642]
epoch: 15 loss: [0.10120147]
epoch: 16 loss: [0.08081488]
epoch: 17 loss: [0.06385225]
epoch: 18 loss: [0.04982339]
epoch: 19 loss: [0.03830295]
epoch: 20 loss: [0.02892217]
epoch: 21 loss: [0.02136165]
epoch: 22 loss: [0.01534508]
epoch: 23 loss: [0.01063365]
epoch: 24 loss: [0.00702131]
epoch: 25 loss: [0.00433044]
epoch: 26 loss: [0.00240821]
epoch: 27 loss: [0.00112336]
epoch: 28 loss: [0.00036331]
epoch: 29 loss: [3.17719886e-05]
epoch: 30 loss: [4.65268338e-05]
epoch: 31 loss: [0.00033759]
epoch: 32 loss: [0.00084554]
epoch: 33 loss: [0.00152012]
epoch: 34 loss: [0.

In [20]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> 0.254729173621862 err: [-0.25472917]
[0. 1.] [1.] -> 0.7502406377334313 err: [0.24975936]
[1. 0.] [1.] -> 0.7479704476485354 err: [0.25202955]
[1. 1.] [1.] -> 1.2434819117601048 err: [-0.24348191]


In [21]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [1.] -> 1 err: [0.]
[1. 0.] [1.] -> 1 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]
