In [1]:
import numpy as np
# Build the 1-D integer array [1, 2, 3] and display it.
x = np.arange(1, 4)
print(x)

[1 2 3]


In [1]:
import numpy as np
# Matrix product of a (3, 2) matrix with a (2, 3) matrix, giving a (3, 3) result.
x = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
y = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
n = x @ y
print(n)


[[ 9 12 15]
 [19 26 33]
 [29 40 51]]


In [5]:
import numpy as np

# Seeded generator: the same inputs and parameters are drawn on every run, so
# the forward-pass output of this cell can be reproduced. (The unseeded global
# np.random state produced different numbers each time the cell was executed.)
rng = np.random.default_rng(0)

# Every array below is drawn uniformly from [0, 1).
x1 = rng.random((10, 4))  # inputs: 10 samples x 4 features
w1 = rng.random((4, 5))   # layer-1 weights (4 -> 5)
b1 = rng.random(5)        # layer-1 bias, shape (5,)
w2 = rng.random((5, 3))   # layer-2 weights (5 -> 3)
b2 = rng.random(3)        # layer-2 bias, shape (3,)

def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    The value is computed as exp(-log(1 + exp(-x))), with the inner
    log(1 + exp(-x)) evaluated by np.logaddexp(0, -x). Unlike the direct
    formula, this does not overflow inside exp for large negative inputs
    (which would emit a RuntimeWarning), and it keeps the small positive
    tail values accurate.

    Args:
        x: A scalar or NumPy array.

    Returns:
        A NumPy float (scalar input) or an array of the same shape as x,
        with every value in [0, 1].
    """
    return np.exp(-np.logaddexp(0.0, -x))

# Hidden layer: affine map of the inputs followed by the sigmoid activation.
h = sigmoid(x1.dot(w1) + b1)
# Output layer: affine map only, no activation.
y = h.dot(w2) + b2
print(y)

[[1.94348152 2.20605241 2.98592997]
 [2.14791343 2.4033595  3.24603116]
 [2.30944095 2.52772163 3.42666742]
 [2.24362042 2.44052543 3.34550156]
 [2.15844011 2.39858351 3.26005936]
 [2.29350296 2.50453521 3.41416543]
 [2.43569131 2.62452249 3.57626116]
 [2.00616495 2.23766623 3.0592839 ]
 [2.16642963 2.40590891 3.25998457]
 [2.42973387 2.57786708 3.55583845]]


In [None]:
import numpy as np

# ======================
# 0. Prepare the training data
# ======================
# Seeded generator: the same inputs and labels are produced on every run.
data_rng = np.random.default_rng(0)

# Number of classes; used both for drawing labels and for one-hot encoding.
n_classes = 3

# Inputs x: (10, 4) -> 10 samples with 4 features each, uniform in [0, 1).
x = data_rng.random((10, 4))

# Ground-truth labels: 10 arbitrary class indices in {0, 1, 2},
# e.g. [0, 2, 1, 0, 1, ...]
t = data_rng.integers(0, n_classes, size=10)

# One-hot encode the labels to shape (10, 3): indexing the identity matrix by
# class picks that class's one-hot row, e.g. class 0 -> [1, 0, 0] and
# class 2 -> [0, 0, 1].
t_one_hot = np.eye(n_classes)[t]  # shape: (10,3)


# ======================
# 1. Initialise the parameters
# ======================
# Seeded generator: the same starting weights are drawn on every run, so the
# network starts from the same state each time the cell is executed.
param_rng = np.random.default_rng(1)

# All parameters are drawn from the standard normal distribution.
w1 = param_rng.standard_normal((4, 5))  # layer-1 weights (4 -> 5)
b1 = param_rng.standard_normal(5)       # layer-1 bias, shape (5,)
w2 = param_rng.standard_normal((5, 3))  # layer-2 weights (5 -> 3)
b2 = param_rng.standard_normal(3)       # layer-2 bias, shape (3,)

# Learning rate
lr = 0.1


# ======================
# 2. Forward pass
# ======================
def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    The value is computed as exp(-log(1 + exp(-x))), with the inner
    log(1 + exp(-x)) evaluated by np.logaddexp(0, -x). Unlike the direct
    formula, this does not overflow inside exp for large negative inputs
    (which would emit a RuntimeWarning), and it keeps the small positive
    tail values accurate.

    Args:
        x: A scalar or NumPy array.

    Returns:
        A NumPy float (scalar input) or an array of the same shape as x,
        with every value in [0, 1].
    """
    return np.exp(-np.logaddexp(0.0, -x))

def softmax(x, axis=1):
    """Numerically stable softmax along one axis.

    Args:
        x: Array of logits. With the default axis this is a batch of
            shape (N, C), normalised across the C classes of each row.
        axis: Axis along which the outputs sum to 1. Defaults to 1; pass -1
            to normalise a single 1-D logit vector.

    Returns:
        An array of the same shape as x whose entries are non-negative and
        sum to 1 along `axis`.
    """
    # Subtracting the maximum along `axis` leaves the softmax unchanged but
    # keeps every exponent <= 0, so exp cannot overflow.
    exp_shifted = np.exp(x - np.max(x, axis=axis, keepdims=True))
    # The exponentials are computed once and reused for the normaliser.
    return exp_shifted / np.sum(exp_shifted, axis=axis, keepdims=True)

# --- Layer 1 (hidden layer): affine map followed by sigmoid ---
h = sigmoid(x.dot(w1) + b1)      # shape: (10,5)

# --- Layer 2 (output layer): affine map, then softmax over the classes ---
logits = h.dot(w2) + b2          # shape: (10,3)
y = softmax(logits)              # shape: (10,3)


# ======================
# 3. Loss function (cross entropy)
# ======================
def cross_entropy(pred, true_onehot):
    """Mean cross-entropy between predicted probabilities and one-hot targets.

    Args:
        pred: Predicted class probabilities, shape (N, 3).
        true_onehot: One-hot encoded targets, shape (N, 3).

    Returns:
        The average over the N samples of -sum_i t_i * log(y_i + eps).
    """
    eps = 1e-7  # keeps log finite when a predicted probability is exactly 0
    weighted_log_prob = true_onehot * np.log(pred + eps)
    per_sample = weighted_log_prob.sum(axis=1)
    return -per_sample.mean()

# Loss of the network before any parameter update, printed for reference.
loss = cross_entropy(pred=y, true_onehot=t_one_hot)
print("Before update, Loss:", loss)


# ======================
# 4. Backpropagation
# ======================
# The gradient formulas below were derived by hand and are implemented directly.

# 4.1 Gradient at the output layer
#   For softmax followed by cross-entropy,
#   dL/d(logits) = (y - t_one_hot) / N, where N is the number of samples.
N = len(x)
delta_out = (y - t_one_hot) / N  # shape: (10,3)

# 4.2 Gradients of the layer-2 parameters w2, b2
grad_w2 = h.T.dot(delta_out)      # shape: (5,3)
grad_b2 = delta_out.sum(axis=0)   # shape: (3,)

# 4.3 Gradient flowing back into layer 1
#   dL/dh = delta_out . w2^T
delta_h = delta_out.dot(w2.T)  # shape: (10,5)

#   Sigmoid derivative: sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)),
#   hence dL/dz1 = dL/dh * sigmoid'(z1), where z1 = x . w1 + b1 is the
#   pre-activation of layer 1. z1 is not stored, so the derivative is
#   rebuilt from h = sigmoid(z1).
delta_z1 = (h * (1 - h)) * delta_h  # shape: (10,5)

# 4.4 Gradients of the layer-1 parameters w1, b1
grad_w1 = x.T.dot(delta_z1)         # shape: (4,5)
grad_b1 = delta_z1.sum(axis=0)      # shape: (5,)

# ======================
# 5. Parameter update (gradient descent)
# ======================
# One gradient-descent step per parameter. Writing the result through `out=`
# updates each array in place, exactly as the augmented `-=` operator does.
np.subtract(w1, lr * grad_w1, out=w1)
np.subtract(b1, lr * grad_b1, out=b1)
np.subtract(w2, lr * grad_w2, out=w2)
np.subtract(b2, lr * grad_b2, out=b2)


# ======================
# 6. Check the loss after the update (run the forward pass again)
# ======================
# Same forward computation as before, now with the updated parameters.
h = sigmoid(x.dot(w1) + b1)
logits = h.dot(w2) + b2
y = softmax(logits)

loss = cross_entropy(pred=y, true_onehot=t_one_hot)
print("After update, Loss :", loss)
