In [97]:
import torch
from sklearn import datasets
import matplotlib.pyplot as plt
from torch.nn import functional as F
from sklearn.model_selection import train_test_split
import numpy as np
import math
from torch import nn

### Backpropagation

In [126]:
# データの読み込み
dataset = datasets.load_digits()
images = dataset['images']
target = dataset['target']

X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2)
print(X_train.shape, X_val.shape)
print(y_train.shape, y_val.shape)

# 前処理
X_train = torch.tensor(X_train, dtype=torch.float32).reshape(-1, 64)
y_train = F.one_hot(torch.tensor(y_train), num_classes=10) # one-hot
X_val= torch.tensor(X_val, dtype=torch.float32).reshape(-1, 64)
y_val = F.one_hot(torch.tensor(y_val), num_classes=10) # one-hot

print(X_train.shape, X_val.shape)
print(y_train.shape, y_val.shape)

# 標準化
X_train_mean = X_train.mean()
X_train_std = X_train.std()
X_train = (X_train - X_train_mean) / X_train_std
X_val = (X_val - X_train_mean) / X_train_std

(1437, 8, 8) (360, 8, 8)
(1437,) (360,)
torch.Size([1437, 64]) torch.Size([360, 64])
torch.Size([1437, 10]) torch.Size([360, 10])


In [159]:
def linear_backward(A, W, b, Z):
    W.grad_ = Z.grad_.T @ A 
    b.grad_ = torch.sum(Z.grad_ , dim=0)
    A.grad_ = Z.grad_ @ W

def relu_backward(Z, A):
    Z.grad_ = A.grad_ * (Z > 0).float()

def softmax_cross_entropy(X, y_true):
    e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True)[0])
    softmax_out = e_x / (torch.sum(e_x, dim=-1, keepdim=True) + 1e-10)
    loss = -torch.sum(y_true * torch.log(softmax_out  + 1e-10)) / y_true.shape[0]
    return loss, softmax_out

def linear(X, W, b):
    return X @ W.T + b

def relu(Z):
    return Z.clamp_min(0.)

def forward_and_backward(X, y):
    # forward
    Z1 = linear(X, W1, b1)
    Z1.retain_grad() # 中間ノードは勾配を求める時にretain_grad()が必要
    A1 = relu(Z1)
    A1.retain_grad()
    Z2 = linear(A1, W2, b2)
    Z2.retain_grad()
    loss , A2 = softmax_cross_entropy(Z2, y)
    A2.retain_grad()
    # backward
    Z2.grad_ = (A2 - y) / X.shape[0]
    linear_backward(A1, W2, b2, Z2)
    relu_backward(Z1, A1)
    linear_backward(X, W1, b1, Z1)
    return loss, Z1, A1, Z2, A2

### Autogradの結果と確認することを確認

In [163]:
# パラメータの初期化

num_features = X_train.shape[1]
hidden_size = 32
num_classes = y_train.shape[1]


# パラメータの初期値
W1 = torch.randn((hidden_size, num_features)) * torch.sqrt(torch.tensor(2 / num_features))  # kaiming初期値
W1.requires_grad=True
b1 = torch.zeros((1, hidden_size), requires_grad=True)

print(X_train.shape, W1.T.shape)
W2 = torch.randn((num_classes, hidden_size))  * torch.sqrt(torch.tensor(2 / hidden_size)) # kaiming初期値
W2.requires_grad = True
b2 = torch.zeros((1, num_classes), requires_grad=True)




# スクラッチ実装したbackwardの出力
loss, Z1, A1, Z2, A2 = forward_and_backward(X_train, y_train)


torch.Size([1437, 64]) torch.Size([64, 32])


In [164]:
# autograd
loss.backward()

In [165]:
W1.grad_

tensor([[-0.0199, -0.0225, -0.0143,  ...,  0.0537,  0.0045, -0.0174],
        [-0.0116, -0.0152, -0.0299,  ..., -0.0258, -0.0237, -0.0112],
        [-0.1092, -0.1090, -0.0287,  ...,  0.1149, -0.0437, -0.0958],
        ...,
        [ 0.0117,  0.0153,  0.0250,  ...,  0.0085,  0.0094,  0.0113],
        [-0.1055, -0.1064, -0.0749,  ...,  0.2135,  0.0351, -0.0801],
        [-0.0293, -0.0314, -0.0298,  ...,  0.0556, -0.0088, -0.0284]],
       grad_fn=<MmBackward0>)

In [166]:
W1.grad

tensor([[-0.0199, -0.0225, -0.0143,  ...,  0.0537,  0.0045, -0.0174],
        [-0.0116, -0.0152, -0.0299,  ..., -0.0258, -0.0237, -0.0112],
        [-0.1092, -0.1090, -0.0287,  ...,  0.1149, -0.0437, -0.0958],
        ...,
        [ 0.0117,  0.0153,  0.0250,  ...,  0.0085,  0.0094,  0.0113],
        [-0.1055, -0.1064, -0.0749,  ...,  0.2135,  0.0351, -0.0801],
        [-0.0293, -0.0314, -0.0298,  ...,  0.0556, -0.0088, -0.0284]])

In [171]:
# だいだい同じかを確認
print(torch.allclose(W1.grad, W1.grad_),torch.allclose(W2.grad, W2.grad_),torch.allclose(b1.grad, b1.grad_),torch.allclose(b2.grad, b2.grad_))

True True True True


##### 補足 : one-hotの確認

In [62]:
a = torch.tensor([2, 3, 1, 4, 5], dtype=torch.long)
one_hot = F.one_hot(a,  num_classes=10) # 0 ~ 9の10クラス分類
one_hot

tensor([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]])