# MLP

In [1]:
import numpy as np
import torch
from torch.nn import functional as F
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [9]:
# 1. Load the scikit-learn digits dataset and pull out the image array and the labels
dataset = datasets.load_digits()
images, target = dataset['images'], dataset['target']

In [10]:
# Split into training and validation data (20% held out, fixed seed for a reproducible split)
X_train, X_val, y_train, y_val = train_test_split(
    images,
    target,
    test_size=0.2,
    random_state=42,
)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)

(1437, 8, 8) (1437,)
(360, 8, 8) (360,)


In [4]:
# Preprocessing
# NOTE: this cell rebinds X_train / X_val / y_train / y_val in place, so running it
# twice without re-running the split cell would one-hot encode and standardize again.

# 2-1. Flatten each image to a 64-element float32 vector and one-hot encode the labels
X_train = torch.tensor(X_train, dtype=torch.float32).reshape(-1, 64)
X_val = torch.tensor(X_val, dtype=torch.float32).reshape(-1, 64)
y_train = F.one_hot(torch.tensor(y_train), num_classes=10)
y_val = F.one_hot(torch.tensor(y_val), num_classes=10)

# 2-2. Standardize the images using statistics of the training set only
X_train_mean, X_train_std = X_train.mean(), X_train.std()
X_train = (X_train - X_train_mean) / X_train_std
X_val = (X_val - X_train_mean) / X_train_std
# Using the mean & std of all the data at hand (as below) would bring the training and
# validation distributions closer together, but note that validation accuracy would
# then be slightly higher than on truly unseen data.
# X_train = (X_train - images.mean()) / images.std()
# X_val = (X_val - images.mean()) / images.std()

In [6]:
# Sanity check: validation features should now be flattened to (n_samples, 64)
X_val.shape

torch.Size([360, 64])

In [21]:
# Sanity check: training features should now be flattened to (n_samples, 64)
X_train.shape

torch.Size([1437, 64])

In [39]:
# Layer sizes: n input features, nh hidden units, class_num output classes
m, n = X_train.shape  # m: number of samples, n: number of input features
nh = 30
class_num = 10
# Parameter initialization: weights drawn from a standard normal, biases set to zero.
# Weights are stored as (out_features, in_features) because linear() multiplies by W.T.
W1 = torch.randn((nh, n), requires_grad=True)
b1 = torch.zeros((1, nh), requires_grad=True)

# Use class_num (not a literal 10) so W2 and b2 always agree on the output size
W2 = torch.randn((class_num, nh), requires_grad=True)
b2 = torch.zeros((1, class_num), requires_grad=True)

In [24]:
def linear(X, W, b):
    """Affine layer: multiply X by the transpose of W, then add the bias b."""
    projected = torch.matmul(X, W.T)
    return projected + b

In [26]:
# Shape check: the first layer should produce one nh-sized row per training sample
linear(X_train, W1, b1).shape

torch.Size([1437, 30])

In [33]:
def relu(Z):
    """ReLU activation: return a new tensor with the negative entries of Z replaced by 0."""
    return torch.clamp(Z, min=0.)

In [34]:
t = torch.tensor([-1., 0., 2., -5.])
t.clamp_min_(0.) # the trailing-underscore variant clamp_min_ overwrites t in place

tensor([0., 0., 2., 0.])

In [29]:
# t itself now holds the clamped values because clamp_min_ modified it in place
t

tensor([0., 0., 2., 0.])

In [30]:
def softmax(x):
    """Numerically stable softmax over the last dimension.

    Args:
        x: Tensor of logits; the last dimension indexes the classes.

    Returns:
        Tensor with the same shape as x whose entries along the last dimension
        are non-negative and sum to 1 (up to floating-point rounding).
    """
    # Subtract the per-row maximum so the largest exponent is exp(0) = 1,
    # which prevents overflow in torch.exp for large logits.
    shifted = x - torch.max(x, dim=-1, keepdim=True).values
    e_x = torch.exp(shifted)
    # After the shift the denominator is always >= 1, so no epsilon is needed;
    # adding one would only bias the probabilities so they no longer sum to 1.
    return e_x / torch.sum(e_x, dim=-1, keepdim=True)

In [31]:
def model(X):
    """Forward pass of the two-layer MLP: linear -> ReLU -> linear -> softmax.

    Reads the module-level parameters W1, b1, W2, b2 and returns the softmax output.
    """
    hidden = relu(linear(X, W1, b1))
    logits = linear(hidden, W2, b2)
    return softmax(logits)

In [35]:
# Run the forward pass on the whole training set
y_train_pred = model(X_train)

In [36]:
# Inspect the softmax outputs returned by model()
y_train_pred

tensor([[2.9116e-14, 1.1837e-18, 1.5258e-01,  ..., 5.7295e-13, 1.4145e-03,
         6.9556e-15],
        [4.0963e-36, 7.4882e-14, 3.3960e-28,  ..., 4.2293e-33, 1.7203e-24,
         6.5865e-26],
        [2.9381e-39, 1.7752e-31, 3.0689e-21,  ..., 5.9604e-37, 2.1907e-22,
         1.4013e-45],
        ...,
        [1.9175e-24, 4.2753e-32, 2.8219e-08,  ..., 1.7900e-29, 7.2701e-35,
         1.1404e-34],
        [4.9334e-35, 1.7236e-27, 7.6186e-05,  ..., 7.0911e-21, 1.0891e-21,
         8.8272e-31],
        [1.8032e-33, 0.0000e+00, 1.0000e+00,  ..., 6.0869e-34, 1.5526e-24,
         4.6915e-42]], grad_fn=<DivBackward0>)

In [37]:
# Shape check: one row per training sample, one column per class
y_train_pred.shape

torch.Size([1437, 10])

In [38]:
# Each row of softmax outputs should sum to 1
y_train_pred.sum(dim = 1)

tensor([1., 1., 1.,  ..., 1., 1., 1.], grad_fn=<SumBackward1>)

## 復習

In [11]:
# Preprocessing (review)
# Re-create the raw split first so this cell is self-contained and safe to re-run:
# earlier cells rebind X_train / y_train to already-preprocessed tensors, and applying
# one-hot encoding and standardization to those a second time would silently produce
# wrong data (e.g. labels of shape (N, 10, 10)).
X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)

# One-hot encode the integer class labels
y_train = F.one_hot(torch.tensor(y_train), num_classes=10)
y_val = F.one_hot(torch.tensor(y_val), num_classes=10)

# Standardize both splits with statistics computed on the training set only
X_train_mean = X_train.mean()
X_train_std = X_train.std()
X_train = (X_train - X_train_mean) / X_train_std
X_val = (X_val - X_train_mean) / X_train_std

# Convert to float32 tensors and flatten each image to a 64-element vector
X_train = torch.tensor(X_train, dtype=torch.float32).reshape(-1, 64)
X_val = torch.tensor(X_val, dtype=torch.float32).reshape(-1, 64)

In [12]:
def linear(X, W, b):
    """Affine layer: multiply X by the transpose of W, then add the bias b."""
    weights_t = W.T
    return torch.matmul(X, weights_t) + b

In [14]:
# Sanity check: training features should be flattened to (n_samples, 64)
X_train.shape

torch.Size([1437, 64])

In [15]:
# Parameter initialization: weights drawn from a standard normal, biases set to zero.
# Weights are stored as (out_features, in_features) because linear() multiplies by W.T.
m, n = X_train.shape  # m: number of samples, n: number of input features
nh = 30
class_num = 10
W1 = torch.randn((nh, n), requires_grad=True)
b1 = torch.zeros((1, nh), requires_grad=True)
# Use class_num (not a literal 10) so W2 and b2 always agree on the output size
W2 = torch.randn((class_num, nh), requires_grad=True)
b2 = torch.zeros((1, class_num), requires_grad=True)

In [16]:
def relu(Z):
    """ReLU activation: return a new tensor with the negative entries of Z replaced by 0."""
    return torch.clamp_min(Z, 0.)

In [19]:
def model(X):
    """Forward pass of the two-layer MLP: linear -> ReLU -> linear -> softmax.

    Reads the module-level parameters W1, b1, W2, b2 and returns the softmax output.
    """
    hidden = relu(linear(X, W1, b1))
    logits = linear(hidden, W2, b2)
    return softmax(logits)

In [22]:
def softmax(x):
    """Numerically stable softmax over the last dimension.

    Args:
        x: Tensor of logits; the last dimension indexes the classes.

    Returns:
        Tensor with the same shape as x whose entries along the last dimension
        are non-negative and sum to 1 (up to floating-point rounding).
    """
    # Subtract the per-row maximum so the largest exponent is exp(0) = 1,
    # which prevents overflow in torch.exp for large logits.
    shifted = x - torch.max(x, dim=-1, keepdim=True).values
    e_x = torch.exp(shifted)
    # After the shift the denominator is always >= 1, so no epsilon is needed;
    # adding one would only bias the probabilities so they no longer sum to 1.
    return e_x / torch.sum(e_x, dim=-1, keepdim=True)

In [23]:
# Run the forward pass on the whole training set
y_train_pred = model(X_train)

In [24]:
# Inspect the softmax outputs returned by model()
y_train_pred

tensor([[4.1363e-01, 4.2362e-30, 0.0000e+00,  ..., 1.5896e-41, 1.2441e-31,
         6.5275e-27],
        [2.9204e-03, 3.7782e-16, 1.7474e-17,  ..., 2.9729e-14, 5.1256e-08,
         2.3867e-09],
        [1.1387e-28, 1.2305e-39, 1.4013e-45,  ..., 0.0000e+00, 1.0000e+00,
         9.0699e-33],
        ...,
        [7.8745e-19, 0.0000e+00, 2.2307e-11,  ..., 3.3134e-17, 1.0000e+00,
         2.2332e-26],
        [4.7875e-16, 1.0899e-39, 1.5978e-23,  ..., 1.0352e-37, 1.0000e+00,
         1.5335e-36],
        [9.3205e-03, 1.4499e-26, 3.2827e-19,  ..., 1.4222e-29, 5.5366e-09,
         2.6971e-31]], grad_fn=<DivBackward0>)