# Optimizer

## スクラッチ実装

In [1]:
from torch import nn
import torch
from torch.nn import functional as F
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [12]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        num_in: Number of input features.
        num_hidden: Width of the hidden layer.
        num_out: Number of output units.
    """

    def __init__(self, num_in, num_hidden, num_out):
        super().__init__()
        # First layer: input features -> hidden units.
        self.l1 = nn.Linear(num_in, num_hidden)
        # Second layer: hidden units -> output units.
        self.l2 = nn.Linear(num_hidden, num_out)

    def forward(self, x):
        """Return the output of the second linear layer for input ``x``."""
        hidden = F.relu(self.l1(x))
        return self.l2(hidden)

In [1]:
class Optimizer:  # minimal counterpart of optim.SGD()
    """Plain stochastic gradient descent over a fixed set of parameters.

    Args:
        parameters: Iterable of tensors to update (e.g. ``model.parameters()``).
        lr: Learning rate multiplied with each gradient in ``step``.
    """

    def __init__(self, parameters, lr=0.03):
        # Materialize the iterable: it may be a one-shot generator, and both
        # step() and zero_grad() need to iterate over it repeatedly.
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        """Update every parameter in place: ``param -= lr * param.grad``.

        Parameters whose ``grad`` is ``None`` (they did not take part in the
        last backward pass) are left untouched instead of raising.
        """
        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            for param in self.parameters:
                if param.grad is None:
                    continue
                param -= self.lr * param.grad

    def zero_grad(self):
        """Reset the accumulated gradient of every parameter to zero in place."""
        for param in self.parameters:
            if param.grad is not None:
                param.grad.zero_()

In [17]:
# Build the network: 64 input features, 30 hidden units, 10 outputs.
model = MLP(num_in=64, num_hidden=30, num_out=10)
# Displayed as the cell result to show what parameters() returns.
model.parameters()

<generator object Module.parameters at 0x7ffefe65af90>

In [16]:
# Step size handed to the hand-written optimizer below.
learning_rate = 0.01
# Optimizer copies the parameters into a list, so passing the generator is fine.
opt = Optimizer(model.parameters(), lr=learning_rate)

In [19]:
# Data preparation: load the scikit-learn digits dataset.
dataset = datasets.load_digits()
data, target, images = (dataset[key] for key in ('data', 'target', 'images'))

# Hold out 20% of the samples for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(
    images, target, test_size=0.2, random_state=0
)

# Preprocessing: flatten each sample to 64 features and standardize.
# The scaler is fitted on the training split only and reused for validation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, 64))
X_val = scaler.transform(X_val.reshape(-1, 64))

# Convert features to float32 tensors and labels to tensors.
X_train, X_val = (
    torch.tensor(features, dtype=torch.float32).reshape(-1, 64)
    for features in (X_train, X_val)
)
y_train, y_val = torch.tensor(y_train), torch.tensor(y_val)

# Training configuration
batch_size = 30
# NOTE: reassigning learning_rate here does not change the step size of an
# optimizer that has already been constructed; it only affects optimizers
# built after this point.
learning_rate = 0.03
train_losses = []
val_losses = []
val_accuracies = []
nh = 30
m, n = X_train.shape  # m: number of training samples, n: number of features
num_classes = 10
num_batches = np.ceil(m / batch_size).astype(int)
num_epochs = 100

# Training
for epoch in range(num_epochs):
    # Draw a fresh sample order every epoch.
    shuffled_indices = np.random.permutation(len(y_train))
    running_loss = 0.0

    for batch_idx in range(num_batches):
        # Slice with batch_size (not a literal) so the slices stay consistent
        # with num_batches; the last batch may be shorter.
        start_index = batch_idx * batch_size
        end_index = start_index + batch_size
        batch_indices = shuffled_indices[start_index:end_index]

        X = X_train[batch_indices]
        y = y_train[batch_indices]
        preds = model(X)
        loss = F.cross_entropy(preds, y)
        running_loss += loss.item()

        loss.backward()
        opt.step()
        # Clear gradients so they do not accumulate into the next batch.
        opt.zero_grad()

    # Validation: no gradients are needed for evaluation.
    with torch.no_grad():
        preds_val = model(X_val)
        loss_val = F.cross_entropy(preds_val, y_val)
        # Store a plain float rather than a 0-d tensor.
        accuracy_val = (torch.sum(torch.argmax(preds_val, dim=1) == y_val) / y_val.shape[0]).item()

    train_losses.append(running_loss / num_batches)
    val_losses.append(loss_val.item())
    val_accuracies.append(accuracy_val)
    print(f'epoch:{epoch}, train_loss:{train_losses[-1]}, val_losses:{val_losses[-1]}, val_accuracy:{val_accuracies[-1]}')

epoch:0, train_loss:2.2738026281197867, val_losses:2.2145540714263916, val_accuracy:0.1805555522441864
epoch:1, train_loss:2.138106845319271, val_losses:2.0879812240600586, val_accuracy:0.3722222149372101
epoch:2, train_loss:1.9987405265371005, val_losses:1.9540841579437256, val_accuracy:0.5305555462837219
epoch:3, train_loss:1.8486982857187588, val_losses:1.8114182949066162, val_accuracy:0.5888888835906982
epoch:4, train_loss:1.6890101606647174, val_losses:1.6618175506591797, val_accuracy:0.6333333253860474
epoch:5, train_loss:1.526999570429325, val_losses:1.5124379396438599, val_accuracy:0.6583333611488342
epoch:6, train_loss:1.3703487043579419, val_losses:1.3690534830093384, val_accuracy:0.6833333373069763
epoch:7, train_loss:1.225645337253809, val_losses:1.2356295585632324, val_accuracy:0.7083333134651184
epoch:8, train_loss:1.095756823817889, val_losses:1.1149777173995972, val_accuracy:0.730555534362793
epoch:9, train_loss:0.9817953805128733, val_losses:1.0080468654632568, val_acc

## torch.optim

In [18]:
from torch import optim

# Library optimizer built from the same model parameters and learning rate.
opt = optim.SGD(params=model.parameters(), lr=learning_rate)