## Optimizer

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np

### スクラッチ実装

In [2]:
class MLP(nn.Module):
    def __init__(self, num_in, num_hidden, num_out):
        super().__init__()
        self.l1 = nn.Linear(num_in, num_hidden)
        self.l2 = nn.Linear(num_hidden, num_out)

    def forward(self, x):
        # z1 = self.l1(x)
        # a1 = F.relu(z1)
        # z2 = self.l2(a1)
        x = self.l2(F.relu(self.l1(x)))
        return x

In [3]:
model = MLP(64, 30, 10)
model.parameters()

<generator object Module.parameters at 0xffff4a3ebba0>

In [4]:
class Optimizer():

    def __init__(self, parameters, lr=0.03):
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        with torch.no_grad():
            for param in self.parameters:
                param -= self.lr * param.grad

    def zero_grad(self):
        for param in self.parameters:
            if param.grad is not None:
                param.grad.zero_()

In [5]:
learning_rate = 0.01
opt = Optimizer(parameters=model.parameters(), lr=learning_rate)

In [6]:

## Refactoring後の学習ループ(OptimizerやDataset, Dataloaderは後ほどRefactoring)
# ===データの準備====
dataset = datasets.load_digits()
data = dataset['data']
target = dataset['target']
images = dataset['images']
X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)
X_train = (X_train - X_train.mean()) / X_train.std()
X_val = (X_val - X_train.mean()) / X_train.std()
X_train = torch.tensor(X_train.reshape(-1, 64), dtype=torch.float32)
X_val = torch.tensor(X_val.reshape(-1, 64), dtype=torch.float32)
y_train = torch.tensor(y_train) 
y_val = torch.tensor(y_val) 
batch_size = 30
num_batches = np.ceil(len(y_train)/batch_size).astype(int)

# ログ
train_losses = []
val_losses = []
val_accuracies = []
for epoch in range(100):
    # エポック毎にデータをシャッフル
    shuffled_indices = np.random.permutation(len(y_train))
    running_loss = 0.0
    
    for i in range(num_batches):
        
        # mini batch作成
        start = i * batch_size
        end = start + batch_size

        batch_indices = shuffled_indices[start:end]
        y = y_train[batch_indices] # batch_size x 10
        
        X = X_train[batch_indices] # batch_size x 64
        # 順伝播と逆伝播の計算
        opt.zero_grad()
        preds = model(X)
        loss = F.cross_entropy(preds, y)
        loss.backward()
        running_loss += loss.item()

        # パラメータ更新
        # with torch.no_grad():
        #     for param in model.parameters():
        #         param -= learning_rate * param.grad

        # model.zero_grad()
        opt.step()
        

    # validation
    with torch.no_grad():
        preds_val = model(X_val)
        val_loss = F.cross_entropy(preds_val, y_val)
        val_accuracy = torch.sum(torch.argmax(preds_val, dim=-1) == y_val) / y_val.shape[0]

    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    val_accuracies.append(val_accuracy)
    print(f'epoch: {epoch}: train error: {running_loss/num_batches}, validation error: {val_loss.item()}, validation accuracy: {val_accuracy}')

epoch: 0: train error: 2.250039726495743, validation error: 2.1881699562072754, validation accuracy: 0.18888889253139496
epoch: 1: train error: 2.118503545721372, validation error: 1.8520101308822632, validation accuracy: 0.34166666865348816
epoch: 2: train error: 1.9763285120328267, validation error: 1.5020840167999268, validation accuracy: 0.48055556416511536
epoch: 3: train error: 1.8085261459151905, validation error: 1.203054428100586, validation accuracy: 0.6027777791023254
epoch: 4: train error: 1.61716744552056, validation error: 1.0023492574691772, validation accuracy: 0.6611111164093018
epoch: 5: train error: 1.4140830611189206, validation error: 0.825630247592926, validation accuracy: 0.7027778029441833
epoch: 6: train error: 1.2153398506343365, validation error: 0.7068569660186768, validation accuracy: 0.7722222208976746
epoch: 7: train error: 1.0370984685917695, validation error: 0.6625185608863831, validation accuracy: 0.8083333373069763
epoch: 8: train error: 0.8885923624

### torch.optim

In [7]:
from torch import optim
opt = optim.SGD(model.parameters(), lr=learning_rate)