<a href="https://colab.research.google.com/github/TadaoYamaoka/ShogiAIBook/blob/main/notebook/train_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Hyperparameters
learning_rate = 1e-3  # step size handed to the optimizer
batch_size = 64       # number of samples per mini-batch in the data loaders
epochs = 5            # number of full passes over the training data

In [3]:
# Device
# Use the GPU only when CUDA is actually available; otherwise fall back to the
# CPU so that the notebook also runs on runtimes without a GPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

In [4]:
# Neural network
class Net(nn.Module):
    """Small convolutional classifier: two 3x3 convolutions, one 2x2 max-pool
    and two fully connected layers producing 10 output scores.

    ``fc1`` expects 9216 = 64 * 12 * 12 features, which is what a
    single-channel 28x28 input yields after two unpadded 3x3 convolutions
    (28 -> 26 -> 24) followed by a 2x2 max-pool (24 -> 12).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: unpadded 3x3 convolutions with stride 1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=0)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for the batch ``x``."""
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        pooled = F.max_pool2d(features, 2)
        # Keep the batch dimension and collapse everything else into one axis.
        flat = torch.flatten(pooled, 1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Build the network and move its parameters to the selected device.
# nn.Module.to() returns the module itself, so the call can be chained.
model = Net().to(device)
# Trailing expression: makes the notebook display the module structure.
model

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [5]:
# Loss function
# Cross-entropy over raw class scores; "mean" (the default reduction) averages
# the loss over the samples of a batch.
loss_fn = nn.CrossEntropyLoss(reduction="mean")

In [6]:
# Optimizer
# Plain stochastic gradient descent over all parameters of the model.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
)

In [7]:
# Dataset
# Convert each image to a tensor, then normalize it with mean 0.1307 and
# standard deviation 0.3081 (presumably the MNIST training-set statistics).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split; downloaded into ./data if it is not there yet.
training_data = datasets.MNIST(
    'data', train=True, download=True, transform=transform)
# Test split, read from the same ./data directory.
test_data = datasets.MNIST(
    'data', train=False, transform=transform)

# Reshuffle the training samples every epoch so that SGD does not see the
# same mini-batches in the same order each time. The test loader keeps its
# fixed order because evaluation does not depend on it.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [8]:
# Training loop
for t in range(epochs):
    # Switch back to training mode: the evaluation pass at the end of the
    # previous epoch left the model in eval mode, which would silently change
    # the behavior of layers such as dropout or batch normalization.
    model.train()

    for batch_idx, (data, target) in enumerate(train_dataloader):
        data, target = data.to(device), target.to(device)

        # Forward pass
        output = model(data)
        loss = loss_fn(output, target)

        # Backpropagation: clear stale gradients, compute new ones, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the training loss every 100 steps
        if batch_idx % 100 == 0:
            print('epoch: {}, steps: {}/{}, train loss: {:.6f}'.format(
                t + 1,
                batch_idx, len(train_dataloader),
                loss.item()
                ))

    # At the end of each epoch, evaluate on the whole test set
    model.eval()
    test_loss = 0
    correct = 0

    # No gradients are needed for evaluation
    with torch.no_grad():
        for data, target in test_dataloader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Accumulate the per-batch loss and the number of correct predictions
            test_loss += loss_fn(output, target).item()
            correct += (output.argmax(1) == target).type(torch.float).sum().item()

    # Test loss is averaged over batches, accuracy over individual samples
    print('epoch: {}, test loss: {:.6f}, test accuracy: {:.6f}'.format(
        t + 1,
        test_loss / len(test_dataloader),
        correct / len(test_dataloader.dataset)
        ))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


epoch: 1, steps: 0/938, train loss: 2.306810
epoch: 1, steps: 100/938, train loss: 2.198878
epoch: 1, steps: 200/938, train loss: 2.148744
epoch: 1, steps: 300/938, train loss: 1.817784
epoch: 1, steps: 400/938, train loss: 1.527979
epoch: 1, steps: 500/938, train loss: 1.175787
epoch: 1, steps: 600/938, train loss: 0.736597
epoch: 1, steps: 700/938, train loss: 0.779190
epoch: 1, steps: 800/938, train loss: 0.613172
epoch: 1, steps: 900/938, train loss: 0.502654
epoch: 1, test loss: 0.475524, test accuracy: 0.871200
epoch: 2, steps: 0/938, train loss: 0.500956
epoch: 2, steps: 100/938, train loss: 0.398368
epoch: 2, steps: 200/938, train loss: 0.339160
epoch: 2, steps: 300/938, train loss: 0.443210
epoch: 2, steps: 400/938, train loss: 0.330672
epoch: 2, steps: 500/938, train loss: 0.394733
epoch: 2, steps: 600/938, train loss: 0.241399
epoch: 2, steps: 700/938, train loss: 0.453107
epoch: 2, steps: 800/938, train loss: 0.383423
epoch: 2, steps: 900/938, train loss: 0.407582
epoch: 2,