# 準備
1.  必要パッケージのインストール

In [None]:
! pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torchinfo import summary

# MNISTデータセットのダウンロード

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# with the given single-channel mean / std
# (0.1307 / 0.3081 are presumably the MNIST training-set statistics - confirm).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Only the training split requests a download; the test split reads from the
# same "./data" root.
training_set = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
test_set = datasets.MNIST(root="./data", train=False, transform=transform)

# Shuffled mini-batches of 64 for training, fixed-order batches of 1000 for evaluation.
train_loader = torch.utils.data.DataLoader(dataset=training_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=1000)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



# モデルの定義（CNN）

In [None]:
class HSwish(nn.Module):
    """Hard-swish activation: ``x * relu6(x + 3) / 6``, applied element-wise."""

    def forward(self, x):
        # relu6 clamps (x + 3) into [0, 6]; the input is then multiplied by
        # that gate and the product divided by 6.
        gate = F.relu6(x + 3)
        return x * gate / 6


class MobileNetV4Block(nn.Module):
    """
    MobileNetV4 Universal Inverted Bottleneck (UIB)
    - optional dw1: depthwise BEFORE expansion
    - expand: 1x1
    - optional dw2: depthwise AFTER expansion
    - project: 1x1

    Every convolution is bias-free and followed by BatchNorm2d; all stages
    except the final projection are also followed by an activation.
    The input is added back to the output when ``stride == 1`` and
    ``inp == oup``.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        expand_ratio: the hidden width is ``inp * expand_ratio``.
        kernel_size: kernel size of the depthwise convolutions
            (padding is ``kernel_size // 2``).
        stride: spatial stride of the block. It is applied exactly once:
            on dw2 when ``use_dw2`` is true, otherwise on dw1. If both
            depthwise convolutions are disabled the block contains only
            stride-1 1x1 convolutions, so ``stride`` does not change the
            spatial size.
        use_dw1: include the depthwise convolution before the expansion.
        use_dw2: include the depthwise convolution after the expansion.
        activation: ``'relu'`` selects ``nn.ReLU(inplace=True)``; any other
            value selects ``HSwish``.
    """
    def __init__(
        self,
        inp,
        oup,
        expand_ratio=4,
        kernel_size=3,
        stride=1,
        use_dw1=True,   # optional depthwise before expand
        use_dw2=True,    # optional depthwise after expand
        activation='hswish'
    ):
        super().__init__()

        hidden_dim = inp * expand_ratio
        self.use_res = (stride == 1 and inp == oup)

        def make_act():
            # Build a fresh activation for each slot so that no module object
            # is registered more than once inside the Sequential.
            if activation == 'relu':
                return nn.ReLU(inplace=True)
            return HSwish()

        # Passing `stride` to both depthwise convolutions would downsample
        # twice (stride=2 would shrink the feature map 4x). Apply it once:
        # on dw2 when it exists, otherwise on dw1.
        dw1_stride = 1 if use_dw2 else stride
        dw2_stride = stride
        padding = kernel_size // 2

        layers = []

        # Optional Depthwise BEFORE Expand (ExtraDW)
        if use_dw1:
            layers += [
                nn.Conv2d(inp, inp, kernel_size, dw1_stride,
                          padding, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                make_act(),
            ]

        # Expand 1x1
        layers += [
            nn.Conv2d(inp, hidden_dim, 1, 1, bias=False),
            nn.BatchNorm2d(hidden_dim),
            make_act(),
        ]

        # Optional dw AFTER expand
        if use_dw2:
            layers += [
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, dw2_stride,
                          padding, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                make_act(),
            ]

        # Project 1x1 (linear bottleneck: no activation after the BatchNorm)
        layers += [
            nn.Conv2d(hidden_dim, oup, 1, 1, bias=False),
            nn.BatchNorm2d(oup)
        ]

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the residual is enabled."""
        out = self.block(x)
        if self.use_res:
            return out + x
        return out


class MyMnistNet(nn.Module):
    """Compact classifier: conv stem -> two MobileNetV4 blocks -> global average pool -> linear head.

    The stem takes single-channel images and the head emits 10 raw scores
    per sample (no softmax is applied inside the model).
    """

    def __init__(self):
        super().__init__()

        # Stem: 1 -> 12 channels with a stride-2 3x3 convolution
        stem_layers = [
            nn.Conv2d(in_channels=1, out_channels=12, kernel_size=3,
                      stride=2, padding=1, bias=False),
            nn.BatchNorm2d(num_features=12),
            HSwish(),
        ]
        self.stem = nn.Sequential(*stem_layers)

        # MobileNetV4 blocks: 12 -> 14 -> 16 channels, no further downsampling
        uib_blocks = [
            MobileNetV4Block(inp=12, oup=14, expand_ratio=2, stride=1),
            MobileNetV4Block(inp=14, oup=16, expand_ratio=2, stride=1),
        ]
        self.blocks = nn.Sequential(*uib_blocks)

        # Classifier: average each channel down to 1x1, then map 16 -> 10
        self.gap = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(in_features=16, out_features=10)

    def forward(self, x):
        features = self.blocks(self.stem(x))
        # (N, 16, 1, 1) after pooling -> (N, 16) for the linear layer
        pooled = self.gap(features).flatten(start_dim=1)
        logits = self.fc(pooled)
        return logits
# Print the torchinfo summary of a freshly built model for a batch of 64
# single-channel 28x28 inputs; the output includes the "Trainable params" count.
print( summary(MyMnistNet(), input_size=(64, 1, 28, 28)) )

# 学習とテスト


In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Train `model` for one pass over `train_loader`.

    Args:
        model: network to optimize; switched to training mode here.
        device: device the input and target batches are moved to.
        train_loader: iterable yielding ``(inputs, targets)`` batches.
        optimizer: optimizer that owns the model's parameters.
        epoch: zero-based epoch index, used only for the progress message.

    Returns:
        The mean loss per sample over the whole epoch, or ``0.0`` when the
        loader yields no batches.
    """
    model.train()
    loss_sum = None  # running sum of (batch loss * batch size), kept as a tensor
    n_seen = 0
    for batch_idx, (x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)
        p_y_hat = model(x)
        # cross_entropy takes raw scores (it applies the softmax internally)
        loss = F.cross_entropy(p_y_hat, y, label_smoothing=0.1)
        # backward() ADDS the new gradients to whatever is already stored in
        # .grad (some training schemes rely on that accumulation; this one
        # does not), so clear the previous step's gradients first.
        optimizer.zero_grad()
        loss.backward()  # compute gradients
        optimizer.step()  # update parameters

        # Accumulate without calling .item() every step, so the epoch average
        # does not depend on how often progress is printed.
        batch_size = y.size(0)
        weighted = loss.detach() * batch_size
        loss_sum = weighted if loss_sum is None else loss_sum + weighted
        n_seen += batch_size

        if batch_idx % 100 == 0:
            print(f"Epoch={epoch+1}, Batch={batch_idx+1:03}, Loss={loss.item():.4f}")

    if n_seen == 0:
        return 0.0
    return loss_sum.item() / n_seen

def test(model, device, test_loader):
    model.eval()
    correct = 0
    for x, y in test_loader:
      x = x.to(device)
      y = y.to(device)
      p_y_hat = model(x)
      y_hat = p_y_hat.argmax(dim=1, keepdim=True)
      correct += y_hat.eq(y.view_as(y_hat)).sum().item()

    accuracy = correct / len(test_loader.dataset)
    print(f"Test-set accuracy={accuracy :.04f}\n")
    return accuracy

def main(epochs=100, lr=0.001, seed=None):
    """Build a MyMnistNet, then alternate one training epoch with one test pass.

    Uses the notebook-level `train_loader` and `test_loader`.

    Args:
        epochs: number of train/test rounds to run.
        lr: learning rate passed to AdamW.
        seed: if given, seeds PyTorch's global RNG before the model is
            created so repeated runs start from the same random state;
            ``None`` leaves the RNG untouched.

    Returns:
        None. Progress and accuracy are printed by `train` and `test`.
    """
    if seed is not None:
        torch.manual_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MyMnistNet().to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

    for epoch in range(epochs):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

# Run training: main() trains for 100 epochs and evaluates on the test set
# after each one, so this cell takes a while to finish.
main()


# 実習
1. （全員）全セルを実行して精度を調べよ

2. （全員）MyMnistNetのコードを以下のように変更して、対象のセルを実行せよ

* before1
                self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1)
* after1
                self.conv2 = nn.Conv2d(32, 100, kernel_size=3, stride=1)
* before2
        self.fc1 = nn.Linear(9216, 128)
* after2
        self.fc1 = nn.Linear(14400, 128)

3. （全員）nn.Conv2dの引数を調べよ。スライドの数式とどのように対応しているか考えよ。
https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
4. （全員）問題２のafter2において、なぜ14400にすべきかを考えよ。

* ヒント：
├─Dropout: 1-3                           [64, 100, 12, 12]          --

5. （任意）以下のように変更して、対象のセルを実行し、他に変更すべき部分を変更して実行可能とせよ
* before
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)

* after
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1)

* エラーでセルが実行できないときは、以下の行をコメントアウトし、セルを実行可能とするとよい。セルが実行されるとsummaryが表示されるので、エラーを修正してから改めてコメントアウトを解除するとよい
        x = F.relu( self.fc1(x) ) # 活性化関数とLinear層を１行で書く記法
        x = self.dropout2(x) # ２番目のドロップアウト
        x = self.fc2(x) # ２番目のLinear層

7.  （全員）テスト精度が99.2%以上となるモデルを構築したい（Test-set accuracy>=0.992）。ただし、できるだけモデルのパラメータ数を少なくしたい。上記のコードを変更してこれを達成せよ。

  * パラメータ数は、以下を実行すると表示される。
      * `print( summary(MyMnistNet(), input_size=(64, 1, 28, 28)) )`
      * `Trainable params: ここにパラメータ数が表示される`

