<a href="https://colab.research.google.com/github/Custardwh1pp/GoogleColab_DL/blob/main/lecture04_homework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 第4回講義 宿題

## 課題

今Lessonで学んだことを元に，MNISTのファッション版 (Fashion MNIST，クラス数10) を多層パーセプトロンによって分類してみましょう．

Fashion MNISTの詳細については以下のリンクを参考にしてください．

Fashion MNIST: https://github.com/zalandoresearch/fashion-mnist

### 目標値

Accuracy 85%

### ルール


- 訓練データは`x_train`， `t_train`，テストデータは`x_test`で与えられます．
- 予測ラベルはone-hot表現ではなく，0〜9のクラスラベルで表してください．
- **下のセルで指定されている`x_train`，`t_train`以外の学習データは使わないでください．**
- Pytorchを利用して構いません．
- ただし，**`torch.nn.Conv2d`のような高レベルのAPIは使用しないで下さい**．具体的には，`nn.Parameter`, `nn.Module`, `nn.Sequential`以外の`nn`系のAPIです．使用した場合エラーになります．
- torchvision等で既に実装されているモデルも使用しないで下さい．

### 提出方法
- 2つのファイルを提出していただきます．
    1. テストデータ (`x_test`) に対する予測ラベルをcsv形式で保存し，**Omnicampusの宿題タブから「第4回 ニューラルネットワークの最適化・正則化」を選択して**提出してください．
    2. それに対応するpythonのコードを　ファイル＞ダウンロード＞.pyをダウンロード　から保存し，**Omnicampusの宿題タブから「第4回 ニューラルネットワークの最適化・正則化 (code)」を選択して**提出してください．pythonファイル自体の提出ではなく，「提出内容」の部分にコード全体をコピー&ペーストしてください．
      
- なお，採点は1で行い，2はコードの確認用として利用します（成績優秀者はコード内容を公開させていただくかもしれません）．コードの内容を変更した場合は，**1と2の両方を提出し直してください**．

### 評価方法
- 予測ラベルの`t_test`に対する精度 (Accuracy) で評価します．
- 即時採点しLeader Boardを更新します（採点スケジュールは別アナウンス）．
- 締切時の点数を最終的な評価とします．

### ドライブのマウント

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Specify the working directory
work_dir = 'drive/MyDrive/Colab Notebooks/DLBasics2025_colab'

### データの読み込み（この部分は修正しないでください）

`__len__`は，Pythonの組み込み関数len()を呼んだときに，内部で呼ばれる特殊メソッドです．

`__getitem__`は，インデックスやキーで要素を取得するときに，内部で呼ばれる特殊メソッドです．

どちらも， Datasetクラスを自作する際によく登場します．

```python
class MyList:
    """Thin wrapper around a sequence demonstrating ``__len__`` and ``__getitem__``."""

    def __init__(self, data):
        # Keep a reference to the wrapped sequence.
        self.data = data

    def __len__(self):
        """Invoked by the built-in ``len()``."""
        size = len(self.data)
        return size

    def __getitem__(self, index):
        """Invoked for subscript access such as ``obj[index]``."""
        item = self.data[index]
        return item

mylist = MyList([10, 20, 30])
print(len(mylist))  # __len__ is called
# 3
print(mylist[1])  # __getitem__ is called
# 20
```

In [None]:
# API制限のコードと，torchに統合されているdynamoライブラリの挙動がコンフリクトを起こすため，dynamoを無効化
import torch._dynamo
torch._dynamo.disable()

import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import inspect

# Restrict the usable API: delete every attribute of torch.nn whose name is
# not in the allow-list below and does not start with "__".
nn_except = ["Module", "Parameter", "Sequential"]
for m in inspect.getmembers(nn):
    if not m[0] in nn_except and m[0][0:2] != "__":
        delattr(nn, m[0])

# Seed the torch and NumPy random number generators.
seed = 1234
torch.manual_seed(seed)
np.random.seed(seed)

# Use CUDA when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training data
x_train = np.load(work_dir + '/Lecture04/data/x_train.npy')
t_train = np.load(work_dir + '/Lecture04/data/y_train.npy')

# Test data
x_test = np.load(work_dir + '/Lecture04/data/x_test.npy')

class train_dataset(torch.utils.data.Dataset):
    """Dataset yielding (sample, label) tensor pairs for training.

    Samples are reshaped to rows of 784 values, cast to float32 and divided
    by 255 once, at construction time.
    """

    def __init__(self, x_train, t_train):
        # reshape(-1, 784) produces one row of 784 values per sample.
        self.x_train = x_train.reshape(-1, 784).astype('float32') / 255
        self.t_train = t_train

    def __len__(self):
        # Number of samples = number of rows after reshaping.
        return self.x_train.shape[0]

    def __getitem__(self, idx):
        # Returns the sample as a float tensor and its label as a long tensor.
        return torch.tensor(self.x_train[idx], dtype=torch.float), torch.tensor(self.t_train[idx], dtype=torch.long)

class test_dataset(torch.utils.data.Dataset):
    """Dataset yielding unlabeled sample tensors for inference.

    Samples are reshaped to rows of 784 values, cast to float32 and divided
    by 255 once, at construction time.
    """

    def __init__(self, x_test):
        # reshape(-1, 784) produces one row of 784 values per sample.
        self.x_test = x_test.reshape(-1, 784).astype('float32') / 255

    def __len__(self):
        # Number of samples = number of rows after reshaping.
        return self.x_test.shape[0]

    def __getitem__(self, idx):
        # Returns only the sample (no label) as a float tensor.
        return torch.tensor(self.x_test[idx], dtype=torch.float)

# Wrap the loaded arrays in the Dataset classes defined above.
trainval_data = train_dataset(x_train, t_train)
test_data = test_dataset(x_test)

### 多層パーセプトロンの実装

In [None]:
# Mini-batch size shared by all three loaders.
batch_size = 32

# Hold out a fixed number of samples for validation; the rest is used for training.
val_size = 10000
train_size = len(trainval_data) - val_size
train_data, val_data = torch.utils.data.random_split(
    trainval_data,
    [train_size, val_size],
)

# Only the training loader shuffles; validation and test keep dataset order.
dataloader_train = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
dataloader_valid = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)
dataloader_test = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
def relu(x):
    """Element-wise rectified linear unit: max(0, x)."""
    # Scalar zero created on the same device as the input.
    zero = torch.tensor(0.0, device=x.device)
    return torch.maximum(zero, x)


def softmax(x):
    """Softmax over the last dimension, shifted by the row maximum for stability."""
    row_max = x.max(dim=-1, keepdim=True).values
    shifted_exp = (x - row_max).exp()
    normaliser = shifted_exp.sum(dim=-1, keepdim=True)
    return shifted_exp / normaliser


class Dense(nn.Module):  # inherits from nn.Module
    """Fully connected layer computing ``x @ W + b``."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        # He initialisation: standard-normal samples scaled by sqrt(2 / in_dim).
        he_scale = np.sqrt(2. / in_dim)
        self.W = nn.Parameter(torch.randn(in_dim, out_dim) * he_scale)
        # Bias starts at zero.
        self.b = nn.Parameter(torch.zeros(out_dim))

    def forward(self, x):
        """Apply the affine map to ``x``."""
        return x @ self.W + self.b


class MLP(nn.Module):  # inherits from nn.Module
    """Two-layer perceptron: Dense -> ReLU -> Dense.

    ``forward`` returns the second layer's output directly; no softmax is
    applied here.
    """

    def __init__(self, in_dim, hid_dim, out_dim):
        super().__init__()
        self.l1 = Dense(in_dim, hid_dim)
        self.l2 = Dense(hid_dim, out_dim)

    def forward(self, x):
        hidden = relu(self.l1(x))
        return self.l2(hidden)

# Layer sizes: input features, hidden units, output classes.
in_dim, hid_dim, out_dim = 784, 500, 10

# Optimisation settings for the baseline run.
lr, n_epochs = 0.001, 20

# Build the model, move it to the selected device, then attach Adam to its parameters.
mlp = MLP(in_dim, hid_dim, out_dim)
mlp = mlp.to(device)
optimizer = optim.Adam(mlp.parameters(), lr=lr)

In [None]:
# Loop invariants, built once instead of once per mini-batch.
epsilon = 1e-8  # added inside log() so that log(0) cannot occur
one_hot_table = torch.eye(out_dim).to(device)  # row k is the one-hot vector of class k

for epoch in range(n_epochs):
    losses_train = []
    losses_valid = []
    train_num = 0
    train_true_num = 0
    valid_num = 0
    valid_true_num = 0

    # train() only switches the module's mode flag; it does not control autograd.
    mlp.train()
    for x, t in dataloader_train:
        x = x.to(device)
        t = t.to(device)

        # Reset accumulated gradients
        optimizer.zero_grad()

        # Forward pass
        y = mlp.forward(x)

        # Cross-entropy: batch mean of -log p(true class)
        softmax_output = softmax(y)
        loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

        # Backpropagation
        loss.backward()

        # Parameter update
        optimizer.step()

        losses_train.append(loss.item())

        # Reuse the probabilities computed for the loss instead of a second softmax.
        pred = softmax_output.argmax(1)
        train_num += t.size(0)
        train_true_num += (pred == t).sum().item()

    # eval() alone does not stop gradient tracking; no_grad() is what prevents
    # autograd from recording a graph for every validation batch.
    mlp.eval()
    with torch.no_grad():
        for x, t in dataloader_valid:
            x = x.to(device)
            t = t.to(device)

            # Forward pass
            y = mlp.forward(x)

            # Same cross-entropy as in training
            softmax_output = softmax(y)
            loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

            losses_valid.append(loss.item())

            pred = softmax_output.argmax(1)
            valid_num += t.size(0)
            valid_true_num += (pred == t).sum().item()

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        train_true_num/train_num,
        np.mean(losses_valid),
        valid_true_num/valid_num
    ))

EPOCH: 0, Train [Loss: 0.229, Accuracy: 0.915], Valid [Loss: 0.307, Accuracy: 0.891]
EPOCH: 1, Train [Loss: 0.218, Accuracy: 0.918], Valid [Loss: 0.301, Accuracy: 0.894]
EPOCH: 2, Train [Loss: 0.213, Accuracy: 0.921], Valid [Loss: 0.317, Accuracy: 0.891]
EPOCH: 3, Train [Loss: 0.207, Accuracy: 0.923], Valid [Loss: 0.311, Accuracy: 0.891]
EPOCH: 4, Train [Loss: 0.198, Accuracy: 0.926], Valid [Loss: 0.314, Accuracy: 0.891]
EPOCH: 5, Train [Loss: 0.193, Accuracy: 0.927], Valid [Loss: 0.330, Accuracy: 0.887]
EPOCH: 6, Train [Loss: 0.186, Accuracy: 0.930], Valid [Loss: 0.331, Accuracy: 0.887]
EPOCH: 7, Train [Loss: 0.180, Accuracy: 0.932], Valid [Loss: 0.322, Accuracy: 0.892]
EPOCH: 8, Train [Loss: 0.175, Accuracy: 0.934], Valid [Loss: 0.320, Accuracy: 0.893]
EPOCH: 9, Train [Loss: 0.171, Accuracy: 0.935], Valid [Loss: 0.322, Accuracy: 0.895]


In [None]:
mlp.eval()

t_pred = []
# Inference only: no_grad() keeps autograd from recording a graph per batch.
with torch.no_grad():
    for x in dataloader_test:
        x = x.to(device)

        # Forward pass
        y = mlp.forward(x)

        # Index of the largest output per sample is the predicted class label
        pred = y.argmax(1).tolist()

        t_pred.extend(pred)

# Write the predictions as a CSV with columns "id" and "label".
submission = pd.Series(t_pred, name='label')
submission.to_csv(work_dir + '/Lecture04/submission_pred_04.csv', header=True, index_label='id')

## Train models with different optimizers and tunable hyperparameters

### Subtask:
Train the MLP model using different optimization algorithms (e.g., Adam, SGD, RMSprop). For each optimizer, allow for individual tuning of hyperparameters such as learning rate, momentum, etc.


In [None]:
# Training for Adam with hid_dim: 500, params: {'lr': 0.001}
hid_dim = 500
opt_name = 'Adam'
optimizer_class = optim.Adam
params = {'lr': 0.001}
n_epochs_for_optimizer = 20

print(f"Training with hid_dim: {hid_dim} and {opt_name}")
print(f"Training with params: {params} for {n_epochs_for_optimizer} epochs")

# Instantiate a new model for each training run
mlp = MLP(in_dim, hid_dim, out_dim).to(device)

# Instantiate the optimizer with current parameters
optimizer = optimizer_class(mlp.parameters(), **params)

# Per-epoch history for this run
epoch_train_losses = []
epoch_valid_losses = []
epoch_train_accuracies = []
epoch_valid_accuracies = []

# Loop invariants, built once instead of once per mini-batch.
epsilon = 1e-8  # added inside log() so that log(0) cannot occur
one_hot_table = torch.eye(out_dim).to(device)  # row k is the one-hot vector of class k

for epoch in range(n_epochs_for_optimizer):
    losses_train = []
    train_num = 0
    train_true_num = 0

    mlp.train()
    for x, t in dataloader_train:
        x = x.to(device)
        t = t.to(device)

        optimizer.zero_grad()
        y = mlp.forward(x)

        # Cross-entropy: batch mean of -log p(true class)
        softmax_output = softmax(y)
        loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

        loss.backward()
        optimizer.step()

        losses_train.append(loss.item())

        # Reuse the probabilities computed for the loss instead of a second softmax.
        pred = softmax_output.argmax(1)
        train_num += t.size(0)
        train_true_num += (pred == t).sum().item()

    losses_valid = []
    valid_num = 0
    valid_true_num = 0

    mlp.eval()
    with torch.no_grad():
        for x, t in dataloader_valid:
            x = x.to(device)
            t = t.to(device)

            y = mlp.forward(x)

            softmax_output = softmax(y)
            loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

            losses_valid.append(loss.item())

            pred = softmax_output.argmax(1)
            valid_num += t.size(0)
            valid_true_num += (pred == t).sum().item()

    avg_train_loss = np.mean(losses_train)
    train_accuracy = train_true_num / train_num
    avg_valid_loss = np.mean(losses_valid)
    valid_accuracy = valid_true_num / valid_num

    epoch_train_losses.append(avg_train_loss)
    epoch_valid_losses.append(avg_valid_loss)
    epoch_train_accuracies.append(train_accuracy)
    epoch_valid_accuracies.append(valid_accuracy)

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch,
        avg_train_loss,
        train_accuracy,
        avg_valid_loss,
        valid_accuracy
    ))

# NOTE: this cell (re)creates both registries, so re-running it discards any
# entries that other cells appended earlier.
trained_models = []
performance_metrics = []

trained_models.append({'hid_dim': hid_dim, 'optimizer': opt_name, 'params': params, 'n_epochs': n_epochs_for_optimizer, 'model': mlp})
performance_metrics.append({
    'hid_dim': hid_dim,
    'optimizer': opt_name,
    'params': params,
    'n_epochs': n_epochs_for_optimizer,
    'train_losses': epoch_train_losses,
    'valid_losses': epoch_valid_losses,
    'train_accuracies': epoch_train_accuracies,
    'valid_accuracies': epoch_valid_accuracies
})

Training with hid_dim: 500 and Adam
Training with params: {'lr': 0.001} for 20 epochs
EPOCH: 0, Train [Loss: 0.493, Accuracy: 0.825], Valid [Loss: 0.399, Accuracy: 0.851]
EPOCH: 1, Train [Loss: 0.369, Accuracy: 0.867], Valid [Loss: 0.370, Accuracy: 0.861]
EPOCH: 2, Train [Loss: 0.330, Accuracy: 0.877], Valid [Loss: 0.326, Accuracy: 0.883]
EPOCH: 3, Train [Loss: 0.308, Accuracy: 0.886], Valid [Loss: 0.303, Accuracy: 0.888]
EPOCH: 4, Train [Loss: 0.288, Accuracy: 0.894], Valid [Loss: 0.354, Accuracy: 0.875]
EPOCH: 5, Train [Loss: 0.276, Accuracy: 0.898], Valid [Loss: 0.307, Accuracy: 0.888]
EPOCH: 6, Train [Loss: 0.258, Accuracy: 0.904], Valid [Loss: 0.317, Accuracy: 0.888]
EPOCH: 7, Train [Loss: 0.248, Accuracy: 0.907], Valid [Loss: 0.315, Accuracy: 0.885]
EPOCH: 8, Train [Loss: 0.236, Accuracy: 0.911], Valid [Loss: 0.333, Accuracy: 0.883]
EPOCH: 9, Train [Loss: 0.228, Accuracy: 0.915], Valid [Loss: 0.307, Accuracy: 0.892]
EPOCH: 10, Train [Loss: 0.220, Accuracy: 0.918], Valid [Loss: 0.

In [None]:
# Training for Adam with hid_dim: 500, params: {'lr': 0.0005}
hid_dim = 500
opt_name = 'Adam'
optimizer_class = optim.Adam
params = {'lr': 0.0005}
n_epochs_for_optimizer = 20

print(f"Training with hid_dim: {hid_dim} and {opt_name}")
print(f"Training with params: {params} for {n_epochs_for_optimizer} epochs")

# Instantiate a new model for each training run
mlp = MLP(in_dim, hid_dim, out_dim).to(device)

# Instantiate the optimizer with current parameters
optimizer = optimizer_class(mlp.parameters(), **params)

# Per-epoch history for this run
epoch_train_losses = []
epoch_valid_losses = []
epoch_train_accuracies = []
epoch_valid_accuracies = []

# Loop invariants, built once instead of once per mini-batch.
epsilon = 1e-8  # added inside log() so that log(0) cannot occur
one_hot_table = torch.eye(out_dim).to(device)  # row k is the one-hot vector of class k

for epoch in range(n_epochs_for_optimizer):
    losses_train = []
    train_num = 0
    train_true_num = 0

    mlp.train()
    for x, t in dataloader_train:
        x = x.to(device)
        t = t.to(device)

        optimizer.zero_grad()
        y = mlp.forward(x)

        # Cross-entropy: batch mean of -log p(true class)
        softmax_output = softmax(y)
        loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

        loss.backward()
        optimizer.step()

        losses_train.append(loss.item())

        # Reuse the probabilities computed for the loss instead of a second softmax.
        pred = softmax_output.argmax(1)
        train_num += t.size(0)
        train_true_num += (pred == t).sum().item()

    losses_valid = []
    valid_num = 0
    valid_true_num = 0

    mlp.eval()
    with torch.no_grad():
        for x, t in dataloader_valid:
            x = x.to(device)
            t = t.to(device)

            y = mlp.forward(x)

            softmax_output = softmax(y)
            loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

            losses_valid.append(loss.item())

            pred = softmax_output.argmax(1)
            valid_num += t.size(0)
            valid_true_num += (pred == t).sum().item()

    avg_train_loss = np.mean(losses_train)
    train_accuracy = train_true_num / train_num
    avg_valid_loss = np.mean(losses_valid)
    valid_accuracy = valid_true_num / valid_num

    epoch_train_losses.append(avg_train_loss)
    epoch_valid_losses.append(avg_valid_loss)
    epoch_train_accuracies.append(train_accuracy)
    epoch_valid_accuracies.append(valid_accuracy)

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch,
        avg_train_loss,
        train_accuracy,
        avg_valid_loss,
        valid_accuracy
    ))

# Record the trained model and its learning curves for later evaluation.
trained_models.append({'hid_dim': hid_dim, 'optimizer': opt_name, 'params': params, 'n_epochs': n_epochs_for_optimizer, 'model': mlp})
performance_metrics.append({
    'hid_dim': hid_dim,
    'optimizer': opt_name,
    'params': params,
    'n_epochs': n_epochs_for_optimizer,
    'train_losses': epoch_train_losses,
    'valid_losses': epoch_valid_losses,
    'train_accuracies': epoch_train_accuracies,
    'valid_accuracies': epoch_valid_accuracies
})

NameError: name 'optim' is not defined

In [None]:
# Training for SGD with hid_dim: 200, params: {'lr': 0.02, 'momentum': 0.9}
hid_dim = 200
opt_name = 'SGD'
optimizer_class = optim.SGD
params = {'lr': 0.02, 'momentum': 0.9}
n_epochs_for_optimizer = 25

print(f"Training with hid_dim: {hid_dim} and {opt_name}")
print(f"Training with params: {params} for {n_epochs_for_optimizer} epochs")

# Instantiate a new model for each training run
mlp = MLP(in_dim, hid_dim, out_dim).to(device)

# Instantiate the optimizer with current parameters
optimizer = optimizer_class(mlp.parameters(), **params)

# Per-epoch history for this run
epoch_train_losses = []
epoch_valid_losses = []
epoch_train_accuracies = []
epoch_valid_accuracies = []

# Loop invariants, built once instead of once per mini-batch.
epsilon = 1e-8  # added inside log() so that log(0) cannot occur
one_hot_table = torch.eye(out_dim).to(device)  # row k is the one-hot vector of class k

for epoch in range(n_epochs_for_optimizer):
    losses_train = []
    train_num = 0
    train_true_num = 0

    mlp.train()
    for x, t in dataloader_train:
        x = x.to(device)
        t = t.to(device)

        optimizer.zero_grad()
        y = mlp.forward(x)

        # Cross-entropy: batch mean of -log p(true class)
        softmax_output = softmax(y)
        loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

        loss.backward()
        optimizer.step()

        losses_train.append(loss.item())

        # Reuse the probabilities computed for the loss instead of a second softmax.
        pred = softmax_output.argmax(1)
        train_num += t.size(0)
        train_true_num += (pred == t).sum().item()

    losses_valid = []
    valid_num = 0
    valid_true_num = 0

    mlp.eval()
    with torch.no_grad():
        for x, t in dataloader_valid:
            x = x.to(device)
            t = t.to(device)

            y = mlp.forward(x)

            softmax_output = softmax(y)
            loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

            losses_valid.append(loss.item())

            pred = softmax_output.argmax(1)
            valid_num += t.size(0)
            valid_true_num += (pred == t).sum().item()

    avg_train_loss = np.mean(losses_train)
    train_accuracy = train_true_num / train_num
    avg_valid_loss = np.mean(losses_valid)
    valid_accuracy = valid_true_num / valid_num

    epoch_train_losses.append(avg_train_loss)
    epoch_valid_losses.append(avg_valid_loss)
    epoch_train_accuracies.append(train_accuracy)
    epoch_valid_accuracies.append(valid_accuracy)

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch,
        avg_train_loss,
        train_accuracy,
        avg_valid_loss,
        valid_accuracy
    ))

# Record the trained model and its learning curves for later evaluation.
trained_models.append({'hid_dim': hid_dim, 'optimizer': opt_name, 'params': params, 'n_epochs': n_epochs_for_optimizer, 'model': mlp})
performance_metrics.append({
    'hid_dim': hid_dim,
    'optimizer': opt_name,
    'params': params,
    'n_epochs': n_epochs_for_optimizer,
    'train_losses': epoch_train_losses,
    'valid_losses': epoch_valid_losses,
    'train_accuracies': epoch_train_accuracies,
    'valid_accuracies': epoch_valid_accuracies
})

Training with hid_dim: 200 and SGD
Training with params: {'lr': 0.02, 'momentum': 0.9} for 25 epochs
EPOCH: 0, Train [Loss: 0.531, Accuracy: 0.810], Valid [Loss: 0.393, Accuracy: 0.860]
EPOCH: 1, Train [Loss: 0.399, Accuracy: 0.854], Valid [Loss: 0.364, Accuracy: 0.866]
EPOCH: 2, Train [Loss: 0.368, Accuracy: 0.866], Valid [Loss: 0.364, Accuracy: 0.867]
EPOCH: 3, Train [Loss: 0.343, Accuracy: 0.874], Valid [Loss: 0.343, Accuracy: 0.875]
EPOCH: 4, Train [Loss: 0.324, Accuracy: 0.880], Valid [Loss: 0.342, Accuracy: 0.872]
EPOCH: 5, Train [Loss: 0.309, Accuracy: 0.884], Valid [Loss: 0.351, Accuracy: 0.870]
EPOCH: 6, Train [Loss: 0.298, Accuracy: 0.888], Valid [Loss: 0.314, Accuracy: 0.883]
EPOCH: 7, Train [Loss: 0.286, Accuracy: 0.893], Valid [Loss: 0.335, Accuracy: 0.875]
EPOCH: 8, Train [Loss: 0.278, Accuracy: 0.896], Valid [Loss: 0.319, Accuracy: 0.883]
EPOCH: 9, Train [Loss: 0.269, Accuracy: 0.899], Valid [Loss: 0.336, Accuracy: 0.880]
EPOCH: 10, Train [Loss: 0.261, Accuracy: 0.902], 

In [None]:
# Training for RMSprop with hid_dim: 500, params: {'lr': 0.001}
hid_dim = 500
opt_name = 'RMSprop'
optimizer_class = optim.RMSprop
params = {'lr': 0.001}
n_epochs_for_optimizer = 25

print(f"Training with hid_dim: {hid_dim} and {opt_name}")
print(f"Training with params: {params} for {n_epochs_for_optimizer} epochs")

# Instantiate a new model for each training run
mlp = MLP(in_dim, hid_dim, out_dim).to(device)

# Instantiate the optimizer with current parameters
optimizer = optimizer_class(mlp.parameters(), **params)

# Per-epoch history for this run
epoch_train_losses = []
epoch_valid_losses = []
epoch_train_accuracies = []
epoch_valid_accuracies = []

# Loop invariants, built once instead of once per mini-batch.
epsilon = 1e-8  # added inside log() so that log(0) cannot occur
one_hot_table = torch.eye(out_dim).to(device)  # row k is the one-hot vector of class k

for epoch in range(n_epochs_for_optimizer):
    losses_train = []
    train_num = 0
    train_true_num = 0

    mlp.train()
    for x, t in dataloader_train:
        x = x.to(device)
        t = t.to(device)

        optimizer.zero_grad()
        y = mlp.forward(x)

        # Cross-entropy: batch mean of -log p(true class)
        softmax_output = softmax(y)
        loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

        loss.backward()
        optimizer.step()

        losses_train.append(loss.item())

        # Reuse the probabilities computed for the loss instead of a second softmax.
        pred = softmax_output.argmax(1)
        train_num += t.size(0)
        train_true_num += (pred == t).sum().item()

    losses_valid = []
    valid_num = 0
    valid_true_num = 0

    mlp.eval()
    with torch.no_grad():
        for x, t in dataloader_valid:
            x = x.to(device)
            t = t.to(device)

            y = mlp.forward(x)

            softmax_output = softmax(y)
            loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

            losses_valid.append(loss.item())

            pred = softmax_output.argmax(1)
            valid_num += t.size(0)
            valid_true_num += (pred == t).sum().item()

    avg_train_loss = np.mean(losses_train)
    train_accuracy = train_true_num / train_num
    avg_valid_loss = np.mean(losses_valid)
    valid_accuracy = valid_true_num / valid_num

    epoch_train_losses.append(avg_train_loss)
    epoch_valid_losses.append(avg_valid_loss)
    epoch_train_accuracies.append(train_accuracy)
    epoch_valid_accuracies.append(valid_accuracy)

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch,
        avg_train_loss,
        train_accuracy,
        avg_valid_loss,
        valid_accuracy
    ))

# Record the trained model and its learning curves for later evaluation.
trained_models.append({'hid_dim': hid_dim, 'optimizer': opt_name, 'params': params, 'n_epochs': n_epochs_for_optimizer, 'model': mlp})
performance_metrics.append({
    'hid_dim': hid_dim,
    'optimizer': opt_name,
    'params': params,
    'n_epochs': n_epochs_for_optimizer,
    'train_losses': epoch_train_losses,
    'valid_losses': epoch_valid_losses,
    'train_accuracies': epoch_train_accuracies,
    'valid_accuracies': epoch_valid_accuracies
})

Training with hid_dim: 500 and RMSprop
Training with params: {'lr': 0.001} for 25 epochs
EPOCH: 0, Train [Loss: 0.519, Accuracy: 0.817], Valid [Loss: 0.377, Accuracy: 0.869]
EPOCH: 1, Train [Loss: 0.373, Accuracy: 0.864], Valid [Loss: 0.356, Accuracy: 0.870]
EPOCH: 2, Train [Loss: 0.334, Accuracy: 0.877], Valid [Loss: 0.339, Accuracy: 0.876]
EPOCH: 3, Train [Loss: 0.310, Accuracy: 0.885], Valid [Loss: 0.327, Accuracy: 0.882]
EPOCH: 4, Train [Loss: 0.291, Accuracy: 0.892], Valid [Loss: 0.316, Accuracy: 0.883]
EPOCH: 5, Train [Loss: 0.277, Accuracy: 0.897], Valid [Loss: 0.361, Accuracy: 0.870]
EPOCH: 6, Train [Loss: 0.263, Accuracy: 0.902], Valid [Loss: 0.367, Accuracy: 0.868]
EPOCH: 7, Train [Loss: 0.252, Accuracy: 0.906], Valid [Loss: 0.333, Accuracy: 0.889]
EPOCH: 8, Train [Loss: 0.244, Accuracy: 0.909], Valid [Loss: 0.328, Accuracy: 0.886]
EPOCH: 9, Train [Loss: 0.231, Accuracy: 0.914], Valid [Loss: 0.366, Accuracy: 0.881]
EPOCH: 10, Train [Loss: 0.224, Accuracy: 0.917], Valid [Loss:

## Evaluate each model

### Subtask:
Evaluate the performance of each trained model on the validation set.


In [None]:
# Report, for every recorded run, the validation accuracy of its last epoch.
for run in performance_metrics:
    last_valid_acc = run['valid_accuracies'][-1]
    summary = f"Optimizer: {run['optimizer']}, Params: {run['params']}, Final Validation Accuracy: {last_valid_acc:.3f}"
    print(summary)

Optimizer: Adam, Params: {'lr': 0.001}, Final Validation Accuracy: 0.894
Optimizer: Adam, Params: {'lr': 0.0005}, Final Validation Accuracy: 0.898
Optimizer: SGD, Params: {'lr': 0.02, 'momentum': 0.9}, Final Validation Accuracy: 0.892
Optimizer: RMSprop, Params: {'lr': 0.001}, Final Validation Accuracy: 0.888


## Perform inference on test data

### Subtask:
Use each trained model to predict labels for the test dataset.


In [None]:
test_predictions = []

# Deduplicate runs: when several entries share the same
# (optimizer, hid_dim, params) key, the one trained last replaces the others.
last_trained_models_by_config = {
    (info['optimizer'], info['hid_dim'], tuple(sorted(info['params'].items()))): info
    for info in trained_models
}

# One model per unique configuration
selected_models = list(last_trained_models_by_config.values())

print("Models selected for ensembling:")
for info in selected_models:
    print(f"- Optimizer: {info['optimizer']}, HID Dim: {info['hid_dim']}, Params: {info['params']}, Epochs: {info['n_epochs']}")

# Collect one list of predicted labels per selected model.
for info in selected_models:
    model = info['model']
    model.eval()
    model_predictions = []
    with torch.no_grad():
        for x in dataloader_test:
            batch_labels = softmax(model.forward(x.to(device))).argmax(1)
            model_predictions.extend(batch_labels.tolist())
    test_predictions.append(model_predictions)

Models selected for ensembling:
- Optimizer: Adam, HID Dim: 500, Params: {'lr': 0.001}, Epochs: 20
- Optimizer: Adam, HID Dim: 500, Params: {'lr': 0.0005}, Epochs: 20
- Optimizer: SGD, HID Dim: 200, Params: {'lr': 0.02, 'momentum': 0.9}, Epochs: 25
- Optimizer: RMSprop, HID Dim: 500, Params: {'lr': 0.001}, Epochs: 25


## Ensemble predictions

### Subtask:
Combine the predictions from the different models (e.g., using majority voting or averaging predicted probabilities).


In [None]:
import numpy as np
from collections import Counter

# One row per model, one column per test sample.
test_predictions_array = np.array(test_predictions)

# Majority vote per test sample. On a tie, Counter.most_common keeps
# first-encountered order, so the label from the earliest model row wins.
n_samples = test_predictions_array.shape[1]
ensembled_predictions = [
    Counter(test_predictions_array[:, i]).most_common(1)[0][0]
    for i in range(n_samples)
]

ensembled_predictions_np = np.array(ensembled_predictions)

## Save ensembled predictions

### Subtask:
Save the final ensembled predictions to a CSV file in the specified format.


In [None]:
# Write the ensembled labels as a CSV with columns "id" and "label".
submission_path = work_dir + '/Lecture04/submission_pred_04.csv'
submission = pd.Series(ensembled_predictions_np, name='label')
submission.to_csv(submission_path, header=True, index_label='id')

# 没案

In [None]:
import itertools

# One entry per optimizer: its class, a grid of candidate hyperparameter
# values, the number of epochs to train, and the hidden-layer width to use.
optimizers_config = [
    {'name': 'Adam', 'optimizer': optim.Adam, 'params': {'lr': [0.0005]}, 'n_epochs': 20, 'hid_dim': 550},
    {'name': 'SGD', 'optimizer': optim.SGD, 'params': {'lr': [0.015], 'momentum': [0.9]}, 'n_epochs': 35, 'hid_dim': 200},  # SGD uses the smallest hidden layer (200)
    {'name': 'RMSprop', 'optimizer': optim.RMSprop, 'params': {'lr': [0.001]}, 'n_epochs': 20, 'hid_dim': 1000}
]

# NOTE: this cell (re)creates both registries, discarding earlier entries.
trained_models = []
performance_metrics = []

# Loop invariants, built once instead of once per mini-batch.
epsilon = 1e-8  # added inside log() so that log(0) cannot occur
one_hot_table = torch.eye(out_dim).to(device)  # row k is the one-hot vector of class k

for opt_config in optimizers_config:
    opt_name = opt_config['name']
    optimizer_class = opt_config['optimizer']
    opt_params_grid = opt_config['params']
    n_epochs_for_optimizer = opt_config['n_epochs']
    hid_dim = opt_config['hid_dim']  # Get hid_dim from optimizer config

    # Generate all combinations of hyperparameters
    keys, values = zip(*opt_params_grid.items())
    param_combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    print(f"Training with hid_dim: {hid_dim} and {opt_name}")
    for params in param_combinations:
        print(f"Training with params: {params} for {n_epochs_for_optimizer} epochs")

        # Instantiate a new model for each training run
        mlp = MLP(in_dim, hid_dim, out_dim).to(device)

        # Instantiate the optimizer with current parameters
        optimizer = optimizer_class(mlp.parameters(), **params)

        # Per-epoch history for this run
        epoch_train_losses = []
        epoch_valid_losses = []
        epoch_train_accuracies = []
        epoch_valid_accuracies = []

        for epoch in range(n_epochs_for_optimizer):
            losses_train = []
            train_num = 0
            train_true_num = 0

            mlp.train()
            for x, t in dataloader_train:
                x = x.to(device)
                t = t.to(device)

                optimizer.zero_grad()
                y = mlp.forward(x)

                # Cross-entropy: batch mean of -log p(true class)
                softmax_output = softmax(y)
                loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

                loss.backward()
                optimizer.step()

                losses_train.append(loss.item())

                # Reuse the probabilities computed for the loss instead of a second softmax.
                pred = softmax_output.argmax(1)
                train_num += t.size(0)
                train_true_num += (pred == t).sum().item()

            losses_valid = []
            valid_num = 0
            valid_true_num = 0

            mlp.eval()
            with torch.no_grad():
                for x, t in dataloader_valid:
                    x = x.to(device)
                    t = t.to(device)

                    y = mlp.forward(x)

                    softmax_output = softmax(y)
                    loss = -torch.mean(torch.sum(torch.log(softmax_output + epsilon) * one_hot_table[t], dim=1))

                    losses_valid.append(loss.item())

                    pred = softmax_output.argmax(1)
                    valid_num += t.size(0)
                    valid_true_num += (pred == t).sum().item()

            avg_train_loss = np.mean(losses_train)
            train_accuracy = train_true_num / train_num
            avg_valid_loss = np.mean(losses_valid)
            valid_accuracy = valid_true_num / valid_num

            epoch_train_losses.append(avg_train_loss)
            epoch_valid_losses.append(avg_valid_loss)
            epoch_train_accuracies.append(train_accuracy)
            epoch_valid_accuracies.append(valid_accuracy)

            print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
                epoch,
                avg_train_loss,
                train_accuracy,
                avg_valid_loss,
                valid_accuracy
            ))

        # Record the trained model and its learning curves for later evaluation.
        trained_models.append({'hid_dim': hid_dim, 'optimizer': opt_name, 'params': params, 'n_epochs': n_epochs_for_optimizer, 'model': mlp})
        performance_metrics.append({
            'hid_dim': hid_dim,
            'optimizer': opt_name,
            'params': params,
            'n_epochs': n_epochs_for_optimizer,
            'train_losses': epoch_train_losses,
            'valid_losses': epoch_valid_losses,
            'train_accuracies': epoch_train_accuracies,
            'valid_accuracies': epoch_valid_accuracies
        })