<a href="https://colab.research.google.com/github/Custardwh1pp/GoogleColab_DL/blob/main/lecture03_homework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 第3回講義 宿題

## 課題

今回のLessonで学んだことを元に，MNISTのファッション版 (Fashion MNIST，クラス数10) を多層パーセプトロンによって分類してみましょう．

Fashion MNISTの詳細については以下のリンクを参考にしてください．

Fashion MNIST: https://github.com/zalandoresearch/fashion-mnist

### 目標値

Accuracy 85%

### ルール

- 訓練データは`x_train`， `t_train`，テストデータは`x_test`で与えられます．
- 予測ラベルはone-hot表現ではなく，0〜9のクラスラベルで表してください．
- **下のセルで指定されている`x_train`，`t_train`以外の学習データは使わないでください．**
- **多層パーセプトロンのアルゴリズム部分は第3回の演習を参考に，NumPyのみで実装してください．** (sklearnやtensorflowなどは使用しないでください)．
    - データの前処理部分でsklearnの関数を使う (例えば `sklearn.model_selection.train_test_split`) のは問題ありません．

### 提出方法
- 2つのファイルを提出していただきます．
    1. テストデータ (`x_test`) に対する予測ラベルをcsv形式で保存し，**Omnicampusの宿題タブから「第3回 ニューラルネットワーク基礎」を選択して**提出してください．
    2. それに対応するpythonのコードを　ファイル＞ダウンロード＞.pyをダウンロード　から保存し，**Omnicampusの宿題タブから「第3回 ニューラルネットワーク基礎 (code)」を選択して**提出してください．pythonファイル自体の提出ではなく，「提出内容」の部分にコード全体をコピー&ペーストしてください．
      
- なお，採点は1で行い，2はコードの確認用として利用します（成績優秀者はコード内容を公開させていただくかもしれません）．コードの内容を変更した場合は，**1と2の両方を提出し直してください**．

### 評価方法
- 予測ラベルの`t_test`に対する精度 (Accuracy) で評価します．
- 即時採点しLeader Boardを更新します（採点スケジュールは別アナウンス）．
- 締切時の点数を最終的な評価とします．

### ドライブのマウント

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Specify the working directory
work_dir = 'drive/MyDrive/Colab Notebooks/DLBasics2025_colab'

### データの読み込み（このセルは修正しないでください）

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import inspect


# Training data
x_train = np.load(work_dir + '/Lecture03/data/x_train.npy')
t_train = np.load(work_dir + '/Lecture03/data/y_train.npy')

# Test data
x_test = np.load(work_dir + '/Lecture03/data/x_test.npy')

# Preprocessing (normalization, one-hot encoding)
# Scale the inputs by 1/255
x_train, x_test = x_train / 255., x_test / 255.
# Flatten every sample into a single feature vector
x_train, x_test = x_train.reshape(x_train.shape[0], -1), x_test.reshape(x_test.shape[0], -1)
# Turn the integer labels into rows of a 10x10 identity matrix (one-hot)
t_train = np.eye(N=10)[t_train.astype("int32").flatten()]

### 多層パーセプトロンの実装

In [None]:
# Split the data: hold out 10000 samples for validation
# NOTE(review): no random_state is passed, so the split is presumably different on every run — confirm this is intended
x_train, x_val, t_train, t_val =\
    train_test_split(x_train, t_train, test_size=10000)

In [None]:
def np_log(x):
    """Natural logarithm of ``x`` after clipping it to [1e-10, 1e+10].

    The clipping keeps the logarithm finite for zero (or very large) inputs.

    :param x: scalar or np.ndarray
    :return: np.log of the clipped input
    """
    lower, upper = 1e-10, 1e+10
    clipped = np.clip(x, lower, upper)
    return np.log(clipped)


def create_batch(data, batch_size):
    """Split ``data`` along its first axis into consecutive mini-batches.

    Every batch holds ``batch_size`` rows except possibly the last one, which
    holds the remainder. Slicing is used instead of ``np.split`` so that the
    function also works when ``batch_size`` exceeds the number of rows (one
    smaller batch is returned) and when ``data`` is empty (an empty list is
    returned).

    :param data: np.ndarray, input data
    :param batch_size: int, batch size (must be positive)
    :return: list of np.ndarray slices of ``data`` in the original row order
    :raises ValueError: if ``batch_size`` is not positive
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    num_rows = data.shape[0]
    return [
        data[start:start + batch_size]
        for start in range(0, num_rows, batch_size)
    ]

In [None]:
import numpy as np

# Try changing the seed values to see what happens.
rng = np.random.RandomState(1234)
# NOTE(review): random_state is not referenced anywhere else in the visible code — confirm whether it was meant for train_test_split / shuffle
random_state = 42


# Extension: look up activation functions not covered in this lecture and implement them
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0.

    :param x: scalar or np.ndarray
    :return: ``x`` with negative entries replaced by 0
    """
    floor = 0
    return np.maximum(x, floor)


def deriv_relu(x):
    """Derivative of ReLU: 1 where ``x`` is positive, 0 elsewhere.

    :param x: np.ndarray of pre-activations
    :return: np.ndarray of the same shape and dtype as ``x``
    """
    is_positive = np.greater(x, 0)
    return is_positive.astype(x.dtype)

def softmax(x):
    """Softmax over the last axis.

    The per-row maximum is subtracted before exponentiating so that large
    inputs do not overflow.

    :param x: np.ndarray of scores
    :return: np.ndarray of the same shape whose last axis sums to 1
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)


def deriv_softmax(x):
    """Placeholder for the softmax derivative; does nothing and returns None."""
    # The softmax derivative simplifies when combined with cross entropy,
    # so it is not implemented here
    return None


def crossentropy_loss(t, y):
    """Mean cross-entropy between targets ``t`` and predictions ``y``.

    :param t: target distribution per sample (last axis = classes)
    :param y: predicted probabilities, same shape as ``t``
    :return: scalar loss averaged over the samples
    """
    # Sum over the class axis first, then average over the remaining axes
    log_likelihood = np.sum(t * np_log(y), axis=-1)
    return -np.mean(log_likelihood)


class Dense:
    """Fully connected layer with an optional ReLU or softmax activation.

    Weights are drawn uniformly between -0.08 and 0.08 and biases start at
    zero; both are stored as float32.
    """

    def __init__(self, in_dim, out_dim, rng, activation="relu"):
        """
        :param in_dim: int, number of input features
        :param out_dim: int, number of output units
        :param rng: random generator offering ``uniform`` (e.g. np.random.RandomState)
        :param activation: "relu" or "softmax"; any other value applies no activation
        """
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.W = rng.uniform(
            low=-0.08, high=0.08, size=(in_dim, out_dim)
        ).astype("float32")
        self.b = np.zeros(out_dim).astype("float32")
        self.activation = activation

    def __call__(self, x):
        """Forward pass.

        Stores the input (and, for ReLU, the pre-activation) for ``backward``.

        :param x: np.ndarray whose last axis has ``in_dim`` entries
        :return: activated output whose last axis has ``out_dim`` entries
        """
        self.x = x
        u = x @ self.W + self.b
        if self.activation == "relu":
            # Only ReLU needs the pre-activation during the backward pass
            self.u = u
            return relu(u)
        if self.activation == "softmax":
            return softmax(u)
        return u

    def backward(self, delta):
        """Backward pass for the most recent forward call.

        For a ReLU layer ``delta`` is multiplied by the ReLU derivative; for
        any other activation it is used as-is, i.e. it must already be the
        error with respect to the pre-activation. The error to hand to the
        previous layer is stored in ``self.delta``.

        :param delta: np.ndarray, error arriving from the following layer;
            it is left unmodified
        :return: tuple ``(dW, db)`` of gradients for ``W`` and ``b``, summed
            over the batch
        """
        if self.activation == "relu":
            # Build a new array rather than using `*=` so that the caller's
            # array (e.g. the next layer's stored delta) is not overwritten
            delta = delta * deriv_relu(self.u)
        dW = self.x.T @ delta
        db = np.sum(delta, axis=0)
        self.delta = delta @ self.W.T
        return dW, db


class Model:
    """Multilayer perceptron: 784 -> 256 (ReLU) -> 10 (softmax).

    The forward pass, backward pass and parameter update all iterate over
    ``self.layers``, so a layer appended to that list is trained as well.
    """

    def __init__(self, rng):
        """
        :param rng: random generator forwarded to every Dense layer
        """
        self.l1 = Dense(784, 256, rng, "relu")
        self.l2 = Dense(256, 10, rng, "softmax")
        self.layers = [self.l1, self.l2]

    def __call__(self, x):
        """Forward pass through all layers in order.

        :param x: np.ndarray of inputs
        :return: output of the last layer
        """
        for layer in self.layers:
            x = layer(x)
        return x

    def backward(self, t, y):
        """Backpropagate the output error ``y - t`` through all layers.

        :param t: targets, same shape as ``y``
        :param y: model output from the most recent forward pass
        :return: list of ``(dW, db)`` tuples, one per layer, ordered like
            ``self.layers``
        """
        # Output error; the last layer uses it directly as its delta
        delta = y - t
        grads = []
        for layer in reversed(self.layers):
            grads.append(layer.backward(delta))
            # Each layer stores the error to pass on to the layer before it
            delta = layer.delta
        grads.reverse()
        return grads

    def update(self, grads, lr):
        """Apply one gradient-descent step in place.

        :param grads: list of ``(dW, db)`` tuples as returned by ``backward``
        :param lr: learning rate
        """
        for layer, (dW, db) in zip(self.layers, grads):
            layer.W -= lr * dW
            layer.b -= lr * db


lr = 0.001 # learning rate (tune as needed)
n_epochs = 35 # number of epochs (tune as needed)
batch_size = 100 # mini-batch size (tune as needed)

# Build the network; rng is passed on to the layers for weight initialization
mlp = Model(rng)

### モデルの学習

In [None]:
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score

def train_model(mlp, x_train, t_train, x_val, t_val, n_epochs=10):
    """Train ``mlp`` with mini-batch updates and print metrics after each epoch.

    Every epoch reshuffles the training and validation sets, updates the model
    on each training mini-batch, then evaluates on each validation mini-batch.
    The mini-batch size and learning rate are read from the module-level
    ``batch_size`` and ``lr`` variables.

    :param mlp: model; called for the forward pass and must offer
        ``backward(t, y)`` and ``update(grads, lr)``
    :param x_train: training inputs
    :param t_train: training targets, one row per sample (argmax gives the label)
    :param x_val: validation inputs
    :param t_val: validation targets, same layout as ``t_train``
    :param n_epochs: int, number of passes over the training data
    """
    report = 'EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'

    for epoch in range(n_epochs):
        # Reshuffle, then cut inputs and targets into aligned mini-batches
        x_train, t_train = shuffle(x_train, t_train)
        train_batches = zip(
            create_batch(x_train, batch_size),
            create_batch(t_train, min(batch_size, t_train.shape[0])),
        )

        x_val, t_val = shuffle(x_val, t_val)
        valid_batches = zip(
            create_batch(x_val, batch_size),
            create_batch(t_val, min(batch_size, t_val.shape[0])),
        )

        # Training pass
        train_losses, train_hits, train_seen = [], 0, 0
        for x_batch, t_batch in train_batches:
            # Forward pass and loss
            y_batch = mlp(x_batch)
            train_losses.append(crossentropy_loss(t_batch, y_batch).tolist())

            # Parameter update
            mlp.update(mlp.backward(t_batch, y_batch), lr)

            # Count correct predictions (made before this update)
            train_hits += accuracy_score(
                t_batch.argmax(axis=1), y_batch.argmax(axis=1), normalize=False
            )
            train_seen += x_batch.shape[0]

        # Validation pass (no parameter updates)
        valid_losses, valid_hits, valid_seen = [], 0, 0
        for x_batch, t_batch in valid_batches:
            y_batch = mlp(x_batch)
            valid_losses.append(crossentropy_loss(t_batch, y_batch).tolist())
            valid_hits += accuracy_score(
                t_batch.argmax(axis=1), y_batch.argmax(axis=1), normalize=False
            )
            valid_seen += x_batch.shape[0]

        print(report.format(
            epoch,
            np.mean(train_losses),
            train_hits / train_seen,
            np.mean(valid_losses),
            valid_hits / valid_seen,
        ))


# Train the current model for n_epochs epochs
train_model(mlp, x_train, t_train, x_val, t_val, n_epochs)

EPOCH: 0, Train [Loss: 0.682, Accuracy: 0.766], Valid [Loss: 0.546, Accuracy: 0.802]
EPOCH: 1, Train [Loss: 0.476, Accuracy: 0.832], Valid [Loss: 0.455, Accuracy: 0.840]
EPOCH: 2, Train [Loss: 0.427, Accuracy: 0.849], Valid [Loss: 0.438, Accuracy: 0.843]
EPOCH: 3, Train [Loss: 0.397, Accuracy: 0.858], Valid [Loss: 0.406, Accuracy: 0.858]
EPOCH: 4, Train [Loss: 0.375, Accuracy: 0.866], Valid [Loss: 0.402, Accuracy: 0.851]
EPOCH: 5, Train [Loss: 0.360, Accuracy: 0.871], Valid [Loss: 0.396, Accuracy: 0.860]
EPOCH: 6, Train [Loss: 0.346, Accuracy: 0.875], Valid [Loss: 0.373, Accuracy: 0.868]
EPOCH: 7, Train [Loss: 0.334, Accuracy: 0.880], Valid [Loss: 0.360, Accuracy: 0.871]
EPOCH: 8, Train [Loss: 0.325, Accuracy: 0.884], Valid [Loss: 0.366, Accuracy: 0.867]
EPOCH: 9, Train [Loss: 0.314, Accuracy: 0.887], Valid [Loss: 0.361, Accuracy: 0.872]
EPOCH: 10, Train [Loss: 0.306, Accuracy: 0.891], Valid [Loss: 0.351, Accuracy: 0.875]
EPOCH: 11, Train [Loss: 0.298, Accuracy: 0.893], Valid [Loss: 0.

In [None]:
lr = 0.003 # learning rate (tune as needed)
n_epochs = 35 # number of epochs (tune as needed)
batch_size = 150 # mini-batch size (tune as needed)

# Build a fresh network; rng is passed on to the layers for weight initialization
mlp = Model(rng)

In [None]:
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score

def train_model(mlp, x_train, t_train, x_val, t_val, n_epochs=10):
    """Train ``mlp`` with mini-batch updates and print metrics after each epoch.

    Every epoch reshuffles the training and validation sets, updates the model
    on each training mini-batch, then evaluates on each validation mini-batch.
    The mini-batch size and learning rate are read from the module-level
    ``batch_size`` and ``lr`` variables.

    :param mlp: model; called for the forward pass and must offer
        ``backward(t, y)`` and ``update(grads, lr)``
    :param x_train: training inputs
    :param t_train: training targets, one row per sample (argmax gives the label)
    :param x_val: validation inputs
    :param t_val: validation targets, same layout as ``t_train``
    :param n_epochs: int, number of passes over the training data
    """
    report = 'EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'

    for epoch in range(n_epochs):
        # Reshuffle, then cut inputs and targets into aligned mini-batches
        x_train, t_train = shuffle(x_train, t_train)
        train_batches = zip(
            create_batch(x_train, batch_size),
            create_batch(t_train, min(batch_size, t_train.shape[0])),
        )

        x_val, t_val = shuffle(x_val, t_val)
        valid_batches = zip(
            create_batch(x_val, batch_size),
            create_batch(t_val, min(batch_size, t_val.shape[0])),
        )

        # Training pass
        train_losses, train_hits, train_seen = [], 0, 0
        for x_batch, t_batch in train_batches:
            # Forward pass and loss
            y_batch = mlp(x_batch)
            train_losses.append(crossentropy_loss(t_batch, y_batch).tolist())

            # Parameter update
            mlp.update(mlp.backward(t_batch, y_batch), lr)

            # Count correct predictions (made before this update)
            train_hits += accuracy_score(
                t_batch.argmax(axis=1), y_batch.argmax(axis=1), normalize=False
            )
            train_seen += x_batch.shape[0]

        # Validation pass (no parameter updates)
        valid_losses, valid_hits, valid_seen = [], 0, 0
        for x_batch, t_batch in valid_batches:
            y_batch = mlp(x_batch)
            valid_losses.append(crossentropy_loss(t_batch, y_batch).tolist())
            valid_hits += accuracy_score(
                t_batch.argmax(axis=1), y_batch.argmax(axis=1), normalize=False
            )
            valid_seen += x_batch.shape[0]

        print(report.format(
            epoch,
            np.mean(train_losses),
            train_hits / train_seen,
            np.mean(valid_losses),
            valid_hits / valid_seen,
        ))


# Train the current model for n_epochs epochs
train_model(mlp, x_train, t_train, x_val, t_val, n_epochs)

EPOCH: 0, Train [Loss: 0.686, Accuracy: 0.749], Valid [Loss: 0.465, Accuracy: 0.832]
EPOCH: 1, Train [Loss: 0.456, Accuracy: 0.832], Valid [Loss: 0.459, Accuracy: 0.824]
EPOCH: 2, Train [Loss: 0.399, Accuracy: 0.854], Valid [Loss: 0.414, Accuracy: 0.851]
EPOCH: 3, Train [Loss: 0.370, Accuracy: 0.865], Valid [Loss: 0.364, Accuracy: 0.868]
EPOCH: 4, Train [Loss: 0.347, Accuracy: 0.872], Valid [Loss: 0.350, Accuracy: 0.874]
EPOCH: 5, Train [Loss: 0.327, Accuracy: 0.878], Valid [Loss: 0.349, Accuracy: 0.872]
EPOCH: 6, Train [Loss: 0.318, Accuracy: 0.882], Valid [Loss: 0.353, Accuracy: 0.874]
EPOCH: 7, Train [Loss: 0.302, Accuracy: 0.887], Valid [Loss: 0.341, Accuracy: 0.877]
EPOCH: 8, Train [Loss: 0.291, Accuracy: 0.890], Valid [Loss: 0.319, Accuracy: 0.885]
EPOCH: 9, Train [Loss: 0.281, Accuracy: 0.894], Valid [Loss: 0.335, Accuracy: 0.879]
EPOCH: 10, Train [Loss: 0.274, Accuracy: 0.899], Valid [Loss: 0.319, Accuracy: 0.884]
EPOCH: 11, Train [Loss: 0.264, Accuracy: 0.902], Valid [Loss: 0.

In [None]:
# Predict the test set one sample at a time and collect the class indices
t_pred = []
for sample in x_test:
    # Forward pass on a batch holding just this sample
    probs = mlp(sample[np.newaxis, :])

    # Convert the model output to the predicted class index
    t_pred.extend(probs.argmax(1).tolist())

# Save the predictions as a CSV with an 'id' index column and a 'label' column
submission = pd.Series(t_pred, name='label')
submission.to_csv(work_dir + '/Lecture03/submission_pred_03_35_003.csv', header=True, index_label='id')

## 対象エポックの特定

### Subtask:
学習結果から、検証用データの精度が0.88以上となったエポックを特定します。


In [None]:
import re

training_log = """
EPOCH: 0, Train [Loss: 0.682, Accuracy: 0.766], Valid [Loss: 0.546, Accuracy: 0.802]
EPOCH: 1, Train [Loss: 0.476, Accuracy: 0.832], Valid [Loss: 0.455, Accuracy: 0.840]
EPOCH: 2, Train [Loss: 0.427, Accuracy: 0.849], Valid [Loss: 0.438, Accuracy: 0.843]
EPOCH: 3, Train [Loss: 0.397, Accuracy: 0.858], Valid [Loss: 0.406, Accuracy: 0.858]
EPOCH: 4, Train [Loss: 0.375, Accuracy: 0.866], Valid [Loss: 0.402, Accuracy: 0.851]
EPOCH: 5, Train [Loss: 0.360, Accuracy: 0.871], Valid [Loss: 0.396, Accuracy: 0.860]
EPOCH: 6, Train [Loss: 0.346, Accuracy: 0.875], Valid [Loss: 0.373, Accuracy: 0.868]
EPOCH: 7, Train [Loss: 0.334, Accuracy: 0.880], Valid [Loss: 0.360, Accuracy: 0.871]
EPOCH: 8, Train [Loss: 0.325, Accuracy: 0.884], Valid [Loss: 0.366, Accuracy: 0.867]
EPOCH: 9, Train [Loss: 0.314, Accuracy: 0.887], Valid [Loss: 0.361, Accuracy: 0.872]
EPOCH: 10, Train [Loss: 0.306, Accuracy: 0.891], Valid [Loss: 0.351, Accuracy: 0.875]
EPOCH: 11, Train [Loss: 0.298, Accuracy: 0.893], Valid [Loss: 0.363, Accuracy: 0.871]
EPOCH: 12, Train [Loss: 0.291, Accuracy: 0.896], Valid [Loss: 0.328, Accuracy: 0.882]
EPOCH: 13, Train [Loss: 0.284, Accuracy: 0.898], Valid [Loss: 0.336, Accuracy: 0.880]
EPOCH: 14, Train [Loss: 0.277, Accuracy: 0.900], Valid [Loss: 0.327, Accuracy: 0.885]
EPOCH: 15, Train [Loss: 0.273, Accuracy: 0.902], Valid [Loss: 0.341, Accuracy: 0.874]
EPOCH: 16, Train [Loss: 0.266, Accuracy: 0.904], Valid [Loss: 0.317, Accuracy: 0.886]
EPOCH: 17, Train [Loss: 0.260, Accuracy: 0.906], Valid [Loss: 0.329, Accuracy: 0.883]
EPOCH: 18, Train [Loss: 0.257, Accuracy: 0.907], Valid [Loss: 0.359, Accuracy: 0.871]
EPOCH: 19, Train [Loss: 0.252, Accuracy: 0.909], Valid [Loss: 0.331, Accuracy: 0.880]
EPOCH: 20, Train [Loss: 0.246, Accuracy: 0.910], Valid [Loss: 0.337, Accuracy: 0.880]
EPOCH: 21, Train [Loss: 0.241, Accuracy: 0.912], Valid [Loss: 0.327, Accuracy: 0.882]
EPOCH: 22, Train [Loss: 0.236, Accuracy: 0.914], Valid [Loss: 0.309, Accuracy: 0.890]
EPOCH: 23, Train [Loss: 0.233, Accuracy: 0.915], Valid [Loss: 0.323, Accuracy: 0.884]
EPOCH: 24, Train [Loss: 0.230, Accuracy: 0.917], Valid [Loss: 0.321, Accuracy: 0.884]
EPOCH: 25, Train [Loss: 0.226, Accuracy: 0.918], Valid [Loss: 0.318, Accuracy: 0.888]
EPOCH: 26, Train [Loss: 0.223, Accuracy: 0.919], Valid [Loss: 0.310, Accuracy: 0.890]
EPOCH: 27, Train [Loss: 0.217, Accuracy: 0.923], Valid [Loss: 0.343, Accuracy: 0.880]
EPOCH: 28, Train [Loss: 0.215, Accuracy: 0.923], Valid [Loss: 0.308, Accuracy: 0.888]
EPOCH: 29, Train [Loss: 0.210, Accuracy: 0.924], Valid [Loss: 0.302, Accuracy: 0.893]
EPOCH: 30, Train [Loss: 0.206, Accuracy: 0.926], Valid [Loss: 0.309, Accuracy: 0.888]
EPOCH: 31, Train [Loss: 0.203, Accuracy: 0.927], Valid [Loss: 0.304, Accuracy: 0.892]
EPOCH: 32, Train [Loss: 0.201, Accuracy: 0.928], Valid [Loss: 0.310, Accuracy: 0.888]
EPOCH: 33, Train [Loss: 0.197, Accuracy: 0.929], Valid [Loss: 0.321, Accuracy: 0.886]
EPOCH: 34, Train [Loss: 0.194, Accuracy: 0.930], Valid [Loss: 0.318, Accuracy: 0.887]
EPOCH: 0, Train [Loss: 0.686, Accuracy: 0.749], Valid [Loss: 0.465, Accuracy: 0.832]
EPOCH: 1, Train [Loss: 0.456, Accuracy: 0.832], Valid [Loss: 0.459, Accuracy: 0.824]
EPOCH: 2, Train [Loss: 0.399, Accuracy: 0.854], Valid [Loss: 0.414, Accuracy: 0.851]
EPOCH: 3, Train [Loss: 0.370, Accuracy: 0.865], Valid [Loss: 0.364, Accuracy: 0.868]
EPOCH: 4, Train [Loss: 0.347, Accuracy: 0.872], Valid [Loss: 0.350, Accuracy: 0.874]
EPOCH: 5, Train [Loss: 0.327, Accuracy: 0.878], Valid [Loss: 0.349, Accuracy: 0.872]
EPOCH: 6, Train [Loss: 0.318, Accuracy: 0.882], Valid [Loss: 0.353, Accuracy: 0.874]
EPOCH: 7, Train [Loss: 0.302, Accuracy: 0.887], Valid [Loss: 0.341, Accuracy: 0.877]
EPOCH: 8, Train [Loss: 0.291, Accuracy: 0.890], Valid [Loss: 0.319, Accuracy: 0.885]
EPOCH: 9, Train [Loss: 0.281, Accuracy: 0.894], Valid [Loss: 0.335, Accuracy: 0.879]
EPOCH: 10, Train [Loss: 0.274, Accuracy: 0.899], Valid [Loss: 0.319, Accuracy: 0.884]
EPOCH: 11, Train [Loss: 0.264, Accuracy: 0.902], Valid [Loss: 0.347, Accuracy: 0.870]
EPOCH: 12, Train [Loss: 0.257, Accuracy: 0.905], Valid [Loss: 0.320, Accuracy: 0.885]
EPOCH: 13, Train [Loss: 0.252, Accuracy: 0.906], Valid [Loss: 0.333, Accuracy: 0.881]
EPOCH: 14, Train [Loss: 0.245, Accuracy: 0.909], Valid [Loss: 0.318, Accuracy: 0.886]
EPOCH: 15, Train [Loss: 0.236, Accuracy: 0.911], Valid [Loss: 0.314, Accuracy: 0.885]
EPOCH: 16, Train [Loss: 0.233, Accuracy: 0.912], Valid [Loss: 0.313, Accuracy: 0.890]
EPOCH: 17, Train [Loss: 0.226, Accuracy: 0.916], Valid [Loss: 0.313, Accuracy: 0.892]
EPOCH: 18, Train [Loss: 0.221, Accuracy: 0.917], Valid [Loss: 0.321, Accuracy: 0.885]
EPOCH: 19, Train [Loss: 0.215, Accuracy: 0.921], Valid [Loss: 0.333, Accuracy: 0.884]
EPOCH: 20, Train [Loss: 0.214, Accuracy: 0.920], Valid [Loss: 0.326, Accuracy: 0.888]
EPOCH: 21, Train [Loss: 0.205, Accuracy: 0.923], Valid [Loss: 0.337, Accuracy: 0.882]
EPOCH: 22, Train [Loss: 0.205, Accuracy: 0.924], Valid [Loss: 0.323, Accuracy: 0.888]
EPOCH: 23, Train [Loss: 0.197, Accuracy: 0.927], Valid [Loss: 0.321, Accuracy: 0.888]
EPOCH: 24, Train [Loss: 0.198, Accuracy: 0.925], Valid [Loss: 0.320, Accuracy: 0.889]
EPOCH: 25, Train [Loss: 0.191, Accuracy: 0.929], Valid [Loss: 0.334, Accuracy: 0.886]
EPOCH: 26, Train [Loss: 0.187, Accuracy: 0.930], Valid [Loss: 0.321, Accuracy: 0.891]
EPOCH: 27, Train [Loss: 0.183, Accuracy: 0.931], Valid [Loss: 0.346, Accuracy: 0.886]
EPOCH: 28, Train [Loss: 0.181, Accuracy: 0.932], Valid [Loss: 0.328, Accuracy: 0.885]
EPOCH: 29, Train [Loss: 0.177, Accuracy: 0.933], Valid [Loss: 0.326, Accuracy: 0.888]
EPOCH: 30, Train [Loss: 0.169, Accuracy: 0.938], Valid [Loss: 0.318, Accuracy: 0.893]
EPOCH: 31, Train [Loss: 0.169, Accuracy: 0.937], Valid [Loss: 0.317, Accuracy: 0.891]
EPOCH: 32, Train [Loss: 0.163, Accuracy: 0.939], Valid [Loss: 0.352, Accuracy: 0.886]
EPOCH: 33, Train [Loss: 0.161, Accuracy: 0.940], Valid [Loss: 0.380, Accuracy: 0.876]
EPOCH: 34, Train [Loss: 0.162, Accuracy: 0.940], Valid [Loss: 0.359, Accuracy: 0.880]
"""

# The log above concatenates two 35-epoch runs, so a line's position in the
# text is NOT its epoch number (positions 35-69 belong to the second run).
# Read the epoch from the "EPOCH: n" field of each line instead.
epoch_pattern = re.compile(
    r"EPOCH: (\d+), .*?Valid \[Loss: [\d\.]+, Accuracy: ([\d\.]+)\]"
)
epoch_records = [
    (int(epoch), float(acc))
    for epoch, acc in epoch_pattern.findall(training_log)
]

# Extract validation accuracies (one entry per logged line, in log order)
valid_accuracies = [acc for _, acc in epoch_records]

# Identify epochs whose validation accuracy reached 0.88 in at least one run
# (deduplicated and sorted, so every value is a real epoch number)
high_accuracy_epochs = sorted(
    {epoch for epoch, acc in epoch_records if acc >= 0.88}
)

print("Epochs with validation accuracy >= 0.88:", high_accuracy_epochs)

Epochs with validation accuracy >= 0.88: [12, 13, 14, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 43, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69]


## モデルの再学習またはロード

### Subtask:
特定した各エポックに対応するモデルのパラメータをロードするか、必要であればそのエポックまで再学習を行います。


In [None]:
# Collects one retrained model per selected epoch
trained_models = []

# Define the training function with epoch limit
def train_model_upto_epoch(mlp, x_train, t_train, x_val, t_val, target_epoch):
    """Train ``mlp`` for epochs 0..``target_epoch`` and report only the last one.

    Each epoch reshuffles both data sets, updates the model on every training
    mini-batch and evaluates on every validation mini-batch; the metrics are
    printed only for ``target_epoch``. The mini-batch size and learning rate
    are read from the module-level ``batch_size`` and ``lr`` variables.

    :param mlp: model; called for the forward pass and must offer
        ``backward(t, y)`` and ``update(grads, lr)``
    :param x_train: training inputs
    :param t_train: training targets, one row per sample (argmax gives the label)
    :param x_val: validation inputs
    :param t_val: validation targets, same layout as ``t_train``
    :param target_epoch: int, index of the last epoch to run (inclusive)
    """
    report = 'Finished training up to EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'

    for epoch in range(target_epoch + 1):
        # Reshuffle, then cut inputs and targets into aligned mini-batches
        x_train, t_train = shuffle(x_train, t_train)
        train_batches = zip(
            create_batch(x_train, batch_size),
            create_batch(t_train, min(batch_size, t_train.shape[0])),
        )

        x_val, t_val = shuffle(x_val, t_val)
        valid_batches = zip(
            create_batch(x_val, batch_size),
            create_batch(t_val, min(batch_size, t_val.shape[0])),
        )

        # Training pass
        train_losses, train_hits, train_seen = [], 0, 0
        for x_batch, t_batch in train_batches:
            y_batch = mlp(x_batch)
            train_losses.append(crossentropy_loss(t_batch, y_batch).tolist())
            mlp.update(mlp.backward(t_batch, y_batch), lr)
            train_hits += accuracy_score(
                t_batch.argmax(axis=1), y_batch.argmax(axis=1), normalize=False
            )
            train_seen += x_batch.shape[0]

        # Validation pass at the end of every epoch (no parameter updates)
        valid_losses, valid_hits, valid_seen = [], 0, 0
        for x_batch, t_batch in valid_batches:
            y_batch = mlp(x_batch)
            valid_losses.append(crossentropy_loss(t_batch, y_batch).tolist())
            valid_hits += accuracy_score(
                t_batch.argmax(axis=1), y_batch.argmax(axis=1), normalize=False
            )
            valid_seen += x_batch.shape[0]

        # Only the final (target) epoch is reported
        if epoch != target_epoch:
            continue
        print(report.format(
            epoch,
            np.mean(train_losses),
            train_hits / train_seen,
            np.mean(valid_losses),
            valid_hits / valid_seen,
        ))


# Retrain models up to high accuracy epochs
# NOTE(review): every target epoch restarts from a newly initialized model and
# trains from epoch 0, so the total cost grows with the sum of the target
# epochs — confirm this is intended rather than snapshotting a single run.
for epoch in high_accuracy_epochs:
    print(f"Retraining model up to epoch {epoch}...")
    current_mlp = Model(rng) # Initialize a new model for each epoch
    train_model_upto_epoch(current_mlp, x_train, t_train, x_val, t_val, epoch)
    trained_models.append(current_mlp)

print(f"Finished training {len(trained_models)} models.")

Retraining model up to epoch 12...
Finished training up to EPOCH: 12, Train [Loss: 0.253, Accuracy: 0.905], Valid [Loss: 0.329, Accuracy: 0.880]
Retraining model up to epoch 13...
Finished training up to EPOCH: 13, Train [Loss: 0.247, Accuracy: 0.908], Valid [Loss: 0.329, Accuracy: 0.881]
Retraining model up to epoch 14...
Finished training up to EPOCH: 14, Train [Loss: 0.243, Accuracy: 0.909], Valid [Loss: 0.321, Accuracy: 0.886]
Retraining model up to epoch 16...
Finished training up to EPOCH: 16, Train [Loss: 0.231, Accuracy: 0.914], Valid [Loss: 0.322, Accuracy: 0.885]
Retraining model up to epoch 17...
Finished training up to EPOCH: 17, Train [Loss: 0.223, Accuracy: 0.916], Valid [Loss: 0.326, Accuracy: 0.885]
Retraining model up to epoch 19...
Finished training up to EPOCH: 19, Train [Loss: 0.216, Accuracy: 0.919], Valid [Loss: 0.327, Accuracy: 0.885]
Retraining model up to epoch 20...
Finished training up to EPOCH: 20, Train [Loss: 0.210, Accuracy: 0.922], Valid [Loss: 0.317, Ac

KeyboardInterrupt: 

## 結果の保存

In [None]:
# One list of predicted class indices per trained model
test_predictions = []

for model in trained_models:
    # Split the test set into mini-batches of the training batch size
    x_test_batches = create_batch(x_test, batch_size)

    # Predicted class index for every test sample, batch by batch
    model_predictions = [
        label
        for x_batch in x_test_batches
        for label in model(x_batch).argmax(axis=1).tolist()
    ]
    test_predictions.append(model_predictions)

print(f"Generated predictions for {len(test_predictions)} models.")

Generated predictions for 13 models.


In [None]:
from collections import Counter

# Transpose the per-model lists so that each item holds all votes for one sample
predictions_by_sample = zip(*test_predictions)

# Majority vote: keep the most frequent label among the models' predictions
ensemble_predictions = [
    Counter(votes).most_common(1)[0][0] for votes in predictions_by_sample
]

print(f"Generated {len(ensemble_predictions)} ensemble predictions.")

Generated 10000 ensemble predictions.


In [None]:
import pandas as pd

# Make sure the output directory exists before anything is written
output_dir = work_dir + '/Lecture03'
os.makedirs(output_dir, exist_ok=True)

# Wrap the ensemble predictions in a Series named 'label' and save it as a
# CSV whose index column is called 'id'
submission = pd.Series(ensemble_predictions, name='label')
submission.to_csv(work_dir + '/Lecture03/submission_ensemble.csv', header=True, index_label='id')

print(f"Submission file saved to {work_dir + '/Lecture03/submission_ensemble.csv'}")

Submission file saved to drive/MyDrive/Colab Notebooks/DLBasics2025_colab/Lecture03/submission_ensemble.csv
