# 第3回講義 宿題

## 課題

今回のLessonで学んだことを基に，MNISTのファッション版 (Fashion MNIST，クラス数10) を多層パーセプトロンによって分類してみましょう．

Fashion MNISTの詳細については以下のリンクを参考にしてください．

Fashion MNIST: https://github.com/zalandoresearch/fashion-mnist

### 目標値

Accuracy 85%

### ルール

- 訓練データはx_train， t_train，テストデータはx_testで与えられます．
- 予測ラベルはone-hot表現ではなく，0〜9のクラスラベルで表してください．
- **下のセルで指定されているx_train，t_train以外の学習データは使わないでください．**
- **多層パーセプトロンのアルゴリズム部分は第3回の演習を参考に，NumPyのみで実装してください．** (sklearnやtensorflowなどは使用しないでください)．
    - データの前処理部分でsklearnの関数を使う (例えば sklearn.model_selection.train_test_split) のは問題ありません．

### 提出方法

- 2つのファイルを提出していただきます．
  - テストデータ (x_test) に対する予測ラベルをcsvファイル (ファイル名: submission_pred.csv) で提出してください．
  - それに対応するpythonのコードをsubmission_code.pyとして提出してください (%%writefileコマンドなどを利用してください)．

### 評価方法

- 予測ラベルのt_testに対する精度 (Accuracy) で評価します．
- 提出後即時採点を行い，Leader Boardが更新されます．
- 締切後の点数を最終的な評価とします．

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### データの読み込み

- この部分は修正しないでください

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import inspect


# Training data (inputs and labels; the labels are stored in y_train.npy)
x_train = np.load('drive/MyDrive/Colab Notebooks/DLBasics2023_colab/Lecture03/data/x_train.npy')
t_train = np.load('drive/MyDrive/Colab Notebooks/DLBasics2023_colab/Lecture03/data/y_train.npy')

# Test data (inputs only; no test labels are loaded here)
x_test = np.load('drive/MyDrive/Colab Notebooks/DLBasics2023_colab/Lecture03/data/x_test.npy')

# Preprocessing (normalization, one-hot encoding)
# Scale inputs by 1/255 -- presumably raw pixel values are in 0..255; confirm against the data files.
x_train, x_test = x_train / 255., x_test / 255.
# Flatten every sample into a single feature vector: shape becomes (n_samples, -1).
x_train, x_test = x_train.reshape(x_train.shape[0], -1), x_test.reshape(x_test.shape[0], -1)
# Turn integer labels into one-hot rows by indexing a 10x10 identity matrix.
t_train = np.eye(N=10)[t_train.astype("int32").flatten()]

### 多層パーセプトロンの実装

In [None]:
# Split the data: hold out 10,000 samples of the training set for validation.
x_train, x_val, t_train, t_val = train_test_split(
    x_train, t_train, test_size=10000
)

In [None]:
def np_log(x):
    """Return the natural logarithm of ``x`` clipped to ``[1e-10, 1e+10]``.

    Clipping first keeps zero or negative inputs from yielding ``-inf``/``nan``.
    """
    clipped = np.clip(x, 1e-10, 1e+10)
    return np.log(clipped)


def create_batch(data, batch_size):
    """
    Split ``data`` along its first axis into consecutive mini-batches.

    :param data: np.ndarray, input data; batches are taken along axis 0.
    :param batch_size: int, number of rows per batch (must be positive).
    :return: list of np.ndarray slices of ``data``. Every batch has
        ``batch_size`` rows except possibly the last one, which holds the
        remainder. An input with zero rows yields an empty list.
    :raises ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Plain slicing also covers inputs shorter than one batch, where
    # np.split with zero sections would raise.
    return [data[start:start + batch_size]
            for start in range(0, data.shape[0], batch_size)]

In [None]:
# Try changing the seed values to see what effect they have.
# NOTE(review): neither `rng` nor `random_state` is referenced by the code
# visible in this notebook export -- confirm whether they were meant to be
# passed to the weight initialisation / shuffle / train_test_split calls.
rng = np.random.RandomState(1234)
random_state = 42


# 発展: 今回の講義で扱っていない活性化関数について調べ，実装してみましょう
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    rectified = np.maximum(x, 0)
    return rectified

def deriv_relu(x):
    """Derivative of ReLU: 1 where ``x > 0`` and 0 elsewhere, in ``x``'s dtype."""
    is_positive = x > 0
    return is_positive.astype(x.dtype)
  
class PReLU:
    """ReLU variant that scales negative inputs by a slope ``alpha``.

    ``alpha`` is stored as a plain attribute; nothing in this class updates it.
    """

    def __init__(self, alpha=0.01):
        """
        :param alpha: float, slope applied to inputs below zero.
        """
        self.alpha = alpha

    def __call__(self, x):
        """Return ``x`` where ``x >= 0`` and ``alpha * x`` elsewhere."""
        non_negative = x >= 0
        scaled = self.alpha * x
        return np.where(non_negative, x, scaled)

    def deriv(self, x):
        """Return 1 where ``x >= 0`` and ``alpha`` elsewhere."""
        non_negative = x >= 0
        return np.where(non_negative, 1, self.alpha)

def tanh(x):
    """Hyperbolic tangent activation, applied element-wise."""
    activated = np.tanh(x)
    return activated

def deriv_tanh(x):
    """Derivative of tanh: ``1 - tanh(x)**2``, applied element-wise."""
    th = np.tanh(x)
    return 1 - th**2

def softmax(x):
    """Row-wise softmax of a 2-D array.

    :param x: np.ndarray of shape (batch_size, n_classes).
    :return: np.ndarray of the same shape whose rows sum to 1.

    The row maximum is subtracted before exponentiating to avoid overflow.
    The shift is written to a new array so the caller's ``x`` is left
    untouched (an in-place ``x -= ...`` would overwrite it).
    """
    shifted = x - x.max(axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    return x_exp / np.sum(x_exp, axis=1, keepdims=True)


def deriv_softmax(x):
    """Return ``s * (1 - s)`` element-wise, where ``s = softmax(x)``.

    This is only the diagonal of the softmax Jacobian. ``Model.backward``
    assigns the output layer's delta directly, so this function is never
    invoked for the last layer there.

    :param x: np.ndarray of shape (batch_size, n_classes).
    :return: np.ndarray of the same shape.
    """
    # Evaluate softmax once and reuse it instead of computing it twice.
    s = softmax(x)
    return s * (1 - s)


def crossentropy_loss(t, y):
    """Mean over rows of the cross-entropy ``-sum_k t_k * log(y_k)``.

    :param t: targets, multiplied element-wise with the log-predictions.
    :param y: predictions; passed through ``np_log`` before use.
    """
    per_sample = (-t * np_log(y)).sum(axis=1)
    return per_sample.mean()


class Dense:
    """Fully connected layer computing ``h = function(x @ W + b)``.

    The forward pass caches its input and pre-activation so that ``b_prop``
    and ``compute_grad`` can be evaluated afterwards.
    """

    def __init__(self, in_dim, out_dim, function, deriv_function):
        """
        :param in_dim: int, number of input features.
        :param out_dim: int, number of output units.
        :param function: callable, activation applied to the pre-activation u.
        :param deriv_function: callable, derivative of the activation,
            evaluated at the pre-activation u.
        """
        # Uniform initialisation in [-0.08, 0.08) from NumPy's global random state.
        self.W = np.random.uniform(low=-0.08, high=0.08,
                                   size=(in_dim, out_dim)).astype("float64")
        self.b = np.zeros(out_dim).astype("float64")
        self.function = function
        self.deriv_function = deriv_function

        # Cached by the forward pass.
        self.x = None
        self.u = None

        # Error signal of this layer: set by b_prop() or assigned by the caller.
        self.delta = None

        # Gradients filled in by compute_grad().
        self.dW = None
        self.db = None

        # Split points of the flat parameter vector [W.ravel(), b].
        self.params_idxs = np.cumsum([self.W.size, self.b.size])

    def __call__(self, x):
        """
        Forward pass.
        x: shape=(batch_size, in_dim_{j})
        h: shape=(batch_size, out_dim_{j})
        """
        self.x = x
        self.u = np.matmul(self.x, self.W) + self.b
        h = self.function(self.u)
        return h

    def b_prop(self, delta, W):
        """
        Backpropagate the error signal from the following layer.
        delta (=delta_{j+1}): shape=(batch_size, out_dim_{j+1})
        W (=W_{j+1}): shape=(out_dim_{j}, out_dim_{j+1})
        self.delta (=delta_{j}): shape=(batch_size, out_dim_{j})
        """
        self.delta = self.deriv_function(self.u) * np.matmul(delta, W.T)
        return self.delta

    def compute_grad(self):
        """
        Compute batch-averaged gradients from the cached input and delta.
        self.x: shape=(batch_size, in_dim_{j})
        self.delta: shape=(batch_size, out_dim_{j})
        self.dW: shape=(in_dim_{j}, out_dim_{j})
        self.db: shape=(out_dim_{j})
        """
        batch_size = self.delta.shape[0]

        self.dW = np.matmul(self.x.T, self.delta) / batch_size
        self.db = np.matmul(np.ones(batch_size), self.delta) / batch_size

    def get_params(self):
        """Return W and b as one flat vector laid out as ``[W.ravel(), b]``."""
        return np.concatenate([self.W.ravel(), self.b], axis=0)

    def set_params(self, params):
        """
        Load W and b from a flat vector laid out as ``[W.ravel(), b]``.
        params: 1-D array of length W.size + b.size (the layout produced by
            get_params()).
        """
        _W, _b = np.split(params, self.params_idxs)[:-1]
        # np.split returns views into `params`; copy them so later in-place
        # updates of W / b do not write through to the caller's buffer.
        self.W = _W.reshape(self.W.shape).copy()
        self.b = _b.copy()

    def get_grads(self):
        """Return dW and db as one flat vector laid out as ``[dW.ravel(), db]``."""
        return np.concatenate([self.dW.ravel(), self.db], axis=0)


class Model:
    """Multi-layer perceptron assembled from a stack of ``Dense`` layers."""

    def __init__(self, hidden_dims, activation_functions, deriv_functions):
        """
        :param hidden_dims: List[int], layer widths from input to output;
            consecutive entries give each layer's (in_dim, out_dim).
        :param activation_functions: List, activation function of each layer.
        :param deriv_functions: List, derivative of each layer's activation.
        """
        # Build the layers in order; the output layer takes the last entry of
        # the activation / derivative lists.
        self.layers = []
        for i in range(len(hidden_dims) - 2):  # every layer except the output
            self.layers.append(Dense(hidden_dims[i], hidden_dims[i + 1],
                                     activation_functions[i], deriv_functions[i]))
        self.layers.append(Dense(hidden_dims[-2], hidden_dims[-1],
                                 activation_functions[-1], deriv_functions[-1]))

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        """Forward pass: feed ``x`` through every layer in order."""
        for layer in self.layers:
            x = layer(x)
        return x

    def backward(self, delta):
        """Backpropagate ``delta`` and compute every layer's gradients.

        :param delta: error signal of the output layer (the training loop
            passes ``y - t``); it is assigned to the last layer as-is.
        """
        W = None  # weights of the layer processed in the previous iteration
        for i, layer in enumerate(reversed(self.layers)):
            if i == 0:
                # Output layer: the caller supplies its delta directly.
                layer.delta = delta
            else:
                # Other layers: pull the delta back through the following
                # layer's weights.
                delta = layer.b_prop(delta, W)
            layer.compute_grad()
            W = layer.W

    def update(self, eps=0.01):
        """Gradient-descent step: subtract ``eps`` times each gradient in place."""
        for layer in self.layers:
            layer.W -= eps * layer.dW
            layer.b -= eps * layer.db

# Hyperparameters read by the training code.
lr = 0.01
n_epochs = 100
batch_size = 50

# A single PReLU instance is shared by the four hidden layers; the output
# layer uses softmax.
p = PReLU()
mlp = Model(
    hidden_dims=[784, 255, 255, 255, 255, 10],
    activation_functions=[p] * 4 + [softmax],
    deriv_functions=[p.deriv] * 4 + [deriv_softmax],
)

### モデルの学習

In [None]:
# Initial learning rate
# NOTE(review): the training loop visible in this notebook passes the global
# `lr` to mlp.update and does not read `initial_lr` or `decay_rate` -- confirm
# whether the decay schedule was meant to be applied.
initial_lr = 0.01
# Decay rate
decay_rate = 0.1

def learning_rate_decay(epoch, initial_lr, decay_rate):
    """Inverse-time decay: ``initial_lr * 1 / (1 + decay_rate * epoch)``.

    :param epoch: epoch index (0 returns ``initial_lr`` unchanged).
    :param initial_lr: learning rate at epoch 0.
    :param decay_rate: growth of the denominator per epoch.
    """
    scale = 1.0 / (1.0 + decay_rate * epoch)
    return initial_lr * scale


def train_model(mlp, x_train, t_train, x_val, t_val, n_epochs=10):
    """Train ``mlp`` with mini-batch gradient descent and print per-epoch metrics.

    The mini-batch size and the learning rate are read from the module-level
    names ``batch_size`` and ``lr``.

    :param mlp: model; called as ``mlp(x)`` and must provide
        ``backward(delta)`` and ``update(eps)``.
    :param x_train: training inputs, one sample per row.
    :param t_train: one-hot training targets (``argmax(axis=1)`` gives the label).
    :param x_val: validation inputs, one sample per row.
    :param t_val: one-hot validation targets.
    :param n_epochs: int, number of passes over the training data.
    :return: None; one line of metrics is printed per epoch.
    """
    for epoch in range(n_epochs):
        losses_train = []
        losses_valid = []
        train_num = 0
        train_true_num = 0
        valid_num = 0
        valid_true_num = 0

        # Reshuffle every epoch so the mini-batches differ between epochs.
        x_train, t_train = shuffle(x_train, t_train)
        x_train_batches = create_batch(x_train, batch_size)
        t_train_batches = create_batch(t_train, batch_size)

        x_val, t_val = shuffle(x_val, t_val)
        x_val_batches = create_batch(x_val, batch_size)
        t_val_batches = create_batch(t_val, batch_size)

        # Training
        for x, t in zip(x_train_batches, t_train_batches):
            # Forward pass
            y = mlp(x)

            # Loss
            losses_train.append(float(crossentropy_loss(t, y)))

            # Backward pass: the output-layer error signal is y - t.
            mlp.backward(y - t)

            # Parameter update
            mlp.update(lr)

            # Count correct predictions. int() accepts both NumPy scalars and
            # plain Python numbers, whichever accuracy_score returns.
            correct = accuracy_score(t.argmax(axis=1), y.argmax(axis=1), normalize=False)
            train_num += x.shape[0]
            train_true_num += int(correct)

        # Evaluation (forward pass only; no parameter updates)
        for x, t in zip(x_val_batches, t_val_batches):
            y = mlp(x)

            losses_valid.append(float(crossentropy_loss(t, y)))

            correct = accuracy_score(t.argmax(axis=1), y.argmax(axis=1), normalize=False)
            valid_num += x.shape[0]
            valid_true_num += int(correct)

        print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
            epoch,
            np.mean(losses_train),
            train_true_num/train_num,
            np.mean(losses_valid),
            valid_true_num/valid_num
        ))
        


# Run training on the train/validation split for n_epochs epochs.
train_model(mlp, x_train, t_train, x_val, t_val, n_epochs)


EPOCH: 0, Train [Loss: 1.394, Accuracy: 0.528], Valid [Loss: 0.759, Accuracy: 0.730]
EPOCH: 1, Train [Loss: 0.679, Accuracy: 0.753], Valid [Loss: 0.609, Accuracy: 0.783]
EPOCH: 2, Train [Loss: 0.564, Accuracy: 0.802], Valid [Loss: 0.547, Accuracy: 0.811]
EPOCH: 3, Train [Loss: 0.507, Accuracy: 0.820], Valid [Loss: 0.483, Accuracy: 0.829]
EPOCH: 4, Train [Loss: 0.476, Accuracy: 0.831], Valid [Loss: 0.457, Accuracy: 0.841]
EPOCH: 5, Train [Loss: 0.452, Accuracy: 0.839], Valid [Loss: 0.453, Accuracy: 0.841]
EPOCH: 6, Train [Loss: 0.434, Accuracy: 0.846], Valid [Loss: 0.440, Accuracy: 0.848]
EPOCH: 7, Train [Loss: 0.416, Accuracy: 0.853], Valid [Loss: 0.405, Accuracy: 0.860]
EPOCH: 8, Train [Loss: 0.404, Accuracy: 0.857], Valid [Loss: 0.408, Accuracy: 0.859]
EPOCH: 9, Train [Loss: 0.392, Accuracy: 0.859], Valid [Loss: 0.391, Accuracy: 0.862]
EPOCH: 10, Train [Loss: 0.380, Accuracy: 0.864], Valid [Loss: 0.390, Accuracy: 0.863]
EPOCH: 11, Train [Loss: 0.369, Accuracy: 0.869], Valid [Loss: 0.

In [None]:
# Predict the whole test set in one batched forward pass instead of looping
# over samples in Python. x_test is already 2-D (n_samples, n_features) after
# the reshape in the data-loading cell.
y_test = mlp(x_test)

# Convert the model outputs to scalar class labels (index of the largest output).
t_pred = y_test.argmax(axis=1).tolist()

submission = pd.Series(t_pred, name='label')
submission.to_csv('drive/MyDrive/Colab Notebooks/DLBasics2023_colab/Lecture03/submission_pred.csv', header=True, index_label='id')