# fashion-mnist
- [github](https://github.com/zalandoresearch/fashion-mnist/blob/master/utils/mnist_reader.py)

In [68]:
import os, sys, gzip
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.path.dirname(os.pardir)))))

# data handling
import numpy as np
from sklearn.model_selection import train_test_split

from mnist_test.multi_layer_net_extend import MultiLayerNetExtend
from mnist_test.optimizer import AdaGrad, Adam

# data visualization
import matplotlib.pyplot as plt

#### 데이터 로딩 과정

In [None]:
def load_mnist(path, kind='train'):
    """Load one split of an idx-format image dataset stored as gzip files.

    Parameters
    ----------
    path : str
        Directory containing ``<kind>-labels-idx1-ubyte.gz`` and
        ``<kind>-images-idx3-ubyte.gz``.
    kind : str
        File-name prefix of the split to read (``'train'`` by default).

    Returns
    -------
    (images, labels)
        ``images`` is a uint8 array of shape ``(len(labels), 784)`` and
        ``labels`` is a 1-D uint8 array.
    """
    label_file = os.path.join(path, "%s-labels-idx1-ubyte.gz" % kind)
    image_file = os.path.join(path, "%s-images-idx3-ubyte.gz" % kind)

    with gzip.open(label_file, 'rb') as handle:
        label_bytes = handle.read()
    # The first 8 bytes of the label file are skipped (header, not label data).
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)

    with gzip.open(image_file, 'rb') as handle:
        image_bytes = handle.read()
    # The first 16 bytes of the image file are skipped; the rest is one row of
    # 784 pixel values per label.
    pixels = np.frombuffer(image_bytes, dtype=np.uint8, offset=16)
    images = pixels.reshape(len(labels), 784)

    return images, labels

In [69]:
# Directory holding the four Fashion-MNIST *.gz files. Set the FASHION_MNIST_DIR
# environment variable to run on another machine; the fallback is the original
# hard-coded location, so existing setups keep working unchanged.
DATA_DIR = os.environ.get(
    "FASHION_MNIST_DIR",
    "C:\\Github Projects\\study_store\\Deep Learning Projects\\mini projects\\fashion_mnist",
)
X_train, y_train = load_mnist(DATA_DIR, kind='train')
X_test, y_test = load_mnist(DATA_DIR, kind='t10k')
# types = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(60000, 784) (60000,) (10000, 784) (10000,)


fashion data의 train input & output, test input & output을 확인
<!-- 이.. os.path를 추가해도 path 제대로 못 읽는... -->

## 본격적으로
- 2층 신경망 구현 -> 7층 신경망으로 확장 -> CNN
- train & test / train & validation & test
- dropout / batch size / max epoch
- learning rate 범위 / weight decay 범위
- batch normalization 필요한 작업이지만, 식이 너무 어려움

#### custom functions

In [None]:
def img_show(data, label, times=3):
    """Show `times` randomly chosen images side by side, titled with their labels.

    Parameters
    ----------
    data : array indexed by sample along its first axis; each ``data[index]``
        must be reshapeable to ``(28, 28)`` (e.g. shape ``(N, 784)`` or
        ``(N, 28, 28)``).
    label : sequence aligned with ``data``; ``label[index]`` becomes the title.
    times : int
        Number of images to draw in a single row.
    """
    # The sample count is the length of the first axis. The previous
    # `data.shape[-2]` only matched it for 2-D input and would restrict sampling
    # to the first 28 images for (N, 28, 28) input.
    n_samples = data.shape[0]
    for i in range(times):
        index = np.random.randint(n_samples)
        plt.subplot(1, times, i + 1)
        plt.imshow(data[index].reshape(28, 28))
        plt.title(label[index])
    plt.show()

### validation set
- sklearn (scikit learn) -> model_selection -> train_test_split (-> split train & validation)
- train_test_split(Xtrain, Ytrain, test_size=테스트비율, shuffle=셔플여부, ...)

In [70]:
# Hold out 20% of the training data as a validation split. A fixed random_state
# makes the split identical on every run, so validation scores from different
# runs of the hyper-parameter searches below are comparable.
X_train2, X_val, y_train2, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42)
# Note: the line labelled " test:" reports the validation split (X_val / y_val).
print("train:", X_train2.shape, y_train2.shape,
      "\n test:", X_val.shape, y_val.shape,
      "\ntotal: ", X_train2.shape[0] + X_val.shape[0],
      "\t   /", y_train2.shape[0] + y_val.shape[0])

train: (48000, 784) (48000,) 
 test: (12000, 784) (12000,) 
total:  60000 	   / 60000


In [None]:
# Eyeball a few random training images together with their true labels.
img_show(data=X_train, label=y_train)

### hyper-parameter

In [71]:
# Number of samples drawn for each mini-batch gradient step.
batch_size = 100
# Value passed to the networks below as `dropout_rate`.
dropout_ratio = 0.2

### Network 구성
- Batch Normalization: True
- Dropout: 0.2
- Regularization: weight decay
- Init Weight: False

<br />

- Hidden Layer: hidden layer num, hidden node size
- Activation function: ReLU
- Gradient Descent: AdaGrad, Adam

In [None]:
# Random search: every trial draws a log-uniform learning rate and weight decay,
# trains a fresh network with AdaGrad on random mini-batches, and scores it on
# the validation split. The best trial is remembered so that the cells that
# follow evaluate the selected model rather than whichever trial ran last.
best_val_acc = -1.0
best_network = None
best_params = None

for i in range(100):
    learning_rate = 10 ** np.random.uniform(-3, -1)
    weight_decay = 10 ** np.random.uniform(-4, -1)

    network = MultiLayerNetExtend(
        input_size=X_train.shape[-1], hidden_size_list=[100, 50, 100, 50, 100], output_size=10,
        use_dropout=True, dropout_rate=dropout_ratio,
        use_batchnorm=True, weight_decay_lambda=weight_decay)
    optimizer = AdaGrad(lr=learning_rate)

    # Start fitting the model
    for j in range(1000):
        batch_mask = np.random.choice(X_train2.shape[0], batch_size)
        X_batch = X_train2[batch_mask]
        y_batch = y_train2[batch_mask]

        grads = network.gradient(X_batch, y_batch)
        optimizer.update(network.params, grads)

    val_acc = network.accuracy(X_val, y_val)
    print(f"learning_rate: {learning_rate}, \tweight_decay: {weight_decay}, \tvalidation accuracy: {val_acc}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_network = network
        best_params = (learning_rate, weight_decay)

# Leave `network` bound to the best-scoring trial for the evaluation cells below.
network = best_network
print(f"best -> learning_rate: {best_params[0]}, \tweight_decay: {best_params[1]}, \tvalidation accuracy: {best_val_acc}")

#### addition) 확장?

<!-- accuracies = {}
for i in range(100):
    learning_rate = 10 ** np.random.uniform(-4, 1)
    weight_decay = 10 ** np.random.uniform(-4, 0)

    network = MultiLayerNetExtend(
        input_size=X_train.shape[-1], hidden_size_list=[100, 50, 100, 50, 100], output_size=10,
        use_dropout=True, dropout_rate=dropout_ratio,
        use_batchnorm=True, weight_decay_lambda=weight_decay)
    optimizer = AdaGrad(lr=learning_rate)

    # 모형 적합 시작
    for j in range(1000):
        batch_mask = np.random.choice(X_train2.shape[0], batch_size)
        X_batch = X_train2[batch_mask]
        y_batch = y_train2[batch_mask]

        grads = network.gradient(X_batch, y_batch)
        optimizer.update(network.params, grads)
    accu = network.accuracy(X_val, y_val)
    accuracies["learning_rate: " + str(learning_rate) + "\tweight_decay: " + str(weight_decay)] = accu
    # print(f"learning_rate: {learning_rate}, \tweight_decay: {weight_decay}, \tvalidation accuracy: {network.accuracy(X_val, y_val)}")

accu_sorted = sorted(accuracies.items(), key=lambda w:w[1], reverse=True)
for key, value in accu_sorted:
    print(key, "\tvalidation accuracy:", value) -->

```
# For each optimizer class, run a random search and report its best trial.
for opt in [AdaGrad, Adam]:
    hypa = []      # [learning_rate, weight_decay] of every trial
    accuracy = []  # validation accuracy of every trial, aligned with `hypa`
    for i in range(100):
        learning_rate = 10 ** np.random.uniform(-3, 3)
        weight_decay = 10 ** np.random.uniform(-4, 4)

        network = MultiLayerNetExtend(
            input_size=X_train.shape[-1], hidden_size_list=[100, 50, 100, 50, 100], output_size=10,
            use_dropout=True, dropout_rate=dropout_ratio,
            use_batchnorm=True, weight_decay_lambda=weight_decay)
        optimizer = opt(lr=learning_rate)

        # Start fitting the model
        for j in range(1000):
            batch_mask = np.random.choice(X_train2.shape[0], batch_size)
            X_batch = X_train2[batch_mask]
            y_batch = y_train2[batch_mask]

            grads = network.gradient(X_batch, y_batch)
            optimizer.update(network.params, grads)

        hypa.append([learning_rate, weight_decay])
        accu = network.accuracy(X_val, y_val)
        accuracy.append(accu)

    # Find the best trial here. Iterate over positions: `accuracy.index` is a
    # bound method, not an iterable, so the previous max(...) raised TypeError.
    max_key = max(range(len(accuracy)), key=lambda w: accuracy[w])
    best_lr, best_wd = hypa[max_key]
    # Report the winning hyper-parameters; concatenating "\t" with the integer
    # index raised TypeError and would not have shown them anyway.
    print(str(opt), f"\tlearning_rate: {best_lr}, \tweight_decay: {best_wd}", "\tvalidation accuracy:", accuracy[max_key])
# print(f"learning_rate: {learning_rate}, \tweight_decay: {weight_decay}, \tvalidation accuracy: {network.accuracy(X_val, y_val)}")
```

### output 확인

In [None]:
# Predicted class = position of the largest network output for each training
# image; the sampled images are titled with the prediction, not the true label.
y_pred = np.argmax(
    network.predict(X_train),
    axis=1,
)
img_show(data=X_train, label=y_pred)

In [None]:
# Same visual check on the test images, followed by the overall test accuracy.
y_pred = np.argmax(
    network.predict(X_test),
    axis=1,
)
img_show(data=X_test, label=y_pred)
print(network.accuracy(X_test, y_test))

### 다른 모형을 적합해보자.

In [None]:
from mnist_test.trainer import Trainer

def __train(network, x_train, t_train, x_val, t_val, lr, opt="sgd", epocs=50,
            mini_batch_size=100, verbose=False):
    """Fit `network` with the project's `Trainer` and return that trainer.

    Parameters
    ----------
    network : model object, handed to `Trainer` unchanged.
    x_train, t_train : training inputs and targets.
    x_val, t_val : held-out inputs and targets, passed to `Trainer` right after
        the training data.
    lr : learning rate, forwarded as ``optimizer_param={"lr": lr}``.
    opt : optimizer name forwarded to `Trainer` (default ``"sgd"``).
    epocs : number of epochs (spelling kept so existing keyword callers work).
    mini_batch_size : mini-batch size; defaults to 100, the value that used to
        be hard-coded.
    verbose : forwarded to `Trainer`; defaults to False, the value that used to
        be hard-coded.

    Returns
    -------
    The `Trainer` instance after ``train()`` has run. Previously nothing was
    returned, so the run could not be inspected afterwards.
    NOTE(review): the removed commented-out return referenced
    ``trainer.test_acc_list`` / ``trainer.train_acc_list`` — confirm those
    attributes against mnist_test.trainer before relying on them.
    """
    trainer = Trainer(network, x_train, t_train, x_val, t_val,
        epochs=epocs, mini_batch_size=mini_batch_size, optimizer=opt,
        optimizer_param={"lr": lr}, verbose=verbose)
    trainer.train()
    return trainer

제대로 작동하는지 확인

In [75]:
# Random search with sigmoid activations. The best trial is remembered so the
# train/test accuracies printed at the end describe the selected model rather
# than whichever trial happened to run last.
best_val_acc = -1.0
best_network = None
best_params = None

for i in range(100):
    learning_rate = 10 ** np.random.uniform(-3, -1)
    weight_decay = 10 ** np.random.uniform(-4, -1)

    network = MultiLayerNetExtend(
        input_size=X_train.shape[-1], hidden_size_list=[100, 50, 100, 50, 100], output_size=10,
        use_dropout=True, dropout_rate=dropout_ratio,
        activation="sigmoid", weight_init_std="sigmoid",
        use_batchnorm=True, weight_decay_lambda=weight_decay)
    optimizer = AdaGrad(lr=learning_rate)

    # Start fitting the model
    for j in range(1000):
        batch_mask = np.random.choice(X_train2.shape[0], batch_size)
        X_batch = X_train2[batch_mask]
        y_batch = y_train2[batch_mask]

        grads = network.gradient(X_batch, y_batch)
        optimizer.update(network.params, grads)

    val_acc = network.accuracy(X_val, y_val)
    print(f"learning_rate: {learning_rate}, \tweight_decay: {weight_decay}, \tvalidation accuracy: {val_acc}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_network = network
        best_params = (learning_rate, weight_decay)

# Leave `network` bound to the best-scoring trial before the final evaluation.
network = best_network
print(f"best -> learning_rate: {best_params[0]}, \tweight_decay: {best_params[1]}, \tvalidation accuracy: {best_val_acc}")

y_pred = np.argmax(network.predict(X_train), axis=1)
print(network.accuracy(X_train, y_train))
y_pred = np.argmax(network.predict(X_test), axis=1)
print(network.accuracy(X_test, y_test))

learning_rate: 0.010435555614413346, 	weight_decay: 0.0016788230303147903, 	validation accuracy: 0.8045833333333333
learning_rate: 0.0013239241176477784, 	weight_decay: 0.08158533834432241, 	validation accuracy: 0.60175
learning_rate: 0.005526934025531127, 	weight_decay: 0.016823037262989535, 	validation accuracy: 0.7209166666666667
learning_rate: 0.0024453573661994093, 	weight_decay: 0.05925329795668869, 	validation accuracy: 0.71925
learning_rate: 0.08557023165840626, 	weight_decay: 0.0049542313575567576, 	validation accuracy: 0.655
learning_rate: 0.014484737619278496, 	weight_decay: 0.008307400028009474, 	validation accuracy: 0.7306666666666667
learning_rate: 0.03198670451395344, 	weight_decay: 0.013059108617018691, 	validation accuracy: 0.64475
learning_rate: 0.005056321955187653, 	weight_decay: 0.003714618085498175, 	validation accuracy: 0.7746666666666666
learning_rate: 0.011741339176727296, 	weight_decay: 0.011728384469747897, 	validation accuracy: 0.7709166666666667
learning_ra

In [None]:
# One trial through the Trainer-based helper, using AdaGrad.
# Both hyper-parameters are drawn log-uniformly.
learning_rate = 10 ** np.random.uniform(-3, -1)
weight_decay = 10 ** np.random.uniform(-4, -1)

network = MultiLayerNetExtend(
    input_size=X_train.shape[-1],
    hidden_size_list=[100, 50, 100, 50, 100],
    output_size=10,
    use_dropout=True,
    dropout_rate=dropout_ratio,
    use_batchnorm=True,
    weight_decay_lambda=weight_decay,
)
__train(network, X_train2, y_train2, X_val, y_val, learning_rate, opt="adagrad")

print(f"learning_rate: {learning_rate}, \tweight_decay: {weight_decay}, \tvalidation accuracy: {network.accuracy(X_val, y_val)}")

In [None]:
# Random search using the Trainer helper with Adam (dropout disabled). The best
# trial is remembered so the test accuracy printed at the end describes the
# selected model rather than whichever trial happened to run last.
best_val_acc = -1.0
best_network = None
best_params = None

for _ in range(100):
    learning_rate = 10 ** np.random.uniform(-3, -1)
    weight_decay = 10 ** np.random.uniform(-4, -1)

    network = MultiLayerNetExtend(
        input_size=X_train.shape[-1], hidden_size_list=[100, 50, 100, 50, 100], output_size=10,
        use_dropout=False, dropout_rate=dropout_ratio,
        use_batchnorm=True, weight_decay_lambda=weight_decay)

    __train(network, x_train=X_train2, t_train=y_train2, x_val=X_val, t_val=y_val, lr=learning_rate, opt="adam")

    val_acc = network.accuracy(X_val, y_val)
    print(f"learning_rate: {learning_rate}, \tweight_decay: {weight_decay}, \tvalidation accuracy: {val_acc}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_network = network
        best_params = (learning_rate, weight_decay)

# Leave `network` bound to the best-scoring trial before the final evaluation.
network = best_network
print(f"best -> learning_rate: {best_params[0]}, \tweight_decay: {best_params[1]}, \tvalidation accuracy: {best_val_acc}")

y_pred = np.argmax(network.predict(X_test), axis=1)
print(network.accuracy(X_test, y_test))

In [None]:
# Accuracy of the current `network` on the full training set, then on the test set.
y_pred = np.argmax(
    network.predict(X_train),
    axis=1,
)
print(network.accuracy(X_train, y_train))

y_pred = np.argmax(
    network.predict(X_test),
    axis=1,
)
print(network.accuracy(X_test, y_test))