# 라이브러리 호출

In [400]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import models, layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.model_selection import KFold

---
# 데이터 불러오기

In [401]:
# Read the training set, the test set and the submission template in one pass.
train, test, submission = map(
    pd.read_csv,
    ('./dataset/train.csv', './dataset/test.csv', './dataset/submission.csv'),
)

In [402]:
# Preview the first 10 rows of the training frame.
train.head(10)

Unnamed: 0,id,digit,letter,0,1,2,3,4,5,6,...,774,775,776,777,778,779,780,781,782,783
0,1,5,L,1,1,1,4,3,0,0,...,2,1,0,1,2,4,4,4,3,4
1,2,0,B,0,4,0,0,4,1,1,...,0,3,0,1,4,1,4,2,1,2
2,3,4,L,1,1,2,2,1,1,1,...,3,3,3,0,2,0,3,0,2,2
3,4,9,D,1,2,0,2,0,4,0,...,3,3,2,0,1,4,0,0,1,1
4,5,6,A,3,0,2,4,0,3,0,...,4,4,3,2,1,3,4,3,1,2
5,6,8,C,4,3,0,3,3,4,3,...,4,3,0,4,4,4,2,2,3,4
6,7,1,Q,0,0,4,2,4,0,4,...,4,3,2,0,4,4,4,3,1,3
7,8,3,M,1,0,3,4,4,0,2,...,2,0,4,4,4,0,2,2,3,1
8,9,6,F,0,1,0,4,0,1,2,...,3,2,4,4,4,1,0,1,3,3
9,10,8,J,4,3,4,0,0,0,4,...,2,0,0,1,3,0,3,3,1,2


In [403]:
# (rows, columns) of the training frame.
train.shape

(2048, 787)

In [404]:
# (rows, columns) of the test frame.
test.shape

(20480, 786)

---
# 전처리

In [405]:
# Features: every column from position 3 onward, divided by 255.
# Target: the 'digit' column.
x_train = train.iloc[:, 3:].div(255.)
y_train = train.loc[:, 'digit']

In [407]:
# Augmentation generator applying small random zooms, shifts and rotations.
albumentation = ImageDataGenerator(
    zoom_range=0.10,
    height_shift_range=0.1,
    width_shift_range=0.1,
    rotation_range=10,
)

---
# 모델링

In [408]:
# CNN taking 28x28x1 inputs and ending in a 10-unit softmax layer.
# `Sequential` is accessed through the imported `models` module: the bare name
# `Sequential` is not among this notebook's imports, so calling it directly
# raises NameError.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.20),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Dropout(0.20),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Dropout(0.20),

    layers.Flatten(),
    layers.Dense(400, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

model.summary()

Model: "sequential_54"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_233 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_59 (MaxPooling (None, 13, 13, 32)        0         
_________________________________________________________________
dropout_95 (Dropout)         (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_234 (Conv2D)          (None, 11, 11, 64)        18496     
_________________________________________________________________
dropout_96 (Dropout)         (None, 11, 11, 64)        0         
_________________________________________________________________
max_pooling2d_60 (MaxPooling (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_235 (Conv2D)          (None, 3, 3, 64)        

In [409]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

---
# 모델 학습

In [None]:
# 5-fold cross-validated training.
#
# Each fold trains its own freshly initialised copy of `model`, so no fold's
# validation rows have been seen during that fold's training. The trained
# models are collected in `mds` (the prediction step calls `.predict` on each
# element); the per-fold training histories are kept in `histories`.

# Learning rate starts at 1e-3 and is multiplied by 0.95 every epoch.
annealer = LearningRateScheduler(lambda epoch_idx: 1e-3 * 0.95 ** epoch_idx, verbose=1)
epoch = 45
batch_size = 32
num_classes = 10  # matches the 10-unit softmax output layer
k_fold = KFold(n_splits=5, shuffle=True, random_state=777)
mds = []        # trained model per fold
histories = []  # History object per fold

for train_idx, val_idx in k_fold.split(x_train):
    # Reshape the flat pixel rows into 28x28 single-channel images.
    x_train2 = x_train.iloc[train_idx].values.reshape(-1, 28, 28, 1)
    x_val = x_train.iloc[val_idx].values.reshape(-1, 28, 28, 1)

    # Fix the one-hot width explicitly so it never depends on which labels
    # happen to land in a given fold.
    y_train2 = to_categorical(y_train.iloc[train_idx].values, num_classes=num_classes)
    y_val = to_categorical(y_train.iloc[val_idx].values, num_classes=num_classes)

    # clone_model rebuilds the architecture with newly initialised weights;
    # the clone must be compiled before it can be trained.
    fold_model = models.clone_model(model)
    fold_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

    # Model.fit accepts generators directly (fit_generator is deprecated).
    history = fold_model.fit(
        albumentation.flow(x_train2, y_train2, batch_size=batch_size),
        epochs=epoch,
        # Size the epoch from this fold's training split, not the full dataset.
        steps_per_epoch=len(x_train2) // batch_size,
        validation_data=(x_val, y_val),
        callbacks=[annealer],
        verbose=2,  # one line per epoch
    )

    histories.append(history)
    mds.append(fold_model)


Epoch 00001: LearningRateScheduler reducing learning rate to 0.001.
Epoch 1/45

Epoch 00002: LearningRateScheduler reducing learning rate to 0.00095.
Epoch 2/45

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0009025.
Epoch 3/45

Epoch 00004: LearningRateScheduler reducing learning rate to 0.000857375.
Epoch 4/45

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0008145062499999999.
Epoch 5/45

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0007737809374999998.
Epoch 6/45

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0007350918906249999.
Epoch 7/45

Epoch 00008: LearningRateScheduler reducing learning rate to 0.0006983372960937497.
Epoch 8/45

Epoch 00009: LearningRateScheduler reducing learning rate to 0.0006634204312890623.
Epoch 9/45

Epoch 00010: LearningRateScheduler reducing learning rate to 0.0006302494097246091.
Epoch 10/45

Epoch 00011: LearningRateScheduler reducing learning rate to 0.0005987369392383787.
Epoch 

---
# 모델 예측

In [418]:
# Scale the test pixel columns (position 2 onward) by 255 and reshape them to
# 28x28 single-channel images.
scaled_test = test.iloc[:, 2:] / 255.
x_test = scaled_test.values.reshape(-1, 28, 28, 1)

# Collect one prediction array per entry of `mds`.
preds = [md.predict(x_test) for md in mds]
len(preds)

NameError: name 'mds' is not defined

In [264]:
# Ensemble the per-model outputs: average them element-wise across models,
# then take the index of the largest averaged score in each row as the
# predicted digit. (Assigning the averaged 2-D array itself to the single
# 'digit' column would not produce class labels.)
mean_probs = np.mean(preds, axis=0)
submission['digit'] = np.argmax(mean_probs, axis=1)
submission.to_csv('predict.csv', index=False)