# 라이브러리 호출

In [265]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import models, layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.model_selection import train_test_split

---
# 데이터 불러오기

In [266]:
# Load the training set, the test set and the submission template, in that order.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        './dataset/train.csv',
        './dataset/test.csv',
        './dataset/submission.csv',
    )
)

In [267]:
# Preview the first 10 rows of the training frame.
train.head(10)

Unnamed: 0,id,digit,letter,0,1,2,3,4,5,6,...,774,775,776,777,778,779,780,781,782,783
0,1,5,L,1,1,1,4,3,0,0,...,2,1,0,1,2,4,4,4,3,4
1,2,0,B,0,4,0,0,4,1,1,...,0,3,0,1,4,1,4,2,1,2
2,3,4,L,1,1,2,2,1,1,1,...,3,3,3,0,2,0,3,0,2,2
3,4,9,D,1,2,0,2,0,4,0,...,3,3,2,0,1,4,0,0,1,1
4,5,6,A,3,0,2,4,0,3,0,...,4,4,3,2,1,3,4,3,1,2
5,6,8,C,4,3,0,3,3,4,3,...,4,3,0,4,4,4,2,2,3,4
6,7,1,Q,0,0,4,2,4,0,4,...,4,3,2,0,4,4,4,3,1,3
7,8,3,M,1,0,3,4,4,0,2,...,2,0,4,4,4,0,2,2,3,1
8,9,6,F,0,1,0,4,0,1,2,...,3,2,4,4,4,1,0,1,3,3
9,10,8,J,4,3,4,0,0,0,4,...,2,0,0,1,3,0,3,3,1,2


In [268]:
# (rows, columns) of the training frame.
train.shape

(2048, 787)

In [269]:
# (rows, columns) of the test frame.
test.shape

(20480, 786)

---
# 전처리

In [270]:
# Pixel columns start at positional index 3 (after id, digit, letter). Scale
# them by 1/255 and reshape each 784-value row into a 28x28 single-channel image.
x_train = (train.iloc[:, 3:] / 255.).values.reshape(-1, 28, 28, 1)
# One-hot encode the digit labels. num_classes is pinned to 10 so the label
# width always matches the 10-unit softmax output layer; without it,
# to_categorical infers the width from the largest label present in the data.
y_train = to_categorical(train['digit'].values, num_classes=10)

In [271]:
# Random augmentation settings for the training images: rotation,
# horizontal/vertical shift and zoom.
augmentation_params = {
    'rotation_range': 10,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'zoom_range': 0.10,
}
albumentation = ImageDataGenerator(**augmentation_params)

---
# 모델링

In [272]:
# CNN classifier for 28x28x1 images with a 10-way softmax output.
# The layers are referenced through the `models` / `layers` namespaces that
# this notebook imports; the bare names Sequential, Conv2D, Dropout, Flatten
# and Dense are never imported and would raise NameError.
model = models.Sequential([
    # Block 1: two 3x3 convolutions, then a strided 5x5 convolution that
    # halves the spatial resolution.
    layers.Conv2D(32, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
    layers.Conv2D(32, kernel_size=3, activation='relu'),
    layers.Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu'),
    layers.Dropout(0.4),

    # Block 2: same pattern with 64 filters.
    layers.Conv2D(64, kernel_size=3, activation='relu'),
    layers.Conv2D(64, kernel_size=3, activation='relu'),
    layers.Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'),
    layers.Dropout(0.4),

    # Head: a final 4x4 convolution, flatten, dropout, then class probabilities.
    layers.Conv2D(128, kernel_size=4, activation='relu'),
    layers.Flatten(),
    layers.Dropout(0.4),
    layers.Dense(10, activation='softmax'),
])

# COMPILE WITH ADAM OPTIMIZER AND CROSS ENTROPY COST

In [273]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot
# encoded); accuracy is reported during training.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

---
# 모델 학습

In [274]:
# Exponential learning-rate decay: 1e-3 at epoch index 0, multiplied by 0.95
# for every subsequent epoch.
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=1)
epoch = 45
batch_size = 32
# Hold out 10% of the training data for validation.
x_train2, x_val, y_train2, y_val = train_test_split(x_train, y_train, test_size = 0.1)

# model.fit accepts generators directly; fit_generator is deprecated in TF 2.x
# and removed in later releases.
# steps_per_epoch is derived from the split that is actually iterated
# (x_train2) rather than the full x_train, so one epoch is one pass over the
# training split (the last batch may be partial, hence the ceil).
history = model.fit(
    albumentation.flow(x_train2, y_train2, batch_size=batch_size),
    epochs=epoch,
    steps_per_epoch=int(np.ceil(x_train2.shape[0] / batch_size)),
    validation_data=(x_val, y_val),
    callbacks=[annealer],
    verbose=1,
)


Epoch 00001: LearningRateScheduler reducing learning rate to 0.001.
Epoch 1/45

Epoch 00002: LearningRateScheduler reducing learning rate to 0.00095.
Epoch 2/45

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0009025.
Epoch 3/45

Epoch 00004: LearningRateScheduler reducing learning rate to 0.000857375.
Epoch 4/45

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0008145062499999999.
Epoch 5/45

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0007737809374999998.
Epoch 6/45

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0007350918906249999.
Epoch 7/45

Epoch 00008: LearningRateScheduler reducing learning rate to 0.0006983372960937497.
Epoch 8/45

Epoch 00009: LearningRateScheduler reducing learning rate to 0.0006634204312890623.
Epoch 9/45

Epoch 00010: LearningRateScheduler reducing learning rate to 0.0006302494097246091.
Epoch 10/45

Epoch 00011: LearningRateScheduler reducing learning rate to 0.0005987369392383787.
Epoch 


Epoch 00038: LearningRateScheduler reducing learning rate to 0.00014989025404881545.
Epoch 38/45

Epoch 00039: LearningRateScheduler reducing learning rate to 0.00014239574134637466.
Epoch 39/45

Epoch 00040: LearningRateScheduler reducing learning rate to 0.00013527595427905592.
Epoch 40/45

Epoch 00041: LearningRateScheduler reducing learning rate to 0.00012851215656510312.
Epoch 41/45

Epoch 00042: LearningRateScheduler reducing learning rate to 0.00012208654873684796.
Epoch 42/45

Epoch 00043: LearningRateScheduler reducing learning rate to 0.00011598222130000556.
Epoch 43/45

Epoch 00044: LearningRateScheduler reducing learning rate to 0.00011018311023500529.
Epoch 44/45

Epoch 00045: LearningRateScheduler reducing learning rate to 0.00010467395472325501.
Epoch 45/45


---
# 모델 예측

In [263]:
# Skip the two leading non-pixel columns, scale the pixels by 1/255 and shape
# every row as a 28x28 single-channel image, mirroring the training inputs.
test_pixels = test.iloc[:, 2:].values / 255.
x_test = test_pixels.reshape(-1, 28, 28, 1)
# Run the trained network on the prepared test images.
pred = model.predict(x_test)

In [264]:
# The network ends in a 10-unit softmax, so `pred` holds one row of 10 class
# probabilities per image. The submission needs a single digit per row, so
# take the index of the most probable class instead of assigning the 2-D
# probability array to the column.
submission['digit'] = np.argmax(pred, axis=1)
submission.to_csv('predict.csv', index=False)