# MNIST DATA SET

## Deep Learning을 이용한 비정형 데이터 학습 및 예측

In [1]:
import numpy as np
import pandas as pd
from keras.layers import Flatten, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import set_random_seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [9]:
# Load the raw training data
df = pd.read_csv('/content/drive/MyDrive/KCC 3기 Colab Home/data/mnist/train.csv')

# Separate the features (every column except 'label') from the target
x_data = df.drop(columns='label').values
t_data = df['label'].values.reshape(-1, 1)

# Split BEFORE scaling so the held-out rows never influence the scaler
# statistics (fitting on the full matrix leaks test information into training).
# random_state is unchanged, so the row partition is the same as before.
x_train, x_test, t_train, t_test = train_test_split(x_data,
                                                    t_data,
                                                    test_size=0.3,
                                                    random_state=0)

# Normalize: fit on the training rows only, then apply to every partition.
# clip=True keeps values that exceed the training min/max inside [0, 1].
scaler = MinMaxScaler(clip=True)
scaler.fit(x_train)
x_train_norm = scaler.transform(x_train)
x_test_norm = scaler.transform(x_test)

# Kept so that any cell still referencing the fully normalized matrix finds it
x_data_norm = scaler.transform(x_data)

# Seed the Python, NumPy and backend random generators so that weight
# initialization and batch shuffling are repeatable between runs
# (same seed value as the random_state used for the train/test split).
set_random_seed(0)

# Create the model
keras_model = Sequential()

# Input layer: each sample is a flat vector of 784 pixel values
keras_model.add(Flatten(input_shape=(784,)))

# Hidden layers
keras_model.add(Dense(256, activation='relu'))
keras_model.add(Dense(128, activation='relu'))

# Output layer: softmax over 10 classes
keras_model.add(Dense(10, activation='softmax'))

# Configure training. The sparse loss is used because the targets are passed
# as integer class labels rather than one-hot vectors.
keras_model.compile(optimizer=Adam(learning_rate=1e-2),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

from keras.callbacks import EarlyStopping, ModelCheckpoint

# Save only the weights, and only when val_loss reaches a new best value
my_callback = ModelCheckpoint(
    filepath='tmp_checkpoint.ckpt',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Stop once val_loss has not improved for 5 epochs, then restore the best weights
earlystopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)


# Train the model: up to 100 epochs in batches of 100, with 20% of the
# training rows held out for validation (the callbacks monitor val_loss).
keras_model_result = keras_model.fit(
    x_train_norm,
    t_train,
    batch_size=100,
    epochs=100,
    validation_split=0.2,
    callbacks=[my_callback, earlystopping_callback],
    verbose=1,
)

Epoch 1/100
Epoch 1: val_loss improved from inf to 0.24088, saving model to tmp_checkpoint.ckpt
Epoch 2/100
Epoch 2: val_loss improved from 0.24088 to 0.21520, saving model to tmp_checkpoint.ckpt
Epoch 3/100
Epoch 3: val_loss improved from 0.21520 to 0.17213, saving model to tmp_checkpoint.ckpt
Epoch 4/100
Epoch 4: val_loss did not improve from 0.17213
Epoch 5/100
Epoch 5: val_loss did not improve from 0.17213
Epoch 6/100
Epoch 6: val_loss did not improve from 0.17213
Epoch 7/100
Epoch 7: val_loss did not improve from 0.17213
Epoch 8/100
Epoch 8: val_loss did not improve from 0.17213
Restoring model weights from the end of the best epoch: 3.
Epoch 8: early stopping


In [3]:
# Evaluate on the held-out split; the result is [loss, accuracy]
eval_result = keras_model.evaluate(x=x_test_norm, y=t_test)
eval_result



[0.751109778881073, 0.9661111235618591]

In [4]:
# Persist the whole model to a single .h5 file
keras_model.save(filepath='/content/drive/MyDrive/KCC 3기 Colab Home/my_model.h5')

## 저장한 모델 불러오기

In [5]:
from keras.models import load_model

# Rebuild the model from the .h5 file written by the save cell
new_model = load_model(filepath='/content/drive/MyDrive/KCC 3기 Colab Home/my_model.h5')

In [6]:
# Sanity check: the reloaded model should score the same as the original one
new_model.evaluate(x=x_test_norm, y=t_test)



[0.751109778881073, 0.9661111235618591]

## Checkpoint로 저장된 weight를 가지고 model load하기

In [None]:
# Load the weights written by the ModelCheckpoint callback into the existing model
keras_model.load_weights(filepath='./tmp_checkpoint.ckpt')

## Test Data 예측 및 CSV 파일로 저장

In [6]:
# Read the test set used for the submission predictions
test_df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/KCC 3기 Colab Home/data/mnist/test.csv'
)
test_df

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Convert the test frame to a NumPy array.
# NOTE: this rebinds x_data, which previously held the training features.
x_data = test_df.to_numpy()
x_data

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [8]:
# Reuse the scaler that was fitted on the training data. Fitting a fresh
# MinMaxScaler on the test set would derive different per-pixel min/max values,
# so the inputs would be scaled differently from what the model was trained on.
# This relies on `scaler` still being the training scaler (top-to-bottom run).
# Clip because pixel values outside the range seen during training would
# otherwise be mapped outside [0, 1].
x_data_norm = np.clip(scaler.transform(x_data), 0.0, 1.0)

In [11]:
import tensorflow as tf

# Predict the class scores for every test row, then keep the index of the
# largest score in each row as the predicted label
class_probs = keras_model.predict(x_data_norm)
result = tf.argmax(class_probs, axis=1).numpy()
result



array([2, 0, 9, ..., 3, 9, 2])

In [12]:
# Read the submission template so the predictions can be written into it
dl_df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/KCC 3기 Colab Home/data/mnist/sample_submission.csv'
)
dl_df

Unnamed: 0,ImageId,Label
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
27995,27996,0
27996,27997,0
27997,27998,0
27998,27999,0


In [13]:
# Replace the placeholder labels with the model's predictions
dl_df = dl_df.assign(Label=result)
dl_df

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [15]:
# Write the submission file without the DataFrame index column
dl_df.to_csv(path_or_buf='/content/drive/MyDrive/KCC 3기 Colab Home/data/mnist/dl_result.csv', index=False)