# CNN model
## Data: https://www.kaggle.com/c/digit-recognizer/data

In [1]:
import numpy as np

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation

Using TensorFlow backend.


In [2]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# GPU setup: build a session config and register the session with Keras.
config = tf.ConfigProto()
gpu = config.gpu_options
# A "Best-fit with coalescing" algorithm, simplified from a version of dlmalloc.
gpu.allocator_type = 'BFC'
# Upper bound on the share of GPU memory this process may use.
gpu.per_process_gpu_memory_fraction = 0.7
# Grow the allocation on demand instead of reserving it all up front.
gpu.allow_growth = True
session = tf.Session(config=config)
set_session(session)

In [3]:
import pandas as pd
# Read the three competition CSVs from the working directory.
train, test, sample = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)
# Split the target column off the training frame; `train` keeps only the feature columns.
labels = train.pop("label")

In [4]:
# Sanity check: (rows, columns) of the training features after the label column was popped.
train.shape

(42000, 784)

In [5]:
# Sanity check: (rows, columns) of the test set.
test.shape

(28000, 784)

## 預處理

In [17]:
# Reshape each flat row into the (28, 28, 1) input format used by the first Conv2D layer
# (a 128x128 RGB image would be (128, 128, 3)), then divide by 255 to speed up convergence.
# NOTE: this overwrites `train`/`test`, so re-running the cell rescales the data again.
train_pixels = np.asarray(train)
test_pixels = np.asarray(test)
train = train_pixels.reshape(len(train_pixels), 28, 28, 1) / 255
test = test_pixels.reshape(len(test_pixels), 28, 28, 1) / 255

In [19]:
# One-hot encode the digit labels into 10 classes (matches the 10-unit softmax output layer).
y_train_onehot = np_utils.to_categorical(labels, num_classes=10)

In [6]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau, LearningRateScheduler #for annealer


In [14]:
# (Development-time `?Dense` help lookup removed: it is IPython-only syntax and opens the
# pager on every Run All. See the Keras `Dense` layer documentation for its arguments.)

In [15]:
# Small CNN: two conv/pool/dropout stages followed by a dense classifier head.
model = Sequential([
    # First convolution stage: 16 filters of 5x5; "same" padding keeps the 28x28 size.
    Conv2D(filters=16, kernel_size=(5, 5), strides=1, padding="same",
           activation="relu", kernel_initializer="normal", input_shape=(28, 28, 1)),
    # Pooling halves each spatial dimension (28x28 -> 14x14).
    MaxPool2D(pool_size=(2, 2)),
    # Dropout to reduce overfitting.
    Dropout(0.2),

    # Second convolution stage (14x14 -> 7x7 after pooling).
    Conv2D(filters=16, kernel_size=(5, 5), strides=1, padding="same",
           activation="relu", kernel_initializer="normal"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.5),

    # Flatten the (7, 7, 16) feature maps into a 784-element vector.
    Flatten(),
    Dense(100, activation="relu", kernel_initializer="normal"),
    Dropout(0.2),
    Dense(50, activation="relu", kernel_initializer="normal"),
    Dropout(0.2),

    # Output layer: one softmax probability per digit class.
    Dense(10, activation="softmax"),
])
# Finalize the model with its loss, optimizer and reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [23]:
# Print the layer-by-layer output shapes and parameter counts of the compiled model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 28, 28, 16)        416       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 14, 14, 16)        6416      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 7, 7, 16)          0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 7, 7, 16)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 784)               0         
__________

## 儲存與讀取模型

In [26]:
#save only the architecture as a json file
# model_json = model.to_json()
# with open ('CNN_architecture.json','w') as json_file:
#     json_file.write(model_json)

In [27]:
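# from keras.models import model_from_json
# ensembles_num = 15
# with open('CNN_architecture.json', 'r') as f:
#     architecture_json = f.read()
# # Build one independent model per ensemble member; `[model] * ensembles_num`
# # would only repeat a reference to a single shared model object.
# models_array = [model_from_json(architecture_json) for _ in range(ensembles_num)]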

In [22]:
# Train the model, holding out 20% of the training samples for validation.
history = model.fit(
    train,
    y_train_onehot,
    batch_size=200,
    epochs=8,
    validation_split=0.2,
    verbose=1,
)

Train on 33600 samples, validate on 8400 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [25]:
# Returns [loss, accuracy] on the same data that was passed to fit() (including its
# validation split), so this is a training-set score, not a held-out estimate.
model.evaluate(train, y_train_onehot)



[0.029728921567688565, 0.9911428571428571]

In [26]:
# Predicted digit per test image: index of the highest softmax probability.
# Equivalent to Sequential.predict_classes(), which is deprecated and missing from
# newer Keras releases; argmax over predict() works on both old and new versions.
answer = np.argmax(model.predict(test), axis=-1)

In [27]:
# Fill the submission template with the predicted labels.
sample["Label"] = answer
# Guard the re-index so the cell can be re-run: after the first run "ImageId" is already
# the index and an unconditional set_index would raise KeyError.
if "ImageId" in sample.columns:
    sample = sample.set_index("ImageId", drop=True)
# Preview only the first rows instead of rendering the whole frame.
sample.head(10)

Unnamed: 0_level_0,Label
ImageId,Unnamed: 1_level_1
1,2
2,0
3,9
4,9
5,3
6,7
7,0
8,3
9,0
10,3


In [28]:
# Write the submission file; the ImageId index is written as the first column.
sample.to_csv("answer3.csv")