In [1]:
# Silence all Python-level warnings for the rest of the session.
import warnings
warnings.simplefilter('ignore')

# Restrict TensorFlow's (v1-compat) logger to ERROR messages and above.
import tensorflow as tf
tf_logging = tf.compat.v1.logging
tf_logging.set_verbosity(tf_logging.ERROR)

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Conv2D
from keras.layers import MaxPool2D
from keras.utils import np_utils
from keras.datasets import mnist

# Load the MNIST digit images and labels through Keras, already split into train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Display the shape of every array as a quick sanity check.
tuple(arr.shape for arr in (X_train, X_test, y_train, y_test))

((60000, 28, 28), (10000, 28, 28), (60000,), (10000,))

In [15]:
# Widen NumPy's print width so each 28-pixel row of the image is printed on a single line.
np.set_printoptions(linewidth=1000)

# Show the raw pixel grid of the last training image.
last_image = X_train[-1]
last_image

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  38,  48,  48,  22

In [13]:
# Label of the last training image, i.e. the digit drawn in the pixel grid of X_train[-1].
y_train[-1]

8

### CNN을 실행시키기 전에 data의 shape을 변경해야 한다.
### (60000, 28, 28) ==> (60000, 28, 28, 1) : 마지막 축은 채널 수이며, 1은 흑백을 의미, 3은 컬러(RGB)를 의미

In [16]:
# Conv2D expects an explicit channel axis: (N, 28, 28) -> (N, 28, 28, 1).
X_train, X_test = (
    images.reshape(len(images), 28, 28, 1)
    for images in (X_train, X_test)
)
X_train.shape, X_test.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

In [17]:
# image scaling: convert the integer pixel intensities to float32 and divide by 255
# so the network sees inputs in the [0, 1] range.
#
# The integer-dtype guard keeps this cell idempotent: once the arrays are float32
# they are left untouched, so re-running the cell cannot divide by 255 a second
# time and silently shrink the inputs.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype('float32') / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype('float32') / 255

In [18]:
# CNN classifier: two (Conv -> Conv -> MaxPool -> Dropout) stages followed by a dense head.
model = Sequential([
    # padding='same' zero-pads the input so the output keeps the same height and width (e.g. 28 x 28 -> 28 x 28).
    # With 32 filters, a single input image becomes 32 feature maps once the convolution is done.
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(28, 28, 1)),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten the feature maps and end with a 10-way softmax (one output per digit).
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model.summary()
# How to read the summary:
# - The more parameters a model has, the more memory it needs and the more computation it performs.
# - None in the output shape: the number of samples per batch has not been fixed yet.
# - Param # follows from y' = Wx + b. With 2 inputs and 10 units the matrix product uses 2 * 10 = 20 weights,
#   and there is one bias per unit (10 biases, NOT one per weight), giving 30 parameters.
#   For the first Conv2D: 3 * 3 * 1 * 32 weights + 32 biases = 320.
# - The softmax output is compared with the labels to compute the loss, and backpropagation adjusts the
#   weights to reduce that loss. Training is going well when the loss (not the weights) approaches 0.
# - The effect of backpropagation can be followed through the reported loss value.

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 14, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
__________

In [19]:
# The 'sparse_' prefix means integer class labels are accepted as they are,
# so y does not have to be one-hot encoded beforehand.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# batch_size: number of images loaded into memory and processed together in one step.
model.fit(X_train, y_train, epochs=2, batch_size=256)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x2918071d308>

In [20]:
# Evaluate the trained model on the held-out test set; the result is kept in `score`.
score = model.evaluate(x=X_test, y=y_test)



In [21]:
score
# [test loss, test accuracy] -- the loss comes first, followed by the 'accuracy' metric requested in compile().

[0.03534022697162582, 0.9893]