## 손글씨 숫자 식별 모델
---
- 데 이 터 : MNIST 데이터 사용
- 학습모델 : 신경망 모델
- 동   작 : 0 ~ 9 사이 숫자 판별

In [27]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.datasets.mnist import load_data
from sklearn.model_selection import train_test_split

### [1] 데이터 로딩 및 준비

In [28]:
# Load the MNIST digits as separate training and test splits, each one a
# pair of (images, labels).
(x_train, y_train), (x_test, y_test) = load_data()

In [29]:
# Confirm the shapes of the image arrays returned by load_data().
print(f'x_train.shape : {x_train.shape}, x_test.shape: {x_test.shape}')

x_train.shape : (60000, 28, 28), x_test.shape: (10000, 28, 28)


In [30]:
# Carve a validation set out of the training data. Stratifying on the labels
# keeps the digit class proportions the same in both splits.
# NOTE(review): no random_state is passed, so the split differs between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    stratify=y_train,
)

In [31]:
# Check how many samples ended up in the training and validation splits.
print(f'x_train.shape : {x_train.shape}, x_val.shape: {x_val.shape}')
print(f'y_train.shape : {y_train.shape}, y_val.shape: {y_val.shape}')

x_train.shape : (45000, 28, 28), x_val.shape: (15000, 28, 28)
y_train.shape : (45000,), y_val.shape: (15000,)


### [2] 데이터 전처리
---
- 수치 데이터 => 스케일링
- 범주형 데이터 => 원핫인코딩

In [32]:
# Inspect the stored values: the first training image and its label.
x_train[0], y_train[0]

(array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          64, 255, 184,  51,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         121, 253, 252,  96,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         196, 253, 252,  96,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  96,
         248, 253, 247,  78,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0

In [33]:
# Scale the pixel values (the features) into the 0.0-1.0 range.
# Dividing an array by a scalar is applied element-wise through NumPy
# broadcasting, so no explicit loop is needed.
x_train, x_val, x_test = (
    images / 255.0 for images in (x_train, x_val, x_test)
)

In [34]:
# 타겟 라벨 => OneHotEncoding으로 변환
from sklearn.preprocessing import OneHotEncoder

In [35]:
# Fit the one-hot encoder on the training labels only, then encode them.
# reshape(-1, 1) turns the 1-D label vector into a single-column 2-D array.
encoder = OneHotEncoder().fit(y_train.reshape(-1, 1))
y_train = encoder.transform(y_train.reshape(-1, 1))

In [36]:
# Check the container type returned by encoder.transform(); the cell output
# shows a SciPy sparse CSR matrix rather than a dense array.
type(y_train)

scipy.sparse.csr.csr_matrix

In [37]:
# Convert the sparse one-hot matrix to a dense NumPy array and confirm the type.
y_train = y_train.toarray()
type(y_train)

numpy.ndarray

In [42]:
# Show the label container type and the first one-hot encoded training label.
# NOTE(review): the ']' right after the type placeholder looks like a stray
# character; it is printed verbatim in the output.
print(f' y_train : {type(y_train)}]\n y_train[0] : {y_train[0]}')

 y_train : <class 'numpy.ndarray'>]
 y_train[0] : [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


In [43]:
# Encode the validation and test labels with the encoder that was fitted on
# the training labels, so all three splits share the same column order.
y_val = encoder.transform(y_val.reshape(-1, 1))
y_test = encoder.transform(y_test.reshape(-1, 1))

In [44]:
# Convert the encoded validation and test labels to dense NumPy arrays,
# matching what was done for the training labels.
y_val, y_test = y_val.toarray(), y_test.toarray()

### [3] 모델 생성
---
- (1) 모델 구성
- (2) 모델 생성

#### [3-1] 모델 구성
---
- 입력 데이터 : 784 => 28 * 28 이미지 데이터
- 출력 데이터 : 10  => 0 ~ 9 숫자 라벨
- 학습 방  법 : 분류 - 다중분류

In [45]:
# Start from an empty Sequential model; layers are appended with add().
model = Sequential(name='MNIST_NNModel')

In [46]:
# Hidden layer: 30 ReLU units. input_shape declares that every sample is a
# flat vector of 784 features (28 * 28 pixels).
# NOTE(review): the image arrays prepared above are still (N, 28, 28); confirm
# they are flattened to (N, 784) before being passed to this model.
model.add(
    Dense(30, activation='relu', input_shape=(784,))
)

In [47]:
# Output layer: 10 softmax units, one per digit label (0-9).
model.add(
    Dense(10, activation='softmax')
)

In [48]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "MNIST_NNModel"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 30)                23550     
                                                                 
 dense_1 (Dense)             (None, 10)                310       
                                                                 
Total params: 23,860
Trainable params: 23,860
Non-trainable params: 0
_________________________________________________________________


#### [3-2] 모델 생성
---