# 제 8 장 __인공신경망의 이해와 활용__
___

## __사전설정__
---

(1) 저장소 데이터 가져오기

In [None]:
# Remove any earlier checkout of the course repository so a fresh clone
# does not fail because the target directory already exists.
!rm -rf /content/BizDataAnalysis/

In [None]:
# Clone the course repository into the current working directory.
# NOTE(review): later cells read from /content/BizDataAnalysis, so this
# presumably runs with /content as the working directory - confirm.
!git clone https://github.com/BizStat/BizDataAnalysis.git


(2) matplotlib 환경에서 한글 사용

In [None]:
# Install the Nanum font package (-y answers the apt prompts automatically).
!sudo apt-get install -y fonts-nanum
# Rebuild the fontconfig cache (-f force, -v verbose).
!sudo fc-cache -f -v
# Delete matplotlib's cache directory; presumably so that matplotlib rescans
# the installed fonts after the session restart - confirm.
!rm ~/.cache/matplotlib -rf

런타임 메뉴에서 '세션 다시 시작' 후 다음의 명령문 실행

In [None]:
from matplotlib import rc

# Keep the ASCII hyphen for negative tick labels instead of the Unicode
# minus sign.
rc('axes', **{'unicode_minus': False})
# Draw all figure text with the Nanum font so Korean labels can be used.
rc('font', **{'family': 'NanumGothicCoding'})

(3) 구글 드라이브 연결

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive/.
drive.mount('/content/gdrive/')

___

## 8.3 __신경망 모형의 활용__

### (1) Convolutional Neural Network

* MNIST 데이터 가져오기

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
!unzip /content/BizDataAnalysis/DATA/mnist.zip # /content 폴더에 풀림
mnist = pd.read_table('/content/mnist.csv',header=None,sep=',')

* 데이터 분리 및 변환

In [None]:
# Column 0 is the digit each image represents (kept as a one-column DataFrame).
y = mnist.iloc[:, [0]]
# The remaining columns are the flattened pixel values of a 28 x 28 image.
X = mnist.iloc[:, 1:]

In [None]:
from sklearn.model_selection import train_test_split

# shuffle=False keeps the file order: the first 60000 rows form the training
# set and all remaining rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=60000,
    shuffle=False,
)

* CNN 적용을 위해 데이터 변환

In [None]:
# Conv2D below is declared with input_shape=(28, 28, 1), so every flat pixel
# row is reshaped to (height, width, channels). Passing -1 lets NumPy infer
# the number of samples, so the cell keeps working if the split sizes change.
X_train = np.array(X_train).reshape((-1, 28, 28, 1))
X_test = np.array(X_test).reshape((-1, 28, 28, 1))

* CNN 모형 설정

In [None]:
from tensorflow.keras import layers, models

# Assemble the network one layer at a time; the shapes quoted in the comments
# are per sample (height x width x channels).
mnist_cnn_mod = models.Sequential()

# 32 filters of 5x5, stride 1, 'same' padding: 28x28x1 -> 28x28x32.
mnist_cnn_mod.add(
    layers.Conv2D(32, kernel_size=(5, 5), strides=(1, 1), padding='same',
                  activation='relu', input_shape=(28, 28, 1))
)
# 2x2 max-pooling with stride 2 halves each spatial side: -> 14x14x32.
mnist_cnn_mod.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# 64 filters of 2x2, 'same' padding: -> 14x14x64.
mnist_cnn_mod.add(
    layers.Conv2D(64, kernel_size=(2, 2), activation='relu', padding='same')
)
# Second 2x2 max-pooling (stride left at its default): -> 7x7x64.
mnist_cnn_mod.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Randomly drop 25% of the activations during training.
mnist_cnn_mod.add(layers.Dropout(0.25))

# Classifier head: flatten (7*7*64 = 3136 values), one hidden dense layer,
# then a 10-way softmax - one output per digit.
mnist_cnn_mod.add(layers.Flatten())
mnist_cnn_mod.add(layers.Dense(1000, activation='relu'))
mnist_cnn_mod.add(layers.Dense(10, activation='softmax'))

In [None]:
# Print the layer-by-layer overview of the model (output shapes and
# parameter counts).
mnist_cnn_mod.summary()

In [None]:
# The targets are integer digit labels rather than one-hot vectors, hence the
# sparse variant of categorical cross-entropy. The metric key 'acc' is the
# column name that shows up in the training history.
mnist_cnn_mod.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

* CNN 모형 학습 및 평가

In [None]:
# Train for 10 epochs in mini-batches of 1000 samples.
# NOTE: the test split is passed as validation data, so the val_* values
# recorded after each epoch are test-set metrics.
history = mnist_cnn_mod.fit(
    x=X_train,
    y=y_train,
    batch_size=1000,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test),
)

In [None]:
# One row per epoch: the metrics recorded by fit() plus the epoch number.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist

In [None]:
# Accuracy per epoch on the training data and on the test data (the test
# split was supplied as validation_data, hence the 'val_' prefix).
# The curves are accuracies, so the legend labels say "Accuracy", not "Error".
plt.plot(hist['epoch'], hist['acc'], label='Train Accuracy')
plt.plot(hist['epoch'], hist['val_acc'], label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# evaluate() returns the loss followed by the single metric configured at
# compile time (accuracy).
score = mnist_cnn_mod.evaluate(X_test, y_test)
test_loss, test_accuracy = score
print(f'Test Loss : {test_loss}')
print(f'Test Accuracy  : {test_accuracy}')

* 모형의 분류 결과 살펴보기

In [None]:
# Each row of pred_result holds the 10 softmax outputs for one test image;
# the position of the largest value is the predicted digit.
pred_result = mnist_cnn_mod.predict(X_test)
pred_labels = pred_result.argmax(axis=1)
# Wrap the predictions in a one-column DataFrame (row labels start at 0).
y_test_pred = pd.DataFrame(pred_labels)

In [None]:
# y_test still carries the row labels it had in the full data set, while the
# prediction frame is labelled from 0. Renumber y_test from 0 so the two line
# up row by row when joined. The old labels are not dropped: they are kept as
# an extra leading 'index' column.
y_test.reset_index(inplace=True)

In [None]:
# Put the true labels and the predictions side by side, matched on row label;
# ignore_index=True renumbers the resulting columns 0, 1, 2, ...
frames = [y_test, y_test_pred]
result = pd.concat(frames, axis=1, ignore_index=True)

In [None]:
# Row-normalised confusion matrix: each row is one true digit and shows the
# share of its test images assigned to every predicted digit.
# `result` ends with the true-label column followed by the prediction column
# (any columns before them are leftover row labels), so select the last two
# columns by position. The previous positions 2 and 3 pointed past the end of
# the frame and raised an IndexError.
pd.crosstab(
    index=result.iloc[:, -2],
    columns=result.iloc[:, -1],
    rownames=['actual'],
    colnames=['predicted'],
    normalize='index',
)

---