<a href="https://colab.research.google.com/github/Kimuksung/bigdata/blob/master/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""

MNIST + CNN basic
    - Convolution layer : 특징맵(feature mat)
    - Pooling layer : 픽셀 축소(down sampling), 특징 강조  

"""
import tensorflow.compat.v1 as tf # ver1.x
tf.disable_v2_behavior() # ver2.0 사용안함

from tensorflow.keras.datasets.mnist import load_data # dataset load
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the MNIST train/test split that ships with Keras.
(x_train, y_train), (x_test, y_test) = load_data()

# Report the array shapes, one per line.
print(
    x_train.shape,  # (60000, 28, 28)
    y_train.shape,  # (60000,) : integer class ids
    x_test.shape,   # (10000, 28, 28)
    y_test.shape,   # (10000,) : integer class ids
    sep="\n",
)
print(x_train[0])  # raw pixel intensities, 0 ~ 255

# 2-3. Cast int -> float32 and rescale the pixels into [0, 1] in one step.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Display the first training image (the digit 5, per the original note).
img = x_train[0]
plt.imshow(img, cmap='gray')

# The convolution op takes a 4-D batch, so add a leading batch axis and a
# trailing channel axis: (28, 28) -> (1, 28, 28, 1) = (size, h, w, color).
firstImg = img[np.newaxis, ..., np.newaxis]

# Graph definition for a single convolution + max-pool stage.

# Kernel layout: (rows, columns, input channels, number of feature maps).
KERNEL_SHAPE = [3, 3, 1, 5]
# Move one pixel at a time in every dimension.
CONV_STRIDES = [1, 1, 1, 1]
# 2x2 pooling window that also advances by 2, halving height and width.
POOL_WINDOW = [1, 2, 2, 1]

# Filter variable, initialised from a normal distribution.
Filter = tf.Variable(tf.random_normal(KERNEL_SHAPE))

# 1. Convolution layer: feature extraction.
conv2d = tf.nn.conv2d(
    firstImg,
    Filter,
    strides=CONV_STRIDES,
    padding='SAME',
)
print(conv2d)

# 2. Pooling layer: down-sampling of the convolution output.
pool = tf.nn.max_pool(
    conv2d,
    ksize=POOL_WINDOW,
    strides=POOL_WINDOW,
    padding='SAME',
)
print(pool)


with tf.Session() as sess:
    # The random filter is a variable, so it must be initialised first.
    sess.run(tf.global_variables_initializer())

    # Convolution: evaluate the op, then exchange the batch and channel axes
    # so that iterating over axis 0 yields one feature map per filter.
    conv2d_img = np.transpose(sess.run(conv2d), (3, 1, 2, 0))
    print("this:", conv2d_img.shape)  # (5, 28, 28, 1)

    # NOTE: the loop variable `img` rebinds the module-level `img`.
    for i, img in enumerate(conv2d_img):
        plt.subplot(1, 5, i + 1)  # 1 row, 5 columns, slots 1..5
        plt.imshow(img[..., 0], cmap='gray')  # drop the size-1 last axis
    plt.show()

    # Pooling: same treatment; each map is 14x14 after down-sampling.
    pool_img = np.transpose(sess.run(pool), (3, 1, 2, 0))

    for i, img in enumerate(pool_img):
        plt.subplot(1, 5, i + 1)  # 1 row, 5 columns, slots 1..5
        plt.imshow(img[..., 0], cmap='gray')
    plt.show()

In [None]:
'''

- Keras CNN model + cifar10 

1. image dataset load
2. image preprocessing : 실수형 , 정규화 , one-hot-encoding
3. Keras Model
4. Model evaluate
5. Model history

'''

# keras dataset 적용
import tensorflow as tf
from tensorflow.keras.datasets.cifar10 import load_data
from tensorflow.keras import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D , MaxPooling2D
from tensorflow.keras.layers import Dense , Flatten , Dropout


# 1. Load CIFAR-10; the second split is used as validation data below.
(x_train, y_train), (x_val, y_val) = load_data()
# Per the original notes: x_train is (50000, 32, 32, 3), y_train is
# (50000, 1), and the raw pixels are integers in 0 ~ 255.

# Image preprocessing: cast to float32, then normalise into [0, 1].
x_train = x_train.astype("float32") / 255
x_val = x_val.astype("float32") / 255

# Label preprocessing: one-hot encode the integer class ids.
y_train, y_val = to_categorical(y_train), to_categorical(y_val)

# 2. Model
# Per-image input shape (height, width, channels), read off the training set.
input_shape = x_train.shape[1:]

model = Sequential([
    # Conv block 1: 32 kernels of 5x5 over the input channels
    # (kernel tensor [5, 5, 3, 32] per the original note).
    Conv2D(32, kernel_size=(5, 5), input_shape=input_shape, activation="relu"),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Dropout(0.2),

    # Conv block 2: 64 kernels of 5x5 over the 32 maps -> [5, 5, 32, 64].
    Conv2D(64, kernel_size=(5, 5), activation="relu"),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Dropout(0.2),

    # Flatten the 3-D feature maps into a 1-D vector for the dense layers.
    Flatten(),

    # Fully connected hidden layer.
    Dense(64, activation="relu"),

    # Output layer: one softmax unit per class.
    Dense(10, activation="softmax"),
])

# 3. Model environment setting: optimizer, loss and reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# 4. Model training: 10 epochs, mini-batches of 100. The validation split is
#    passed in so the returned history also carries the val_* metrics.
model_fit = model.fit(
    x=x_train,
    y=y_train,
    batch_size=100,
    epochs=10,
    verbose=1,
    validation_data=(x_val, y_val),
)

# 5. Model evaluation on the validation split.
model.evaluate(x=x_val, y=y_val)

# Class names (presumably indexed by CIFAR-10 label id -- confirm before use).
labels = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

# 6. Model history: per-epoch metric lists recorded by model.fit().
history = model_fit.history
train_loss = history['loss']
train_acc = history['accuracy']
val_loss = history['val_loss']
val_acc = history['val_accuracy']

import matplotlib.pyplot as plt

# Loss curves. plt.show must be *called*: a bare `plt.show` only references
# the function, so the figure is never flushed and the accuracy curves below
# would be drawn onto the same axes as the loss curves.
plt.plot(train_loss, label='train loss', color='y')
plt.plot(val_loss, label='val loss', color='r')
plt.legend(loc='best')
plt.show()

# Accuracy curves, on their own figure, with labels that name the metric.
plt.plot(train_acc, label='train acc', color='y')
plt.plot(val_acc, label='val acc', color='r')
plt.legend(loc='best')
plt.show()

# 7. Model test: score the model on 100 validation images drawn at random
#    without replacement.
from sklearn.metrics import classification_report

import numpy as np

idx = np.random.choice(x_val.shape[0], size=100, replace=False)
x_test, y_test = x_val[idx], y_val[idx]  # sampled images and one-hot labels

# Reduce predicted class probabilities and one-hot labels to class ids.
y_pred = model.predict(x_test).argmax(axis=1)
y_true = y_test.argmax(axis=1)

report = classification_report(y_true, y_pred)
print(report)



# CNN

- Convolutional Neural Network (합성곱 신경망)
- 이미지와 같은 큰 data를 fully connected layer 방식으로 하면 엄청난 연산이 필요하며 모든 feature를 나타낸다.
- locally connected 개념을 이용하여 filter를 적용한다.
- filtering을 통해 전체를 보지 않고 특정 영역만 본다.
- 이미지의 특성상 같은 object라도 회전하거나 뒤집으면 DNN(1차원)은 다르게 보지만 CNN은 filter의 feature를 보기 때문에 이를 동일하게 본다.
- max pooling은 locally connected된 feature 중에서 가장 영향이 큰 feature를 선택한 것이다. 최근에는 이 때문에 pooling을 하지 않는다.


## 1. convolution : input x filter -> feature

<img src="http://deeplearning.stanford.edu/wiki/images/6/6c/Convolution_schematic.gif" alt="합성곱 처리 절차, 출처: http://deeplearning.stanford.edu/wiki/index.php/Feature_extraction_using_convolution" style="border:2px solid black">

## 2. filter : window size 개념으로 특정 영역을 얼만큼 보겠다.

## 3. stride : filter를 얼만큼씩 움직이겠다.

## 4. padding : 데이터가 줄어드는 것을 방지하는 방법이 패딩
<img src="https://taewanmerepo.github.io/2018/01/cnn/conv2.jpg" alt="멀티 채널 입력 데이터에 필터를 적용한 합성곱 계산 절차" style="border:2px solid black">

## 5. pooling :  특정 데이터를 강조하는 용도로 사용되어 특정 feature를 나타낸다.