# <span style="color:#0073e6">0. 사례준비</span>
<style>
@media print
{
h1 {page-break-before:always}
}
</style>

> **Library & Environment Settings** 

In [None]:
import pandas as pd
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
# Plot appearance: seaborn's white grid theme, and ASCII minus signs on axes
# (unicode_minus=False) so tick labels render with fonts lacking U+2212.
sns.set_style('whitegrid')
plt.rcParams['axes.unicode_minus'] = False

# Seed NumPy's global generator. NOTE(review): no TensorFlow seed is set in
# this cell, so Keras weight initialisation/shuffling may still vary per run.
np.random.seed(123)
%matplotlib inline

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers

> **MNIST DATA SET LOADING**

In [None]:
import gzip

# Load the pickled MNIST archive. The context manager guarantees the gzip
# handle is closed even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code -- only use this with
# a trusted copy of mnist.pkl.gz.
with gzip.open('/content/mnist.pkl.gz', 'rb') as f:
    train_set, validation_set, t = pickle.load(f, encoding='latin1')

# Only the third element of the archive is used in this notebook
# (presumably the test split -- verify against the data source).
x, y = t[0], t[1]
del t
print(x.shape)
print(y.shape)

- - -
# <span style="color:#0073e6">1. 지도학습을 이용한 특성추출</span>
### 1.1. 출력층 바로 앞의 은닉층의 출력 추출
> **간단한 CNN 구현을 위한 데이터 준비**<br/>
>> `input_shape` = (10000, 28,28,1) <br/>
>> `output_shape` = (10000, 10)

In [None]:
# Shape of one image fed to the CNN: height, width, channels.
input_shape = (28, 28, 1)
# Number of target classes (width of the softmax output layer).
num_class = 10
# Size of the extracted feature vector; reused as the PCA component count
# and as the autoencoder code size further down.
num_dim = 100

In [None]:
# Reshape the flat image rows into image tensors for the CNN. Using -1 lets
# NumPy infer the sample count instead of hard-coding 10000, and reusing
# input_shape keeps the data layout in sync with the model's Input layer.
x_train = x.reshape((-1,) + tuple(input_shape))
# One-hot encode the integer labels for categorical cross-entropy.
y_train = to_categorical(y, num_class)
print(x_train.shape)
print(y_train.shape)

- - -
# <span style="color:#2EFE9A">.</span>

> **모델 정의**<br/>

In [None]:
# Small CNN classifier. The Dense(num_dim) layer right before the softmax is
# the layer whose activations are extracted later as learned features.
model = Sequential(
    [
        Input(shape=input_shape),
        # Two convolution + max-pooling stages.
        Conv2D(32, kernel_size=(3, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        # Flatten the feature maps and apply dropout before the dense head.
        Flatten(),
        Dropout(0.5),
        # Feature layer (num_dim units) followed by the class probabilities.
        Dense(num_dim, activation="sigmoid"),
        Dense(num_class, activation="softmax"),
    ]
)
model.summary()

- - -
# <span style="color:#2EFE9A">.</span>

> **모델 학습**<br/>

In [None]:
# Training hyper-parameters. batch_size is reused by the autoencoder cells.
batch_size = 128
epochs = 15

# Multi-class classification on one-hot targets.
model.compile(
    loss="categorical_crossentropy",
    optimizer="RMSProp",
    metrics=["accuracy"],
)
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
)

- - -
# <span style="color:#2EFE9A">.</span>

> **마지막 바로 이전 층 출력 추출**

In [None]:
# Display the layer list to locate the layer whose output will be extracted.
model.layers

In [None]:
# Extract the activations of the layer directly before the softmax output
# (the Dense(num_dim) layer). Only that single output is requested, so the
# large convolution/pooling feature maps are never materialised, and the
# relative index -2 keeps working if layers are added earlier in the stack.
feature_layer = model.layers[-2]
activ_model = Model(inputs=model.input, outputs=feature_layer.output)
cnn = activ_model.predict(x_train)

### 1.2. 특성 추출 결과의 비교
> **PCA와 특성 비교**<br/>
>> PCA 학습

In [None]:
from sklearn.decomposition import PCA
# Linear baseline: project the flat images onto num_dim principal components
# so the result has the same dimensionality as the CNN features.
pca = PCA(n_components=num_dim, whiten=False)
x_pca = pca.fit(x).transform(x)

- - -
# <span style="color:#2EFE9A">.</span>

>> tSNE를 통한 시각화 비교

In [None]:
from sklearn.manifold import TSNE
def t_scatter(x, y, ax):
    """Scatter-plot a 2-D view of ``x`` coloured by label and return it.

    Parameters
    ----------
    x : 2-D array-like
        Feature matrix. If it already has exactly two columns it is plotted
        as is; otherwise it is first embedded into two dimensions with t-SNE.
    y : sequence
        Labels, one per row of ``x``; used as the hue of the points.
    ax : matplotlib Axes
        Axes to draw the scatter plot on.

    Returns
    -------
    The two-column coordinates that were plotted. Passing this value back in
    later re-draws the same embedding without re-running t-SNE.
    """
    if x.shape[1] == 2:
        # Already two-dimensional: reuse the coordinates unchanged.
        embedding = x
    else:
        embedding = TSNE(n_components=2, learning_rate=300).fit_transform(x)

    frame = pd.DataFrame(embedding, columns=['Vector-1','Vector-2'])
    frame['Label'] = y
    sns.scatterplot(
        data=frame,
        x='Vector-1',
        y='Vector-2',
        hue='Label',
        palette=sns.color_palette('Paired', 10),
        ax=ax,
    )
    return embedding

In [None]:
# Side-by-side t-SNE views of the PCA features and the CNN features.
# The returned 2-D embeddings are kept so later figures can reuse them.
fig, ax = plt.subplots(ncols=2, figsize=(12,6))

pca_t = t_scatter(x_pca, y, ax[0])
ax[0].set_title('PCA')

cnn_t = t_scatter(cnn, y, ax[1])
ax[1].set_title('CNN');

- - -
# <span style="color:#0073e6">2. AutoEncoder</span>

### 2.2. 과소완전(undercomplete) 오토인코더<br/>

- - -
# <span style="color:#2EFE9A">.</span>

> **모델링**
>> 선형오토인코더와 비선형오토인코더 비교

In [None]:
# Undercomplete autoencoder with linear activations only: the input is
# compressed to num_dim units and then mapped back to the input width.
linear_auto = Sequential([
    Input(shape=(x.shape[1],)),
    Dense(num_dim, activation='linear'),     # encoder (code layer)
    Dense(x.shape[1], activation='linear'),  # decoder (reconstruction)
])
linear_auto.summary()

In [None]:
# Same undercomplete layout as the linear autoencoder, but with ReLU
# activations on both the code layer and the reconstruction layer.
nonlinear_auto = Sequential([
    Input(shape=(x.shape[1],)),
    Dense(num_dim, activation='relu'),     # encoder (code layer)
    Dense(x.shape[1], activation='relu'),  # decoder (reconstruction)
])
nonlinear_auto.summary()

- - -
# <span style="color:#2EFE9A">.</span>
- 모델 학습

In [None]:
# Epoch count shared by all autoencoder training cells below.
epochs = 10

# Autoencoder objective: reconstruct the input from itself (target == input).
linear_auto.compile(
    loss="mean_squared_error",
    optimizer="RMSProp",
    metrics=["mse"],
)
linear_auto.fit(x, x, batch_size=batch_size, epochs=epochs)

In [None]:
# Train the ReLU autoencoder to reconstruct its own input.
nonlinear_auto.compile(
    loss="mean_squared_error",
    optimizer="RMSProp",
    metrics=["mse"],
)
nonlinear_auto.fit(x, x, batch_size=batch_size, epochs=epochs)

- - -
# <span style="color:#2EFE9A">.</span>

In [None]:
def outputs(my_auto, encoder_loc, data=None):
    """Return the activations of the ``encoder_loc``-th layer of ``my_auto``.

    Parameters
    ----------
    my_auto : keras model
        A built model exposing ``.layers`` and ``.input``.
    encoder_loc : int
        1-based position of the layer whose output is wanted
        (``1`` means the first layer in ``my_auto.layers``).
    data : array-like, optional
        Inputs to run through the model. Defaults to the notebook-level
        array ``x`` so existing two-argument calls behave as before.

    Returns
    -------
    The array returned by ``predict`` for the selected layer.

    Raises
    ------
    ValueError
        If ``encoder_loc`` is not within ``1..len(my_auto.layers)``.
    """
    n_layers = len(my_auto.layers)
    if not 1 <= encoder_loc <= n_layers:
        raise ValueError(
            f"encoder_loc must be between 1 and {n_layers}, got {encoder_loc}"
        )
    if data is None:
        data = x  # fall back to the notebook-level input array

    # Request only the target layer's output: this avoids computing and
    # holding every intermediate activation, and predict() then returns a
    # single array regardless of how single-element output lists are handled.
    target_layer = my_auto.layers[encoder_loc - 1]
    activ_model = Model(inputs=my_auto.input, outputs=target_layer.output)
    return activ_model.predict(data)

In [None]:
# Code-layer activations (first layer) of the linear and nonlinear autoencoders.
l_auto, nl_auto = (
    outputs(auto_model, 1) for auto_model in (linear_auto, nonlinear_auto)
)

In [None]:
# 2x2 comparison: PCA, CNN, linear autoencoder, nonlinear autoencoder.
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(10,9))

# pca_t / cnn_t are already 2-D embeddings, so t_scatter re-plots them
# without re-running t-SNE; the autoencoder codes are embedded here.
pca_t = t_scatter(pca_t, y, ax[0,0])
cnn_t = t_scatter(cnn_t, y, ax[0,1])
linear_t = t_scatter(l_auto, y, ax[1,0])
nonlinear_t = t_scatter(nl_auto, y, ax[1,1])

# Panel titles (spelling of "Linear"/"Nonlinear" corrected).
ax[0,0].set_title('PCA')
ax[0,1].set_title('CNN')
ax[1,0].set_title('Linear AutoEncoder')
ax[1,1].set_title('Nonlinear AutoEncoder');

- - -
# <span style="color:#2EFE9A">.</span>

### 2.3. 과대완전(overcomplete) 오토인코더

In [None]:
# Autoencoder whose first hidden layer is twice as wide as the input,
# followed by light dropout, a num_dim-unit layer and the reconstruction.
overcomp = Sequential([
    Input(shape=(x.shape[1],)),
    Dense(x.shape[1]*2, activation='relu'),  # wider-than-input hidden layer
    Dropout(0.1),
    Dense(num_dim, activation='relu'),       # layer extracted later as features
    Dense(x.shape[1], activation='relu'),    # reconstruction
])
overcomp.summary()

- - -
# <span style="color:#2EFE9A">.</span>

In [None]:
# Train the overcomplete autoencoder to reconstruct its own input.
overcomp.compile(
    loss="mean_squared_error",
    optimizer="RMSProp",
    metrics=["mse"],
)
overcomp.fit(x, x, batch_size=batch_size, epochs=epochs)

- - -
# <span style="color:#2EFE9A">.</span>

In [None]:
# Activations of the third layer of `overcomp` (the Dense(num_dim) layer).
overcomp_auto = outputs(overcomp, 3)

# 2x2 comparison: PCA, CNN, nonlinear autoencoder, overcomplete autoencoder.
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(10,10))

# The first three inputs are 2-D embeddings from earlier cells and are
# re-plotted as is; only the overcomplete features go through t-SNE here.
pca_t = t_scatter(pca_t, y, ax[0,0])
cnn_t = t_scatter(cnn_t, y, ax[0,1])
nonlinear_t = t_scatter(nonlinear_t, y, ax[1,0])
over_t = t_scatter(overcomp_auto, y, ax[1,1])

# Panel titles (spelling of "Nonlinear" corrected).
ax[0,0].set_title('PCA')
ax[0,1].set_title('CNN')
ax[1,0].set_title('Nonlinear AutoEncoder')
ax[1,1].set_title('Overcomplete AutoEncoder');

- - -
# <span style="color:#2EFE9A">.</span>

### 2.4. 희소(Sparse) 오토인코더

In [None]:
# Same layout as the overcomplete autoencoder, plus an L1 activity penalty
# (0.001) on the wide hidden layer's activations.
sparse_over = Sequential([
    Input(shape=(x.shape[1],)),
    Dense(
        x.shape[1]*2,
        activation='relu',
        activity_regularizer=regularizers.l1(0.001),
    ),
    Dropout(0.1),
    Dense(num_dim, activation='relu'),     # layer extracted later as features
    Dense(x.shape[1], activation='relu'),  # reconstruction
])
sparse_over.summary()

- - -
# <span style="color:#2EFE9A">.</span>

In [None]:
# Train the sparse overcomplete autoencoder to reconstruct its own input.
sparse_over.compile(
    loss="mean_squared_error",
    optimizer="RMSProp",
    metrics=["mse"],
)
sparse_over.fit(x, x, batch_size=batch_size, epochs=epochs)

- - -
# <span style="color:#2EFE9A">.</span>

In [None]:
# Activations of the third layer of `sparse_over` (the Dense(num_dim) layer).
sparse_auto = outputs(sparse_over, 3)

# 3x2 comparison of every feature extractor in the notebook.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10,12))

# All but the last input are 2-D embeddings from earlier cells and are
# re-plotted as is; only the sparse features go through t-SNE here.
pca_t = t_scatter(pca_t, y, ax[0,0])
cnn_t = t_scatter(cnn_t, y, ax[0,1])
linear_t = t_scatter(linear_t, y, ax[1,0])
nonlinear_t = t_scatter(nonlinear_t, y, ax[1,1])
over_t = t_scatter(over_t, y, ax[2,0])
sparse_t = t_scatter(sparse_auto, y, ax[2,1])

# Panel titles. ax[1,0] shows the *linear* autoencoder; it was previously
# mislabelled with the same title as the nonlinear panel next to it.
ax[0,0].set_title('PCA')
ax[0,1].set_title('CNN')
ax[1,0].set_title('Linear AutoEncoder')
ax[1,1].set_title('Nonlinear AutoEncoder')
ax[2,0].set_title('Overcomplete AutoEncoder')
ax[2,1].set_title('Sparse Overcomplete AutoEncoder');