# CNN 모델 성능 평가

In [1]:
import pickle
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
test_data = pd.read_csv("../data/test.csv")
y_test = np.array(test_data['label'])

In [3]:
with open("../data/vocab_sizes.pkl", "rb") as f:
    vocab_sizes = pickle.load(f)

## Khaiii 형태소 분석기

In [4]:
with open("../data/pickle/khaiii_X_train.pkl", "rb") as f:
    X_train = pickle.load(f)

with open("../data/pickle/khaiii_y_train.pkl", "rb") as f:
    y_train = pickle.load(f)
    
with open("../data/pickle/khaiii_X_test.pkl", "rb") as f:
    X_test = pickle.load(f)

In [5]:
model = Sequential()
model.add(Embedding(vocab_sizes[0], 100))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/khaiii_CNN.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 100)         2887400   
_________________________________________________________________
lstm (LSTM)                  (None, 128)               117248    
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 3,004,777
Trainable params: 3,004,777
Non-trainable params: 0
_________________________________________________________________
Train on 119995 samples, validate on 29999 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.92976, saving model to saved_models/khaiii_CNN.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.92976 to 0.93113, saving model to saved_models/khaiii_CNN.h5
Epoch 3/15
Epoch 00003: val_acc improved from 0.93113 to 0.93550, saving model to saved_models/khaiii_CNN.h5
E

In [6]:
loaded_model = load_model('saved_models/khaiii_CNN.h5')
print("\n테스트 정확도: %.4f" % (loaded_model.evaluate(X_test, y_test)[1]))


테스트 정확도: 0.9398


In [7]:
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

print("Khaiii 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=["Normal", "Toxic"], digits=4))

cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten()/np.sum(cf_matrix)]


cf_dict = dict(zip(cf_matrix.flatten(), percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Khaiii 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9440    0.9384    0.9412     18550
       Toxic     0.9354    0.9413    0.9383     17571

    accuracy                         0.9398     36121
   macro avg     0.9397    0.9399    0.9398     36121
weighted avg     0.9398    0.9398    0.9398     36121

confusion_matrix
[[17408  1142]
 [ 1032 16539]]
{17408: '48.19%', 1142: '3.16%', 1032: '2.86%', 16539: '45.79%'}


## Okt 형태소 분석기

In [8]:
with open("../data/pickle/okt_train_X.pkl", "rb") as f:
    X_train = pickle.load(f)

with open("../data/pickle/okt_train_y.pkl", "rb") as f:
    y_train = pickle.load(f)
    
with open("../data/pickle/okt_test_X.pkl", "rb") as f:
    X_test = pickle.load(f)

In [9]:
model = Sequential()
model.add(Embedding(vocab_sizes[1], 100))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/okt_CNN.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 100)         2513000   
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               117248    
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 2,630,377
Trainable params: 2,630,377
Non-trainable params: 0
_________________________________________________________________
Train on 119988 samples, validate on 29997 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.93813, saving model to saved_models/okt_CNN.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.93813 to 0.94123, saving model to saved_models/okt_CNN.h5
Epoch 3/15
Epoch 00003: val_acc did not improve from 0.94123
Epoch 4/15
Epoch 00004: val_acc improved from 0.94123

In [10]:
loaded_model = load_model('saved_models/okt_CNN.h5')
print("\n테스트 정확도: %.4f" % (loaded_model.evaluate(X_test, y_test)[1]))


테스트 정확도: 0.9479


In [11]:
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

print("Okt 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=["Normal", "Toxic"], digits=4))

cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten()/np.sum(cf_matrix)]

cf_dict = dict(zip(cf_matrix.flatten(), percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Okt 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9527    0.9454    0.9491     18550
       Toxic     0.9429    0.9505    0.9467     17571

    accuracy                         0.9479     36121
   macro avg     0.9478    0.9480    0.9479     36121
weighted avg     0.9479    0.9479    0.9479     36121

confusion_matrix
[[17538  1012]
 [  870 16701]]
{17538: '48.55%', 1012: '2.80%', 870: '2.41%', 16701: '46.24%'}


## Mecab 형태소 분석기

In [12]:
with open("../data/pickle/mecab_train_X.pkl", "rb") as f:
    X_train = pickle.load(f)

with open("../data/pickle/mecab_train_y.pkl", "rb") as f:
    y_train = pickle.load(f)
    
with open("../data/pickle/mecab_test_X.pkl", "rb") as f:
    X_test = pickle.load(f)

In [13]:
model = Sequential()
model.add(Embedding(vocab_sizes[2], 100))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/mecab_CNN.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 100)         2805600   
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               117248    
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 2,922,977
Trainable params: 2,922,977
Non-trainable params: 0
_________________________________________________________________
Train on 119986 samples, validate on 29997 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.94573, saving model to saved_models/mecab_CNN.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.94573 to 0.94649, saving model to saved_models/mecab_CNN.h5
Epoch 3/15
Epoch 00003: val_acc improved from 0.94649 to 0.94899, saving model to saved_models/mecab_CNN.h5
Ep

In [14]:
loaded_model = load_model('saved_models/mecab_CNN.h5')
print("\n테스트 정확도: %.4f" % (loaded_model.evaluate(X_test, y_test)[1]))


테스트 정확도: 0.9526


In [15]:
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

print("Mecab 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=["Normal", "Toxic"], digits=4))

cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten()/np.sum(cf_matrix)]

cf_dict = dict(zip(cf_matrix.flatten(), percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Mecab 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9601    0.9470    0.9535     18550
       Toxic     0.9448    0.9585    0.9516     17571

    accuracy                         0.9526     36121
   macro avg     0.9525    0.9527    0.9526     36121
weighted avg     0.9527    0.9526    0.9526     36121

confusion_matrix
[[17567   983]
 [  730 16841]]
{17567: '48.63%', 983: '2.72%', 730: '2.02%', 16841: '46.62%'}
