# RNN 모델 성능 평가

In [1]:
import pickle
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Embedding, Dense, LSTM, SimpleRNN
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
test_data = pd.read_csv("../data/test.csv")
y_test = np.array(test_data['label'])

In [3]:
with open("../data/pickle/vocab_sizes.pkl", "rb") as f:
    vocab_sizes = pickle.load(f)

## Khaiii 형태소 분석기

In [5]:
with open("../data/pickle/khaiii_X_train.pkl", "rb") as f:
    X_train = pickle.load(f)

with open("../data/pickle/khaiii_y_train.pkl", "rb") as f:
    y_train = pickle.load(f)
    
with open("../data/pickle/khaiii_X_test.pkl", "rb") as f:
    X_test = pickle.load(f)

In [6]:
model = Sequential()
model.add(Embedding(vocab_sizes[0], 100))
model.add(SimpleRNN(128))
model.add(Dense(1, activation='sigmoid'))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/khaiii_RNN.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 100)         2887400   
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 128)               29312     
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 2,916,841
Trainable params: 2,916,841
Non-trainable params: 0
_________________________________________________________________
Train on 119995 samples, validate on 29999 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.92496, saving model to saved_models/khaiii_RNN.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.92496 to 0.92776, saving model to saved_models/khaiii_RNN.h5
Epoch 3/15
Epoch 00003: val_acc did not improve from 0.92776
Epoch 4/15
Epoch 00004: val_acc improved from 0.9

In [7]:
loaded_model = load_model('saved_models/khaiii_RNN.h5')
print("\n테스트 정확도: %.4f" % (loaded_model.evaluate(X_test, y_test)[1]))


테스트 정확도: 0.9347


In [8]:
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

print("Khaiii 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=["Normal", "Toxic"], digits=4))

cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten()/np.sum(cf_matrix)]


cf_dict = dict(zip(cf_matrix.flatten(), percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Khaiii 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9455    0.9264    0.9358     18550
       Toxic     0.9239    0.9436    0.9336     17571

    accuracy                         0.9347     36121
   macro avg     0.9347    0.9350    0.9347     36121
weighted avg     0.9350    0.9347    0.9348     36121

confusion_matrix
[[17184  1366]
 [  991 16580]]
{17184: '47.57%', 1366: '3.78%', 991: '2.74%', 16580: '45.90%'}


## Okt 형태소 분석기

In [9]:
with open("../data/pickle/okt_train_X.pkl", "rb") as f:
    X_train = pickle.load(f)

with open("../data/pickle/okt_train_y.pkl", "rb") as f:
    y_train = pickle.load(f)
    
with open("../data/pickle/okt_test_X.pkl", "rb") as f:
    X_test = pickle.load(f)

In [10]:
model = Sequential()
model.add(Embedding(vocab_sizes[0], 100))
model.add(SimpleRNN(128))
model.add(Dense(1, activation='sigmoid'))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/okt_RNN.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 100)         2887400   
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 128)               29312     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 2,916,841
Trainable params: 2,916,841
Non-trainable params: 0
_________________________________________________________________
Train on 119988 samples, validate on 29997 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.93136, saving model to saved_models/okt_RNN.h5
Epoch 2/15
Epoch 00002: val_acc did not improve from 0.93136
Epoch 3/15
Epoch 00003: val_acc improved from 0.93136 to 0.93903, saving model to saved_models/okt_RNN.h5
Epoch 4/15
Epoch 00004: val_acc improved from 0.93903

In [11]:
loaded_model = load_model('saved_models/okt_RNN.h5')
print("\n테스트 정확도: %.4f" % (loaded_model.evaluate(X_test, y_test)[1]))


테스트 정확도: 0.9416


In [12]:
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

print("Okt 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=["Normal", "Toxic"], digits=4))

cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten()/np.sum(cf_matrix)]


cf_dict = dict(zip(cf_matrix.flatten(), percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Okt 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9547    0.9304    0.9424     18550
       Toxic     0.9284    0.9534    0.9408     17571

    accuracy                         0.9416     36121
   macro avg     0.9416    0.9419    0.9416     36121
weighted avg     0.9419    0.9416    0.9416     36121

confusion_matrix
[[17258  1292]
 [  818 16753]]
{17258: '47.78%', 1292: '3.58%', 818: '2.26%', 16753: '46.38%'}


## Mecab 형태소 분석기

In [13]:
with open("../data/pickle/mecab_train_X.pkl", "rb") as f:
    X_train = pickle.load(f)

with open("../data/pickle/mecab_train_y.pkl", "rb") as f:
    y_train = pickle.load(f)
    
with open("../data/pickle/mecab_test_X.pkl", "rb") as f:
    X_test = pickle.load(f)

In [14]:
model = Sequential()
model.add(Embedding(vocab_sizes[2], 100))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/mecab_RNN.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 100)         2805600   
_________________________________________________________________
lstm (LSTM)                  (None, 128)               117248    
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 2,922,977
Trainable params: 2,922,977
Non-trainable params: 0
_________________________________________________________________
Train on 119986 samples, validate on 29997 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.94479, saving model to saved_models/mecab_RNN.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.94479 to 0.94696, saving model to saved_models/mecab_RNN.h5
Epoch 3/15
Epoch 00003: val_acc improved from 0.94696 to 0.94943, saving model to saved_models/mecab_RNN.h5
Ep

In [15]:
loaded_model = load_model('saved_models/mecab_RNN.h5')
print("\n테스트 정확도: %.4f" % (loaded_model.evaluate(X_test, y_test)[1]))


테스트 정확도: 0.9521


In [16]:
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

print("Mecab 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=["Normal", "Toxic"], digits=4))

cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten()/np.sum(cf_matrix)]


cf_dict = dict(zip(cf_matrix.flatten(), percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Mecab 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9578    0.9486    0.9531     18550
       Toxic     0.9463    0.9558    0.9510     17571

    accuracy                         0.9521     36121
   macro avg     0.9520    0.9522    0.9521     36121
weighted avg     0.9522    0.9521    0.9521     36121

confusion_matrix
[[17596   954]
 [  776 16795]]
{17596: '48.71%', 954: '2.64%', 776: '2.15%', 16795: '46.50%'}
