# LSTM 모델 성능 평가

In [10]:
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

In [11]:
# Read the test split and keep its 'label' column as a NumPy array; this is
# the ground truth passed to evaluate() and the sklearn metrics further down.
test_data = pd.read_csv("../data/test.csv")
label_column = test_data['label']
y_test = np.array(label_column)

In [12]:
# Vocabulary sizes, indexed by position (0, 1, 2) in the analyzer sections
# below to size each Embedding layer.
# NOTE(review): pickle.load executes arbitrary code from the file -- only use
# on pickles produced by this project.
with open("../data/pickle/vocab_sizes.pkl", mode="rb") as vocab_file:
    vocab_sizes = pickle.load(vocab_file)

## Khaiii 형태소 분석기

In [17]:
# Khaiii-tokenized training inputs, training labels and test inputs, read in
# that order from their pickle files.
khaiii_pickles = (
    "../data/pickle/khaiii_X_train.pkl",
    "../data/pickle/khaiii_y_train.pkl",
    "../data/pickle/khaiii_X_test.pkl",
)

khaiii_arrays = []
for pickle_path in khaiii_pickles:
    with open(pickle_path, "rb") as f:
        khaiii_arrays.append(pickle.load(f))

X_train, y_train, X_test = khaiii_arrays

In [14]:
# Embedding -> LSTM -> single sigmoid unit, i.e. a binary classifier.
# vocab_sizes[0] is presumably the Khaiii vocabulary size -- confirm against
# the step that wrote vocab_sizes.pkl.
model = Sequential([
    Embedding(vocab_sizes[0], 100),
    LSTM(128),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()

# Stop once val_loss has not improved for 4 epochs; independently, keep on
# disk only the weights with the best val_acc seen so far.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/khaiii_LSTM.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

# 20% of the training data is held out for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=15,
    callbacks=[es, mc],
    batch_size=64,
    validation_split=0.2,
)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 100)         2887400   
_________________________________________________________________
lstm (LSTM)                  (None, 128)               117248    
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 3,004,777
Trainable params: 3,004,777
Non-trainable params: 0
_________________________________________________________________
Train on 119995 samples, validate on 29999 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.92983, saving model to t_khaiii_LSTM_single.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.92983 to 0.93250, saving model to t_khaiii_LSTM_single.h5
Epoch 3/15
Epoch 00003: val_acc improved from 0.93250 to 0.93530, saving model to t_khaiii_LSTM_single.h5
Epoch 4/

In [18]:
# Reload the checkpoint written during training and score it on the test set.
# evaluate() returns [loss, acc] for this model, so index 1 is the accuracy.
loaded_model = load_model('saved_models/khaiii_LSTM.h5')
test_metrics = loaded_model.evaluate(X_test, y_test)
print("\n테스트 정확도: %.4f" % (test_metrics[1]))


테스트 정확도: 0.9399


In [19]:
# Score the test set with the reloaded checkpoint and binarize the sigmoid
# outputs at 0.5 to get hard predictions.
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

class_names = ["Normal", "Toxic"]

print("Khaiii 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=class_names, digits=4))

# Rows of the confusion matrix are true classes and columns are predicted
# classes, so flatten() walks the cells in (true, predicted) row-major order.
cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten() / cf_matrix.sum()]

# Key each share by its cell name rather than by its raw count: two cells
# with the same count would otherwise collapse into a single dict entry.
# The counts themselves are still shown by the printed matrix.
cell_names = ["true {} / pred {}".format(actual, predicted)
              for actual in class_names for predicted in class_names]
cf_dict = dict(zip(cell_names, percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Khaiii 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9427    0.9402    0.9414     18550
       Toxic     0.9371    0.9396    0.9383     17571

    accuracy                         0.9399     36121
   macro avg     0.9399    0.9399    0.9399     36121
weighted avg     0.9399    0.9399    0.9399     36121

confusion_matrix
[[17441  1109]
 [ 1061 16510]]
{17441: '48.28%', 1109: '3.07%', 1061: '2.94%', 16510: '45.71%'}


## Okt 형태소 분석기

In [13]:
# Okt-tokenized training inputs, training labels and test inputs, read in
# that order from their pickle files.
okt_pickles = (
    "../data/pickle/okt_train_X.pkl",
    "../data/pickle/okt_train_y.pkl",
    "../data/pickle/okt_test_X.pkl",
)

okt_arrays = []
for pickle_path in okt_pickles:
    with open(pickle_path, "rb") as f:
        okt_arrays.append(pickle.load(f))

X_train, y_train, X_test = okt_arrays

In [14]:
# Embedding -> LSTM -> single sigmoid unit, i.e. a binary classifier.
# vocab_sizes[1] is presumably the Okt vocabulary size -- confirm against
# the step that wrote vocab_sizes.pkl.
model = Sequential([
    Embedding(vocab_sizes[1], 100),
    LSTM(128),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()

# Stop once val_loss has not improved for 4 epochs; independently, keep on
# disk only the weights with the best val_acc seen so far.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/okt_LSTM.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

# 20% of the training data is held out for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=15,
    callbacks=[es, mc],
    batch_size=64,
    validation_split=0.2,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 100)         2513000   
_________________________________________________________________
lstm (LSTM)                  (None, 128)               117248    
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 2,630,377
Trainable params: 2,630,377
Non-trainable params: 0
_________________________________________________________________
Train on 119988 samples, validate on 29997 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.93913, saving model to tokt_LSTM_single.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.93913 to 0.94173, saving model to tokt_LSTM_single.h5
Epoch 3/15
Epoch 00003: val_acc improved from 0.94173 to 0.94369, saving model to tokt_LSTM_single.h5
Epoch 4/15
Epoch 00004

In [15]:
# Reload the checkpoint written during training and score it on the test set.
# evaluate() returns [loss, acc] for this model, so index 1 is the accuracy.
loaded_model = load_model('saved_models/okt_LSTM.h5')
test_metrics = loaded_model.evaluate(X_test, y_test)
print("\n테스트 정확도: %.4f" % (test_metrics[1]))


테스트 정확도: 0.9481


In [16]:
# Score the test set with the reloaded checkpoint and binarize the sigmoid
# outputs at 0.5 to get hard predictions.
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

class_names = ["Normal", "Toxic"]

print("Okt 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=class_names, digits=4))

# Rows of the confusion matrix are true classes and columns are predicted
# classes, so flatten() walks the cells in (true, predicted) row-major order.
cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten() / cf_matrix.sum()]

# Key each share by its cell name rather than by its raw count: two cells
# with the same count would otherwise collapse into a single dict entry.
# The counts themselves are still shown by the printed matrix.
cell_names = ["true {} / pred {}".format(actual, predicted)
              for actual in class_names for predicted in class_names]
cf_dict = dict(zip(cell_names, percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Okt 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9569    0.9415    0.9491     18550
       Toxic     0.9392    0.9552    0.9472     17571

    accuracy                         0.9481     36121
   macro avg     0.9481    0.9483    0.9481     36121
weighted avg     0.9483    0.9481    0.9482     36121

confusion_matrix
[[17464  1086]
 [  787 16784]]
{17464: '48.35%', 1086: '3.01%', 787: '2.18%', 16784: '46.47%'}


## Mecab 형태소 분석기

In [22]:
# Mecab-tokenized training inputs, training labels and test inputs, read in
# that order from their pickle files.
mecab_pickles = (
    "../data/pickle/mecab_train_X.pkl",
    "../data/pickle/mecab_train_y.pkl",
    "../data/pickle/mecab_test_X.pkl",
)

mecab_arrays = []
for pickle_path in mecab_pickles:
    with open(pickle_path, "rb") as f:
        mecab_arrays.append(pickle.load(f))

X_train, y_train, X_test = mecab_arrays

In [23]:
# Embedding -> LSTM -> single sigmoid unit, i.e. a binary classifier.
# vocab_sizes[2] is presumably the Mecab vocabulary size -- confirm against
# the step that wrote vocab_sizes.pkl.
model = Sequential([
    Embedding(vocab_sizes[2], 100),
    LSTM(128),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()

# Stop once val_loss has not improved for 4 epochs; independently, keep on
# disk only the weights with the best val_acc seen so far.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint('saved_models/mecab_LSTM.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

# 20% of the training data is held out for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=15,
    callbacks=[es, mc],
    batch_size=64,
    validation_split=0.2,
)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 100)         2805600   
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               117248    
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 2,922,977
Trainable params: 2,922,977
Non-trainable params: 0
_________________________________________________________________
Train on 119986 samples, validate on 29997 samples
Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.94563, saving model to tmecab_LSTM_single.h5
Epoch 2/15
Epoch 00002: val_acc improved from 0.94563 to 0.94809, saving model to tmecab_LSTM_single.h5
Epoch 3/15
Epoch 00003: val_acc improved from 0.94809 to 0.94839, saving model to tmecab_LSTM_single.h5
Epoch 4/15
Epo

In [24]:
# Reload the checkpoint written during training and score it on the test set.
# evaluate() returns [loss, acc] for this model, so index 1 is the accuracy.
loaded_model = load_model('saved_models/mecab_LSTM.h5')
test_metrics = loaded_model.evaluate(X_test, y_test)
print("\n테스트 정확도: %.4f" % (test_metrics[1]))


테스트 정확도: 0.9526


In [25]:
# Score the test set with the reloaded checkpoint and binarize the sigmoid
# outputs at 0.5 to get hard predictions.
Y_pred = loaded_model.predict(X_test)
y_pred = Y_pred > 0.5

class_names = ["Normal", "Toxic"]

print("Mecab 형태소 분석기")
print(classification_report(y_test, y_pred, target_names=class_names, digits=4))

# Rows of the confusion matrix are true classes and columns are predicted
# classes, so flatten() walks the cells in (true, predicted) row-major order.
cf_matrix = confusion_matrix(y_test, y_pred)
percentages = ["{:.2f}%".format(value * 100) for value in
               cf_matrix.flatten() / cf_matrix.sum()]

# Key each share by its cell name rather than by its raw count: two cells
# with the same count would otherwise collapse into a single dict entry.
# The counts themselves are still shown by the printed matrix.
cell_names = ["true {} / pred {}".format(actual, predicted)
              for actual in class_names for predicted in class_names]
cf_dict = dict(zip(cell_names, percentages))
print("confusion_matrix")
print(cf_matrix)
print(cf_dict)

Mecab 형태소 분석기
              precision    recall  f1-score   support

      Normal     0.9574    0.9500    0.9537     18550
       Toxic     0.9477    0.9554    0.9515     17571

    accuracy                         0.9526     36121
   macro avg     0.9525    0.9527    0.9526     36121
weighted avg     0.9527    0.9526    0.9526     36121

confusion_matrix
[[17623   927]
 [  784 16787]]
{17623: '48.79%', 927: '2.57%', 784: '2.17%', 16787: '46.47%'}
