In [16]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D ,MaxPooling1D, GlobalMaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [17]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [18]:
import ast

In [19]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import tensorflow_addons as tfa

In [20]:
# Seed the NumPy and TensorFlow global random generators.
seed = 0
np.random.seed(seed)
# NOTE(review): TensorFlow is seeded with the literal 3 rather than `seed` (0);
# confirm the two different values are intentional.
tf.random.set_seed(3)

In [21]:
# Load the review dataset from a CSV in the working directory.
# Later cells read its `review_tokens` and `plc` columns.
data = pd.read_csv("FINAL_RE_PLC_review_tokenized_okt_30377.csv")

In [22]:
def _parse_tokens(raw):
    """Return the token list encoded in `raw`.

    A value that is already a list (e.g. because this cell was executed
    before) is returned unchanged; anything else is handed to
    `ast.literal_eval`, which raises on malformed input.
    """
    if isinstance(raw, list):
        return raw
    return ast.literal_eval(raw)


# The CSV stores each token list as its string repr; turn it back into a list.
# This cell overwrites the column in place, so `_parse_tokens` keeps it safe to
# re-run: a second execution no longer fails on the already-parsed lists.
data['review_tokens'] = data['review_tokens'].apply(_parse_tokens)

In [23]:
# Build the vocabulary from the tokenised reviews and map every review to a
# list of integer word indices (sequence conversion).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review_tokens'])
sequences = tokenizer.texts_to_sequences(data['review_tokens'])

# Bring every sequence to a fixed length of `max_len` (Keras default
# padding/truncation settings are used).
max_len = 82
X = pad_sequences(sequences, maxlen=max_len)

# Integer-encode the `plc` labels, then expand them to one-hot rows.
le = LabelEncoder()
y = to_categorical(le.fit_transform(data['plc']))

In [24]:
# Input dimension for the embedding layer: number of tokens known to the
# tokenizer plus two extra indices.
# NOTE(review): Keras word indices start at 1, so +1 is the usual offset; the +2
# presumably matches the row count of the pre-built embedding matrix — confirm.
vocab_size = len(tokenizer.word_index) + 2

In [25]:
# Load the pre-computed embedding matrix used to initialise the Embedding layer.
embedding_matrix = np.load('embedding_mat.npy')

# Fail fast with a readable message if the file does not line up with the
# tokenizer vocabulary, instead of a weight-shape error when the model is built.
# NOTE(review): this only checks the shape; that row i really belongs to word
# index i cannot be verified from here.
if embedding_matrix.ndim != 2 or embedding_matrix.shape[0] != vocab_size:
    raise ValueError(
        f"embedding_mat.npy has shape {embedding_matrix.shape}; "
        f"expected ({vocab_size}, embedding_dim)"
    )

## CNN + BiLSTM 구현

In [26]:
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, GRU, Concatenate, Dropout, SpatialDropout1D
from tensorflow.keras import Input, Model
from tensorflow.keras import optimizers
import os

In [27]:
# Derive the layer sizes from the prepared data instead of hard-coding them, so
# the model stays consistent if the label set or the embedding file changes.
num_classes = y.shape[1]                    # width of the one-hot label matrix
embedding_dim = embedding_matrix.shape[1]   # width of the pre-trained vectors

model = Sequential()

# Frozen embedding layer initialised from the pre-trained matrix.
model.add(Embedding(vocab_size, embedding_dim, weights=[embedding_matrix],
                    input_length=max_len, trainable=False))
model.add(SpatialDropout1D(rate=0.2))

# Convolution over the token axis; 'same' padding keeps the sequence length,
# which the pooling layer (default pool size) then reduces.
model.add(Conv1D(filters=64, kernel_size=5, padding='same', activation='relu', strides=1))
model.add(MaxPooling1D())

# Bidirectional LSTM summarises the pooled feature sequence into one vector.
model.add(Bidirectional(LSTM(128)))

# Classification head: one softmax unit per class.
model.add(Dense(units=300, activation='relu'))
model.add(Dropout(rate=0.2))
model.add(Dense(num_classes, activation='softmax'))

# NOTE(review): with single-label targets and average='micro', F-beta is
# expected to coincide with plain accuracy — confirm this is the metric wanted.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[tfa.metrics.FBetaScore(num_classes=num_classes, average='micro', beta=0.5)],
)

In [28]:
# NOTE(review): both callbacks below watch *training* metrics ('loss' and
# 'fbeta_score'); the fit call in this notebook passes no validation data, so
# stopping and checkpoint selection are not based on held-out performance.

# Stop once the training loss has gone 5 epochs without improving.
early_stopping = EarlyStopping(monitor='loss', patience=5)

# Write the model to CNN_BiLSTM.h5 whenever the training fbeta_score reaches a
# new maximum; epochs that do not improve it are not saved.
model_checkpoint = ModelCheckpoint(
    filepath='CNN_BiLSTM.h5',
    monitor='fbeta_score',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Initial "best score" placeholder; not read by any cell visible here.
val_f1_score = float('-inf')

In [29]:
# Train on the full (X, y) set for up to 150 epochs; early stopping may end the
# run sooner and the checkpoint callback saves the best model along the way.
# NOTE(review): no validation data is passed, so only training metrics are logged.
history = model.fit(
    x=X,
    y=y,
    batch_size=128,
    epochs=150,
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/150

Epoch 00001: fbeta_score improved from -inf to 0.06130, saving model to CNN_BiLSTM.h5
Epoch 2/150

Epoch 00002: fbeta_score improved from 0.06130 to 0.12572, saving model to CNN_BiLSTM.h5
Epoch 3/150

Epoch 00003: fbeta_score improved from 0.12572 to 0.19265, saving model to CNN_BiLSTM.h5
Epoch 4/150

Epoch 00004: fbeta_score improved from 0.19265 to 0.23399, saving model to CNN_BiLSTM.h5
Epoch 5/150

Epoch 00005: fbeta_score improved from 0.23399 to 0.26635, saving model to CNN_BiLSTM.h5
Epoch 6/150

Epoch 00006: fbeta_score improved from 0.26635 to 0.28752, saving model to CNN_BiLSTM.h5
Epoch 7/150

Epoch 00007: fbeta_score improved from 0.28752 to 0.30984, saving model to CNN_BiLSTM.h5
Epoch 8/150

Epoch 00008: fbeta_score improved from 0.30984 to 0.32623, saving model to CNN_BiLSTM.h5
Epoch 9/150

Epoch 00009: fbeta_score improved from 0.32623 to 0.34236, saving model to CNN_BiLSTM.h5
Epoch 10/150

Epoch 00010: fbeta_score improved from 0.34236 to 0.35688, saving model


Epoch 00042: fbeta_score improved from 0.55884 to 0.56131, saving model to CNN_BiLSTM.h5
Epoch 43/150

Epoch 00043: fbeta_score improved from 0.56131 to 0.56539, saving model to CNN_BiLSTM.h5
Epoch 44/150

Epoch 00044: fbeta_score improved from 0.56539 to 0.57096, saving model to CNN_BiLSTM.h5
Epoch 45/150

Epoch 00045: fbeta_score improved from 0.57096 to 0.57346, saving model to CNN_BiLSTM.h5
Epoch 46/150

Epoch 00046: fbeta_score improved from 0.57346 to 0.57728, saving model to CNN_BiLSTM.h5
Epoch 47/150

Epoch 00047: fbeta_score improved from 0.57728 to 0.57945, saving model to CNN_BiLSTM.h5
Epoch 48/150

Epoch 00048: fbeta_score improved from 0.57945 to 0.58498, saving model to CNN_BiLSTM.h5
Epoch 49/150

Epoch 00049: fbeta_score did not improve from 0.58498
Epoch 50/150

Epoch 00050: fbeta_score improved from 0.58498 to 0.59315, saving model to CNN_BiLSTM.h5
Epoch 51/150

Epoch 00051: fbeta_score did not improve from 0.59315
Epoch 52/150

Epoch 00052: fbeta_score improved from 


Epoch 00085: fbeta_score did not improve from 0.67531
Epoch 86/150

Epoch 00086: fbeta_score did not improve from 0.67531
Epoch 87/150

Epoch 00087: fbeta_score improved from 0.67531 to 0.67943, saving model to CNN_BiLSTM.h5
Epoch 88/150

Epoch 00088: fbeta_score improved from 0.67943 to 0.68440, saving model to CNN_BiLSTM.h5
Epoch 89/150

Epoch 00089: fbeta_score improved from 0.68440 to 0.68476, saving model to CNN_BiLSTM.h5
Epoch 90/150

Epoch 00090: fbeta_score did not improve from 0.68476
Epoch 91/150

Epoch 00091: fbeta_score did not improve from 0.68476
Epoch 92/150

Epoch 00092: fbeta_score improved from 0.68476 to 0.68654, saving model to CNN_BiLSTM.h5
Epoch 93/150

Epoch 00093: fbeta_score improved from 0.68654 to 0.68944, saving model to CNN_BiLSTM.h5
Epoch 94/150

Epoch 00094: fbeta_score improved from 0.68944 to 0.69029, saving model to CNN_BiLSTM.h5
Epoch 95/150

Epoch 00095: fbeta_score improved from 0.69029 to 0.69171, saving model to CNN_BiLSTM.h5
Epoch 96/150

Epoch 

Epoch 130/150

Epoch 00130: fbeta_score improved from 0.72976 to 0.73312, saving model to CNN_BiLSTM.h5
Epoch 131/150

Epoch 00131: fbeta_score improved from 0.73312 to 0.73602, saving model to CNN_BiLSTM.h5
Epoch 132/150

Epoch 00132: fbeta_score did not improve from 0.73602
Epoch 133/150

Epoch 00133: fbeta_score did not improve from 0.73602
Epoch 134/150

Epoch 00134: fbeta_score did not improve from 0.73602
Epoch 135/150

Epoch 00135: fbeta_score improved from 0.73602 to 0.73849, saving model to CNN_BiLSTM.h5
Epoch 136/150

Epoch 00136: fbeta_score did not improve from 0.73849
Epoch 137/150

Epoch 00137: fbeta_score did not improve from 0.73849
Epoch 138/150

Epoch 00138: fbeta_score improved from 0.73849 to 0.74277, saving model to CNN_BiLSTM.h5
Epoch 139/150

Epoch 00139: fbeta_score did not improve from 0.74277
Epoch 140/150

Epoch 00140: fbeta_score improved from 0.74277 to 0.74385, saving model to CNN_BiLSTM.h5
Epoch 141/150

Epoch 00141: fbeta_score did not improve from 0.743