양방향 LSTM을 이용한 IMDB 감정분류

In [1]:
from __future__ import print_function
import numpy as np
import pandas as pd

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb

from sklearn.metrics import accuracy_score,classification_report

In [2]:
# Tunable settings shared by the cells below.
# max_features: passed as num_words when loading IMDB and as the Embedding input size.
max_features = 15000
# max_len: length every review is padded/truncated to; also the Embedding input_length.
max_len = 300
# batch_size: mini-batch size handed to model.fit.
batch_size = 64

In [3]:
# Load the IMDB train/test splits, passing max_features as the num_words
# vocabulary limit, then report how many reviews each split contains.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print('{} train observations'.format(len(x_train)))
print('{} test observations'.format(len(x_test)))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 train observations
25000 test observations


In [4]:
# Bring every review to exactly max_len tokens so the splits become
# rectangular 2-D arrays that the Embedding layer can consume.
x_train_2, x_test_2 = (
    sequence.pad_sequences(x_train, maxlen=max_len),
    sequence.pad_sequences(x_test, maxlen=max_len),
)
print('x_train shape: {}'.format(x_train_2.shape))
print('x_test shape: {}'.format(x_test_2.shape))

# Ensure the labels are NumPy arrays for the metric code further down.
y_train, y_test = np.array(y_train), np.array(y_test)

x_train shape: (25000, 300)
x_test shape: (25000, 300)


In [6]:
# Assemble the classifier: embedding -> bidirectional LSTM -> dropout -> one sigmoid unit.
model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_len))
model.add(Bidirectional(LSTM(64)))  # define the bidirectional LSTM layer
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Keyword arguments make it explicit which string is the optimizer and which is the loss.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the cell output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 300, 128)          1920000   
                                                                 
 bidirectional_1 (Bidirectio  (None, 128)              98816     
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 2,018,945
Trainable params: 2,018,945
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
# Train for 4 epochs with validation_split=0.2 so part of the training data is
# held out for validation. Binding the returned History object keeps the
# per-epoch metrics available (history.history) instead of discarding them
# and leaving only an opaque object repr as the cell output.
history = model.fit(
    x_train_2,
    y_train,
    batch_size=batch_size,
    epochs=4,
    validation_split=0.2,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f681f219910>

In [9]:
# Turn the sigmoid outputs into hard 0/1 labels by rounding, then flatten them
# to the labels' shape so they line up with y_train / y_test in the metrics.
# reshape() replaces the earlier in-place `.shape = ...` assignment, which
# NumPy discourages; an incompatible size still raises instead of passing silently.
y_train_predclass = np.round(model.predict(x_train_2)).astype(int).reshape(y_train.shape)
y_test_predclass = np.round(model.predict(x_test_2)).astype(int).reshape(y_test.shape)

In [10]:
def _report_split(split, data_label, y_true, y_pred, lead=""):
    """Print accuracy, classification report and confusion matrix for one split.

    Args:
        split: Short split name used in the headings, e.g. "Train" or "Test".
        data_label: Wording used in the classification-report heading,
            e.g. "Training data".
        y_true: 1-D array of ground-truth labels.
        y_pred: 1-D array of predicted labels, same length as y_true.
        lead: Extra text emitted before the first heading (used to keep the
            additional blank line ahead of the training section).
    """
    title = "LSTM Bidirectional Sentiment Classification"
    print(lead + "\n" + title + "  - " + split + " accuracy:",
          round(accuracy_score(y_true, y_pred), 3))
    print("\n" + title + " of " + data_label + "\n",
          classification_report(y_true, y_pred))
    # Row label corrected from the misspelled "Actuall".
    print("\n" + title + " - " + split + " Confusion Matrix\n\n",
          pd.crosstab(y_true, y_pred, rownames=["Actual"], colnames=["Predicted"]))


# Same three metrics for both splits; only the headings differ.
_report_split("Train", "Training data", y_train, y_train_predclass, lead="\n")
_report_split("Test", "Test data", y_test, y_test_predclass)



LSTM Bidirectional Sentiment Classification  - Train accuracy: 0.943

LSTM Bidirectional Sentiment Classification of Training data
               precision    recall  f1-score   support

           0       0.98      0.91      0.94     12500
           1       0.91      0.98      0.94     12500

    accuracy                           0.94     25000
   macro avg       0.95      0.94      0.94     25000
weighted avg       0.95      0.94      0.94     25000


LSTM Bidirectional Sentiment Classification - Train Confusion Matrix

 Predicted      0      1
Actuall                
0          11317   1183
1            252  12248

LSTM Bidirectional Sentiment Classification  - Test accuracy: 0.844

LSTM Bidirectional Sentiment Classification of Test data
               precision    recall  f1-score   support

           0       0.90      0.77      0.83     12500
           1       0.80      0.92      0.85     12500

    accuracy                           0.84     25000
   macro avg       0.85  