# Chapter 7: NLP LSTM
## Ex2: IMDB Sentiment Classification with LSTM - Keras
* Cho dữ liệu [imdb.npz](https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz) (có sẵn qua `from tensorflow.keras.datasets import imdb`)
* Xây dựng model dự đoán một bài đánh giá phim (review) là tích cực (1) hay tiêu cực (0)

In [None]:
# Colab-only step: mount the user's Google Drive at /content/drive so the
# notebook can read from and write to Drive. This import is not available
# outside Google Colab.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Switch the working directory to the course folder on the mounted Drive.
# The path is hard-coded and user-specific; files written later with relative
# paths (the model diagram PNG and the saved .h5 model) end up in this folder.
%cd /content/drive/MyDrive/LDS8_K275_ONLINE_NGUYENTHIKIMHOANG/Week_4/Chapter7

/content/drive/MyDrive/LDS8_K275_ONLINE_NGUYENTHIKIMHOANG/Week_4/Chapter7


In [None]:
# Silence every Python warning for the rest of the session to keep cell output
# short. NOTE(review): this also hides deprecation warnings from the libraries
# imported below.
import warnings 
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
%matplotlib inline

In [None]:
# Hyperparameters shared by the data-loading, padding, model and training cells.
max_features = 20000  # vocabulary size: passed as `num_words` to imdb.load_data and as the Embedding input dimension
maxlen = 80 # sequence length given to pad_sequences: each review is padded/truncated to this many tokens (tokens come from the top max_features most common words)
batch_size = 32  # mini-batch size used by both model.fit and model.evaluate

In [None]:
print('Loading data...')
# Fetch the pre-tokenised IMDB reviews, restricting the vocabulary to the
# `max_features` most frequent words. Each split is a (sequences, labels) pair.
train_split, test_split = imdb.load_data(num_words=max_features)
x_train_original, y_train = train_split
x_test_original, y_test = test_split

# Report how many reviews each split contains.
print(f'{len(x_train_original)} train sequences')
print(f'{len(x_test_original)} test sequences')

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 train sequences
25000 test sequences


In [None]:
print('Pad sequences (samples x time)')
# Bring every review to exactly `maxlen` tokens so the splits become dense
# 2-D integer arrays. With pad_sequences' default settings, padding and
# truncation both happen at the start of a sequence, so reviews longer than
# `maxlen` keep their last `maxlen` tokens.
x_train, x_test = (
    sequence.pad_sequences(encoded_reviews, maxlen=maxlen)
    for encoded_reviews in (x_train_original, x_test_original)
)
print(f'x_train shape: {x_train.shape}')
print(f'x_test shape: {x_test.shape}')

Pad sequences (samples x time)
x_train shape: (25000, 80)
x_test shape: (25000, 80)


In [None]:
print('Build model...')
# Binary sentiment classifier, declared as a single layer list:
#   token ids -> 128-d embeddings -> LSTM(128) -> two Dense(256)+Dropout
#   stages -> one sigmoid unit.
# NOTE(review): a non-zero recurrent_dropout typically prevents the fused
# cuDNN LSTM kernel from being used on GPU -- confirm if training speed matters.
model = Sequential([
    Embedding(max_features, 128),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

Build model...


In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss (matches the
# single sigmoid output), and accuracy tracked as the reporting metric.
# Other optimisers / optimiser settings can be swapped in here to experiment.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, None, 128)         2560000   
                                                                 
 lstm_3 (LSTM)               (None, 128)               131584    
                                                                 
 dense_4 (Dense)             (None, 256)               33024     
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_5 (Dense)             (None, 256)               65792     
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_6 (Dense)             (None, 1)                

In [None]:
print('Train...')
# Early stopping watches the validation loss and halts after 3 epochs without
# improvement. restore_best_weights=True rolls the model back to the best
# epoch; without it the model keeps the weights of the final (worse) epoch.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=3,
                               restore_best_weights=True)

# Validate on a held-out 20% slice of the *training* data instead of the test
# set. Using the test set to decide when to stop leaks it into model selection
# and makes the later test-set evaluation optimistically biased.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=15,
                    validation_split=0.2,
                    callbacks=[early_stopping])

Train...
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15


In [None]:
# Measure loss and accuracy on the test split; evaluate() returns them in the
# order (loss, metrics...) configured at compile time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print(f'Test score: {score}')
print(f'Test accuracy: {acc}')

In [None]:
from tensorflow.keras.utils import plot_model 
from IPython.display import Image

In [None]:
# Render the architecture diagram (including tensor shapes) to a PNG in the
# current working directory ...
plot_model(model, show_shapes=True, to_file='LSTM_model_Sentiment.png')
# ... and display that file inline as the cell's output.
Image(filename='LSTM_model_Sentiment.png')

In [None]:
# Persist the trained model to disk.
# load_model is imported here but not called in this cell -- presumably kept
# for reloading the saved file later.
from tensorflow.keras.models import load_model 
# Writes the HDF5 file 'LSTM_sentiment_model.h5' into the current working directory.
model.save('LSTM_sentiment_model.h5')