In [None]:
import data_preprocessing as dp
import numpy as np
from keras.models import Sequential
from keras.layers import Conv1D, Dense, GlobalMaxPooling1D, Dropout, BatchNormalization, LSTM
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
train_data, test_data, train_labels, test_labels = dp.load_imdb_data()

100%|██████████| 12500/12500 [01:01<00:00, 202.91it/s]
100%|██████████| 12500/12500 [01:03<00:00, 197.60it/s]
100%|██████████| 12500/12500 [01:07<00:00, 186.01it/s]
100%|██████████| 12500/12500 [01:05<00:00, 190.71it/s]


In [3]:
%%time
train_tokens, test_tokens = dp.process_data(train_data), dp.process_data(test_data)

CPU times: user 29.8 s, sys: 303 ms, total: 30.1 s
Wall time: 30.1 s


In [4]:
embd = dp.load_embedding_matrix('embeddings')

In [21]:
model = Sequential()
model.add(Conv1D(filters=256, kernel_size=3, padding='valid', input_shape=(140, 100)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(Conv1D(filters=256, kernel_size=3, padding='valid'))
model.add(BatchNormalization())

model.add(GlobalMaxPooling1D())

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(1, activation='sigmoid'))

In [22]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_3 (Conv1D)            (None, 138, 256)          77056     
_________________________________________________________________
batch_normalization_1 (Batch (None, 138, 256)          1024      
_________________________________________________________________
dropout_3 (Dropout)          (None, 138, 256)          0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 136, 256)          196864    
_________________________________________________________________
batch_normalization_2 (Batch (None, 136, 256)          1024      
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
__________

In [23]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [24]:
data_generator = dp.generate_batches(train_tokens, train_labels, 100, embd, 140)

history = model.fit_generator(data_generator, 250)

Epoch 1/1


In [25]:
data_generator = dp.generate_batches(test_tokens, test_labels, 100, embd, 140)

score = model.evaluate_generator(data_generator, 250)

print(score)

[0.33594644415378572, 0.85327999854087833]


In [26]:
data_generator = dp.generate_batches(train_tokens, train_labels, 100, embd, 140)

score = model.evaluate_generator(data_generator, 250)

print(score)

[0.3772220200896263, 0.83259999752044678]


In [6]:
modelRNN = Sequential()
modelRNN.add(LSTM(64, input_shape=(140, 100), return_sequences=True))
modelRNN.add(BatchNormalization())
modelRNN.add(Dropout(.2))

modelRNN.add(LSTM(64, return_sequences=True))
modelRNN.add(BatchNormalization())
modelRNN.add(Dropout(.2))

modelRNN.add(LSTM(32))
modelRNN.add(BatchNormalization())
modelRNN.add(Dropout(.2))

modelRNN.add(Dense(100, activation='relu'))
modelRNN.add(Dense(1, activation='sigmoid'))

In [7]:
modelRNN.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 140, 64)           42240     
_________________________________________________________________
batch_normalization_3 (Batch (None, 140, 64)           256       
_________________________________________________________________
dropout_3 (Dropout)          (None, 140, 64)           0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 140, 64)           33024     
_________________________________________________________________
batch_normalization_4 (Batch (None, 140, 64)           256       
_________________________________________________________________
dropout_4 (Dropout)          (None, 140, 64)           0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 32)                12416     
__________

In [8]:
modelRNN.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
data_generator = dp.generate_batches(test_tokens, test_labels, 100, embd, 140)

history = modelRNN.fit_generator(data_generator, 250, epochs=2)

Epoch 1/2
Epoch 2/2


In [13]:
data_generator = dp.generate_batches(test_tokens, test_labels, 100, embd, 140)

score = modelRNN.evaluate_generator(data_generator, 250)

print(score)

[0.36172615367174149, 0.85151999831199643]


In [14]:
data_generator = dp.generate_batches(train_tokens, train_labels, 100, embd, 140)

score = modelRNN.evaluate_generator(data_generator, 250)

print(score)

[0.39784332531690597, 0.83775999951362612]
