# Multilayer perceptron for IMDB reviews

In [1]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
# Seed NumPy's global random generator up front so that repeated runs of the
# notebook start from the same random state.
seed = 7
np.random.seed(seed)

Using Theano backend.


In [2]:
# Load the IMDB reviews as sequences of word indices, restricting the
# vocabulary to the `top_words` most frequent words. Rarer words are not
# dropped: Keras maps them to its out-of-vocabulary marker (see `oov_char`).
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=top_words,
)

In [3]:
# Bring every review to exactly `max_words` tokens (shorter reviews are padded,
# longer ones truncated) so both splits become fixed-width arrays.
max_words = 500
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_words)
    for reviews in (X_train, X_test)
)

In [4]:
def get_mlp_model():
    """Build the (uncompiled) multilayer-perceptron sentiment classifier.

    Layer stack: 32-dimensional word embedding over a `top_words` vocabulary
    and `max_words` tokens per review -> flatten -> 250-unit ReLU layer ->
    single sigmoid output unit. Reads the module-level `top_words` and
    `max_words`.

    Returns:
        The Sequential model; compiling and fitting are left to the caller.
    """
    layers = [
        Embedding(top_words, 32, input_length=max_words),
        Flatten(),
        Dense(250, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(layers)
    

In [5]:
# Build and compile the MLP for binary sentiment classification.
model = get_mlp_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

# Fit the model.
# NOTE(review): the test split doubles as validation data, so the per-epoch
# val_* numbers and the final score below come from the same samples.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=128, verbose=2)

# Final evaluation of the model: evaluate() returns [loss, accuracy] given the
# loss and metrics passed to compile() above.
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
flatten_1 (Flatten)          (None, 16000)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 250)               4000250   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 251       
Total params: 4,160,501
Trainable params: 4,160,501
Non-trainable params: 0
_________________________________________________________________
None
Train on 25000 samples, validate on 25000 samples
Epoch 1/2
38s - loss: 0.5449 - acc: 0.6813 - val_loss: 0.3287 - val_acc: 0.8571
Epoch 2/2
28s - loss: 0.2081 - acc: 0.9186 - val_loss: 0.3108 - val_acc: 0.8712
Accuracy: 87.12%


# Convolutional neural net for IMDB

In [6]:
from keras.layers.convolutional import MaxPooling1D,Conv1D

In [7]:
def get_cnn_model():
    """Build the (uncompiled) 1-D convolutional sentiment classifier.

    Layer stack: 32-dimensional word embedding over a `top_words` vocabulary
    and `max_words` tokens per review -> Conv1D (32 filters, kernel size 3,
    'same' padding, ReLU) -> max pooling with pool size 2 -> flatten ->
    250-unit ReLU layer -> single sigmoid output unit. Reads the module-level
    `top_words` and `max_words`.

    Returns:
        The Sequential model; compiling and fitting are left to the caller.
    """
    layers = [
        Embedding(top_words, 32, input_length=max_words),
        Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(250, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(layers)

In [8]:
# Build and compile the CNN for binary sentiment classification.
model = get_cnn_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is called directly
# rather than wrapped in print() (which would append a stray "None" line).
model.summary()

# Fit the model.
# NOTE(review): the test split doubles as validation data, so the per-epoch
# val_* numbers and the final score below come from the same samples.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=128, verbose=2)

# Final evaluation of the model: evaluate() returns [loss, accuracy] given the
# loss and metrics passed to compile() above.
score, acc = model.evaluate(X_test, y_test, verbose=0)
# `score` is the binary cross-entropy loss, not a percentage, so it is
# reported as a plain number instead of with a "%" suffix.
print("Loss: %.4f" % score)
print("Accuracy: %.2f%%" % (acc*100))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 500, 32)           3104      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 250, 32)           0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 8000)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 250)               2000250   
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 251       
Total params: 2,163,605
Trainable params: 2,163,605
Non-trainable params: 0
_________________________________________________________________


# LSTM net for IMDB

In [9]:
from keras.layers import LSTM

In [10]:
def get_lstm_model():
    """Build the (uncompiled) LSTM sentiment classifier.

    Layer stack: 32-dimensional word embedding over a `top_words` vocabulary
    and `max_words` tokens per review -> 32-unit LSTM with dropout 0.2 and
    recurrent dropout 0.2 -> single sigmoid output unit. Reads the
    module-level `top_words` and `max_words`.

    Returns:
        The Sequential model; compiling and fitting are left to the caller.
    """
    layers = [
        Embedding(top_words, 32, input_length=max_words),
        LSTM(32, dropout=0.2, recurrent_dropout=0.2),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(layers)

In [None]:
# Build and compile the LSTM for binary sentiment classification.
model = get_lstm_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

# Fit the model.
# NOTE(review): the test split doubles as validation data, so the per-epoch
# val_* numbers and the final score below come from the same samples.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=15, batch_size=128, verbose=2)

# Final evaluation of the model: evaluate() returns [loss, accuracy] given the
# loss and metrics passed to compile() above.
score, acc = model.evaluate(X_test, y_test, verbose=0)
# `score` is the binary cross-entropy loss, not a percentage, so it is
# reported as a plain number instead of with a "%" suffix.
print("Loss: %.4f" % score)
print("Accuracy: %.2f%%" % (acc*100))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 33        
Total params: 168,353
Trainable params: 168,353
Non-trainable params: 0
_________________________________________________________________
None




Train on 25000 samples, validate on 25000 samples
Epoch 1/15
376s - loss: 0.5463 - acc: 0.7240 - val_loss: 0.4233 - val_acc: 0.8151
Epoch 2/15


In [None]:
# 86% after 6 iterations. Training was taking a long time, so the rest of the run was skipped.