## Convolutional Neural Network for Text Classification of IMDB Reviews

We're going to classify IMDB reviews using a convolutional neural network (CNN).

#### Loading Dependencies

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Embedding, Dense, Conv1D, Activation, Dropout, GlobalMaxPool1D, SpatialDropout1D
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint
import os

#### Setting up constants and hyperparameters

In [7]:
# Directory where per-epoch model weights are checkpointed.
output_dir = '../model_files/conv/'

# Training schedule
epochs = 4
batch_size = 128

# Vocabulary / embedding hyperparameters
n_emb_size = 64         # width of each learned word embedding
n_unique_words = 5000   # vocabulary size used for data loading and the Embedding layer
# NOTE(review): n_words_to_skip is not referenced by any cell visible in this
# notebook (imdb.load_data is called without skip_top) -- confirm whether it
# should be wired in or removed.
n_words_to_skip = 50

# Sequence preprocessing: every review is padded/truncated to this length,
# using Keras' 'pre' mode for both padding and truncation.
max_review_length = 400
pad_type = trunc_type = 'pre'

# Dropout rate applied to the embedding output (SpatialDropout1D)
dropout_emb = 0.2

# Convolutional layer architecture
n_conv = 256   # number of filters
k_conv = 3     # kernel size

# Dense layer architecture
# (an earlier, dead `n_dense = 64` assignment was removed; 256 is the value
# that was actually in effect)
n_dense = 256
dropout = 0.2

#### Load data

In [4]:
# Fetch the IMDB train/validation splits, capping the vocabulary at
# n_unique_words via the num_words argument.
(X_train, y_train), (X_val, y_val) = imdb.load_data(
    num_words=n_unique_words
)

#### Preprocess data

In [5]:
# Both splits are padded/truncated with identical settings so every review
# becomes a fixed-length sequence of max_review_length token ids (0 = padding).
_pad_kwargs = dict(
    maxlen=max_review_length,
    padding=pad_type,
    truncating=trunc_type,
    value=0,
)
X_train = pad_sequences(X_train, **_pad_kwargs)
X_val = pad_sequences(X_val, **_pad_kwargs)

#### Designing Neural Network Architecture

In [8]:
# Embedding -> Conv1D -> global max pool -> dense classifier head.
model = Sequential([
    # Token ids -> n_emb_size-dimensional vectors for each of the
    # max_review_length positions.
    Embedding(n_unique_words, n_emb_size, input_length=max_review_length),
    SpatialDropout1D(dropout_emb),
    # n_conv filters, each spanning k_conv consecutive tokens.
    Conv1D(n_conv, k_conv, activation='relu'),
    # Keep only the strongest activation per filter across the sequence.
    GlobalMaxPool1D(),
    Dense(n_dense, activation='relu'),
    Dropout(dropout),
    # Single sigmoid unit for the binary label.
    Dense(1, activation='sigmoid'),
])

In [9]:
# Print each layer's output shape and parameter count to sanity-check the architecture.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 400, 64)           320000    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 400, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 398, 256)          49408     
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total para

#### Configure Model

In [11]:
# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# Make sure the checkpoint directory exists before the callback that writes
# into it is created; exist_ok avoids the check-then-create race.
os.makedirs(output_dir, exist_ok=True)

# Save the model after every epoch as weights.<epoch>.hdf5. os.path.join
# avoids the doubled separator that string concatenation produced, since
# output_dir already ends with '/'.
model_checkpoint = ModelCheckpoint(
    filepath=os.path.join(output_dir, "weights.{epoch:02d}.hdf5")
)

In [14]:
# Train the model, evaluating on the validation split after each epoch and
# checkpointing through the model_checkpoint callback.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_val, y_val),
    callbacks=[model_checkpoint],
)

Train on 25000 samples, validate on 25000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7feaeecb40b8>