In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import time
from tensorflow import set_random_seed

np.random.seed(13)
set_random_seed(13)

from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Conv1D
from keras.layers import Bidirectional, GlobalMaxPool1D
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.wrappers.scikit_learn import KerasClassifier
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from wordcloud import WordCloud
from xml.sax import ContentHandler, parse
from zipfile import ZipFile

stemmer = SnowballStemmer('english', ignore_stopwords=True)
stop = set(stopwords.words('english'))

%matplotlib inline
sns.set(rc={'figure.figsize':(11.7,8.27)})

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
%%time

# Reference https://goo.gl/KaOBG3
class ExcelHandler(ContentHandler):
    def __init__(self):
        self.chars = [  ]
        self.cells = [  ]
        self.rows = [  ]
        self.tables = [  ]
    def characters(self, content):
        self.chars.append(content)
    def startElement(self, name, atts):
        if name=="Cell":
            self.chars = [  ]
        elif name=="Row":
            self.cells=[  ]
        elif name=="Table":
            self.rows = [  ]
    def endElement(self, name):
        if name=="Cell":
            self.cells.append(''.join(self.chars))
        elif name=="Row":
            self.rows.append(self.cells)
        elif name=="Table":
            self.tables.append(self.rows)

excelHandler = ExcelHandler()
parse('features.xls', excelHandler)
features = pd.DataFrame(excelHandler.tables[0][1:], columns=excelHandler.tables[0][0])
# objective = 0, subjective = 1
y = np.where(features['Label'] == 'objective', 0, 1)

texts = []
preprocessed_texts = []

for i in range(1, 1001):
    if i // 10 == 0:
        number = '000' + str(i)
    elif i // 100 == 0:
        number = '00' + str(i)
    elif i // 1000 == 0:
        number = '0' + str(i)
    else:
        number = '1000'
    
    f = open('Raw_data/Text' + number + '.txt', 'r', encoding='latin-1')
    text = f.read()
    preprocessed_text = ' '.join([stemmer.stem(w) for w in word_tokenize(text) if (w.isalpha() and w not in stop)])
    texts.append(text)
    preprocessed_texts.append(preprocessed_text)

data = pd.DataFrame({'texts': np.array(texts), 'preprocessed_texts': np.array(preprocessed_texts), 'label': y})
objective_texts = ' '.join(data[data['label'] == 0]['preprocessed_texts'].tolist())
subjective_texts = ' '.join(data[data['label'] == 1]['preprocessed_texts'].tolist())

Wall time: 8.42 s


In [3]:
X_train, X_test, y_train, y_test = train_test_split(np.array(preprocessed_texts), y, random_state=13, stratify=y)

Preparing embeddings in order to train deep learning models (we used pre-trained [Glove](https://nlp.stanford.edu/projects/glove/) embeddings based on 6B tokens).

In [4]:
%%time

embed_size = 50 # how big is each word vector
max_features = 10000 # how many unique words to use (i.e num rows in embedding vector)
maxlen = 200 # max number of words in text to use

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(X_train)
tokenized_train = tokenizer.texts_to_sequences(X_train)
tokenized_test = tokenizer.texts_to_sequences(X_test)
X_t = pad_sequences(tokenized_train, maxlen=maxlen)
X_te = pad_sequences(tokenized_test, maxlen=maxlen)

def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float32')
embeddings_index = dict(get_coefs(*o.strip().split()) for o in ZipFile('glove.6B.zip').open('glove.6B.50d.txt'))
#embeddings_index = dict(get_coefs(*o.strip().split()) for o in ZipFile('glove.840B.300d.zip').open('glove.840B.300d.txt'))

all_embs = np.stack(embeddings_index.values())
emb_mean,emb_std = all_embs.mean(), all_embs.std()

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

Wall time: 7.05 s


# LSTM: Tuning

Hyperparameters tuning for the deep learning model - in case of texts, we used RNN. We chose LSTM and didn't experiment with GRU at all, because in the papers it is mentioned in the papers [[1]](https://arxiv.org/pdf/1412.3555.pdf) and [[2]](http://proceedings.mlr.press/v37/jozefowicz15.pdf) that they are quite equivalent in terms on quality. So we chose LSTM to work with, and tuned several hyperparameters.

_Note. There is an [issue](https://github.com/keras-team/keras/issues/2280) with random seeds in keras that it may be hard to reproduce the exact same results, and this is unfortunate, because there are lots of parameters in LSTM which are initialized randomly. It means that if you re-run the code, some results may probably change insignificantly. Also take into the consideration that it took a long time to run some of the cells (the time was printed out using `%%time`)._

### batch_size, epochs

In [5]:
def create_model():
    inp = Input(shape=(maxlen,))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
    x = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
    x = GlobalMaxPool1D()(x)
    x = Dropout(0.1)(x)
    x = Dense(50, activation="relu")(x)
    x = Dropout(0.1)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [6]:
%%time

kf = StratifiedKFold(n_splits=5, random_state=13)

model = KerasClassifier(build_fn=create_model, verbose=1)
# define the grid search parameters
batch_size = [10, 20, 40, 60]
epochs = [2, 5]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, verbose=2)
grid_result = grid.fit(X_t, y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] batch_size=10, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=10, epochs=2, total=  58.6s
[CV] batch_size=10, epochs=2 .........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.1min remaining:    0.0s


Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=10, epochs=2, total=  58.3s
[CV] batch_size=10, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=10, epochs=2, total=  57.8s
[CV] batch_size=10, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=10, epochs=2, total=  58.5s
[CV] batch_size=10, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=10, epochs=2, total= 1.0min
[CV] batch_size=10, epochs=5 .........................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .......................... batch_size=10, epochs=5, total= 2.3min
[CV] batch_size=10, epochs=5 .........................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .......................... batch_size=10, epochs=5, total= 2.3min
[CV] batch_size=10, epochs=5 ..........

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .......................... batch_size=40, epochs=5, total=  55.9s
[CV] batch_size=60, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=60, epochs=2, total=  29.9s
[CV] batch_size=60, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=60, epochs=2, total=  30.4s
[CV] batch_size=60, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=60, epochs=2, total=  30.9s
[CV] batch_size=60, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=60, epochs=2, total=  31.6s
[CV] batch_size=60, epochs=2 .........................................
Epoch 1/2
Epoch 2/2
[CV] .......................... batch_size=60, epochs=2, total=  34.2s
[CV] batch_size=60, epochs=5 ........................................

[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed: 42.1min finished


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Best: 0.737333 using {'batch_size': 20, 'epochs': 5}
0.684000 (0.032411) with: {'batch_size': 10, 'epochs': 2}
0.737333 (0.042145) with: {'batch_size': 10, 'epochs': 5}
0.625333 (0.019399) with: {'batch_size': 20, 'epochs': 2}
0.737333 (0.030736) with: {'batch_size': 20, 'epochs': 5}
0.634667 (0.001732) with: {'batch_size': 40, 'epochs': 2}
0.680000 (0.019572) with: {'batch_size': 40, 'epochs': 5}
0.634667 (0.001732) with: {'batch_size': 60, 'epochs': 2}
0.686667 (0.025681) with: {'batch_size': 60, 'epochs': 5}
Wall time: 43min 53s


In [19]:
%%time

np.random.seed(13)
set_random_seed(13)

inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
x = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
x = GlobalMaxPool1D()(x)
x = Dropout(0.1)(x)
x = Dense(50, activation="relu")(x)
x = Dropout(0.1)(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print('Fitting model...')
model.fit(X_t, y_train, batch_size=20, epochs=5);
prediction = model.predict([X_te], batch_size=1024, verbose=1)
print(roc_auc_score(y_test, prediction))

Fitting model...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.8209275001727832
Wall time: 1min 56s


### dropout

In [24]:
%%time

np.random.seed(13)
set_random_seed(13)

def create_model(dropout_rate=0.0, recurr_dropout=0.0):
    inp = Input(shape=(maxlen,))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
    x = Bidirectional(LSTM(50, return_sequences=True, dropout=recurr_dropout, recurrent_dropout=recurr_dropout))(x)
    x = GlobalMaxPool1D()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(50, activation="relu")(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

kf = StratifiedKFold(n_splits=5, random_state=13)

model = KerasClassifier(build_fn=create_model, epochs=5, batch_size=20, verbose=1)
# define the grid search parameters
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4]
recurr_dropout = [0.0, 0.1, 0.2]
param_grid = dict(dropout_rate=dropout_rate, recurr_dropout=recurr_dropout)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, verbose=2)
grid_result = grid.fit(X_t, y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV] dropout_rate=0.0, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.0, recurr_dropout=0.0, total= 1.5min
[CV] dropout_rate=0.0, recurr_dropout=0.0 ............................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.6min remaining:    0.0s


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.0, recurr_dropout=0.0, total= 1.6min
[CV] dropout_rate=0.0, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.0, recurr_dropout=0.0, total= 1.6min
[CV] dropout_rate=0.0, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.0, recurr_dropout=0.0, total= 1.6min
[CV] dropout_rate=0.0, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.0, recurr_dropout=0.0, total= 1.6min
[CV] dropout_rate=0.0, recurr_dropout=0.1 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.0, recurr_dropout=0.1, total= 1.8min
[CV] dropout_rate=0.0, recurr_dropout=0.1 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5


Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.1, recurr_dropout=0.1, total= 2.1min
[CV] dropout_rate=0.1, recurr_dropout=0.1 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.1, recurr_dropout=0.1, total= 2.1min
[CV] dropout_rate=0.1, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.1, recurr_dropout=0.2, total= 2.2min
[CV] dropout_rate=0.1, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.1, recurr_dropout=0.2, total= 2.2min
[CV] dropout_rate=0.1, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.1, recurr_dropout=0.2, total= 2.2min
[CV] dropout_rate=0.1, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. d

[CV] ............. dropout_rate=0.3, recurr_dropout=0.0, total= 2.3min
[CV] dropout_rate=0.3, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.3, recurr_dropout=0.0, total= 2.3min
[CV] dropout_rate=0.3, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.3, recurr_dropout=0.0, total= 2.3min
[CV] dropout_rate=0.3, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.3, recurr_dropout=0.0, total= 2.4min
[CV] dropout_rate=0.3, recurr_dropout=0.0 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.3, recurr_dropout=0.0, total= 2.4min
[CV] dropout_rate=0.3, recurr_dropout=0.1 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.3, rec

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.4, recurr_dropout=0.1, total= 3.2min
[CV] dropout_rate=0.4, recurr_dropout=0.1 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.4, recurr_dropout=0.1, total= 3.3min
[CV] dropout_rate=0.4, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.4, recurr_dropout=0.2, total= 3.3min
[CV] dropout_rate=0.4, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.4, recurr_dropout=0.2, total= 3.4min
[CV] dropout_rate=0.4, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] ............. dropout_rate=0.4, recurr_dropout=0.2, total= 3.3min
[CV] dropout_rate=0.4, recurr_dropout=0.2 ............................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5


[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed: 183.0min finished


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Best: 0.734667 using {'dropout_rate': 0.0, 'recurr_dropout': 0.1}
0.729333 (0.017455) with: {'dropout_rate': 0.0, 'recurr_dropout': 0.0}
0.734667 (0.018573) with: {'dropout_rate': 0.0, 'recurr_dropout': 0.1}
0.705333 (0.051343) with: {'dropout_rate': 0.0, 'recurr_dropout': 0.2}
0.698667 (0.025588) with: {'dropout_rate': 0.1, 'recurr_dropout': 0.0}
0.705333 (0.029960) with: {'dropout_rate': 0.1, 'recurr_dropout': 0.1}
0.666667 (0.034664) with: {'dropout_rate': 0.1, 'recurr_dropout': 0.2}
0.698667 (0.034723) with: {'dropout_rate': 0.2, 'recurr_dropout': 0.0}
0.686667 (0.038618) with: {'dropout_rate': 0.2, 'recurr_dropout': 0.1}
0.676000 (0.025641) with: {'dropout_rate': 0.2, 'recurr_dropout': 0.2}
0.670667 (0.021658) with: {'dropout_rate': 0.3, 'recurr_dropout': 0.0}
0.696000 (0.033378) with: {'dropout_rate': 0.3, 'recurr_dropout': 0.1}
0.664000 (0.024146) with: {'dropout_rate': 0.3, 'recurr_dropout': 0.2}
0.656000 (0.019029) with: {'drop

In [25]:
%%time

np.random.seed(13)
set_random_seed(13)

inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
x = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
x = GlobalMaxPool1D()(x)
#x = Dropout(0.1)(x)
x = Dense(50, activation="relu")(x)
#x = Dropout(0.1)(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print('Fitting model...')
model.fit(X_t, y_train, batch_size=20, epochs=5);
#model.save('lstm_binary.h5')
prediction = model.predict([X_te], batch_size=1024, verbose=1)
print(roc_auc_score(y_test, prediction))

Fitting model...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.8241067108991638
Wall time: 4min 1s


### activation

In [6]:
%%time

np.random.seed(13)
set_random_seed(13)

def create_model(activation='relu'):
    inp = Input(shape=(maxlen,))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
    x = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
    x = GlobalMaxPool1D()(x)
    x = Dense(50, activation=activation)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

kf = StratifiedKFold(n_splits=5, random_state=13)

model = KerasClassifier(build_fn=create_model, epochs=5, batch_size=20, verbose=1)
# define the grid search parameters
activation = ['relu', 'tanh', 'sigmoid', 'linear']
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, verbose=2)
grid_result = grid.fit(X_t, y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] activation=relu .................................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .................................. activation=relu, total= 1.2min
[CV] activation=relu .................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.2min remaining:    0.0s


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .................................. activation=relu, total= 1.2min
[CV] activation=relu .................................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .................................. activation=relu, total= 1.2min
[CV] activation=relu .................................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .................................. activation=relu, total= 1.2min
[CV] activation=relu .................................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .................................. activation=relu, total= 1.2min
[CV] activation=tanh .................................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] .................................. activation=tanh, total= 1.2min
[CV] activation=tanh .................................................
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5


[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed: 26.6min finished


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Best: 0.718667 using {'activation': 'tanh'}
0.692000 (0.057158) with: {'activation': 'relu'}
0.718667 (0.035008) with: {'activation': 'tanh'}
0.709333 (0.049749) with: {'activation': 'sigmoid'}
0.717333 (0.031353) with: {'activation': 'linear'}
Wall time: 28min 12s


In [19]:
%%time

np.random.seed(13)
set_random_seed(13)

inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
x = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
x = GlobalMaxPool1D()(x)
x = Dense(50, activation="tanh")(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print('Fitting model...')
model.fit(X_t, y_train, batch_size=20, epochs=5);
model.save('lstm_model.h5')
prediction = model.predict([X_te], batch_size=1024, verbose=1)
print(roc_auc_score(y_test, prediction))

Fitting model...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.8268712419655815
Wall time: 1min 43s


## Adding a convolutional layer

Motivated by [3](https://arxiv.org/pdf/1704.06125.pdf) and [4](https://www.academia.edu/35947062/Twitter_Sentiment_Analysis_using_combined_LSTM-CNN_Models), we also experimented with adding the convolutional layer on the top of LSTM, and as a result it performed better than the single LSTM.

In [7]:
%%time

np.random.seed(13)
set_random_seed(13)

inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
x = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
x = Conv1D(50, kernel_size=3, padding='valid', kernel_initializer='glorot_uniform')(x)
x = GlobalMaxPool1D()(x)
x = Dense(50, activation="tanh")(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print('Fitting model...')
model.fit(X_t, y_train, batch_size=20, epochs=5);
prediction = model.predict([X_te], batch_size=1024, verbose=1)
print(roc_auc_score(y_test, prediction))

Fitting model...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.8460156196005253
Wall time: 1min 31s


### init

In [8]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 500)               0         
_________________________________________________________________
embedding_3 (Embedding)      (None, 500, 50)           500000    
_________________________________________________________________
bidirectional_3 (Bidirection (None, 500, 100)          40400     
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 498, 50)           15050     
_________________________________________________________________
global_max_pooling1d_3 (Glob (None, 50)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 51        
Total para