In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import SimpleRNN, CuDNNLSTM, CuDNNGRU, Activation, Dense, Dropout, Input, Embedding
from keras.optimizers import Adam, SGD, RMSprop
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [2]:
# Load the training CSV. The cell ends on `.shape` so the (rows, columns)
# tuple is what gets rendered; the former mid-cell `dataset.head()` was
# evaluated and thrown away because only the last expression is displayed.
dataset = pd.read_csv('data/train.csv')
dataset.shape

(20800, 5)

In [3]:
# Collect the index labels of rows whose "text" value is NaN, remove those
# rows, and display the resulting shape.
nan_indexes = dataset.index[dataset["text"].isna()]
dataset = dataset.drop(nan_indexes, axis=0)
dataset.shape

(20761, 5)

In [4]:
# Inputs come from the "text" column, targets from the "label" column.
X = dataset["text"]
y = dataset["label"]

In [5]:
# Experiment settings: `max_words` is handed to the Tokenizer as `num_words`,
# `max_len` to `pad_sequences` as `maxlen`. Both are also read as globals by
# LSTM13_network().
max_words, max_len = 2000, 1000

# Fit a tokenizer on the texts.
tok = Tokenizer(num_words=max_words)
tok.fit_on_texts(X)

# Convert the texts to integer sequences and pack them into one matrix.
sequences = tok.texts_to_sequences(X)
sequences_matrix = sequence.pad_sequences(sequences, maxlen=max_len)

In [17]:
from keras.callbacks import ModelCheckpoint
import keras as K

def train_model(model_id, model, optimizer, epochs, X_train, y_train, validation_split, batch_size):
    """Compile ``model``, fit it, and return the object produced by ``model.fit``.

    Prints the model summary, compiles with binary cross-entropy loss and an
    accuracy metric, then trains with two callbacks: a ``ModelCheckpoint``
    targeting ``NNs/<model_id>-<epoch>.h5`` (monitoring ``val_loss`` with
    ``save_best_only=True``) and a TensorBoard callback logging to
    ``./new_logs/nn_<model_id>``.

    Args:
        model_id: Label for this run. It is converted with ``str()`` and
            embedded in both the checkpoint file names and the TensorBoard
            log directory, so two runs sharing a label write to the same paths.
        model: Object exposing ``summary``, ``compile`` and ``fit``.
        optimizer: Passed through to ``model.compile``.
        epochs: Passed through to ``model.fit``.
        X_train: Training inputs, passed to ``model.fit``.
        y_train: Training targets, passed to ``model.fit``.
        validation_split: Passed through to ``model.fit``.
        batch_size: Passed through to ``model.fit``.

    Returns:
        Whatever ``model.fit`` returns.
    """
    import os  # function-scope import: keeps this cell independent of other cells

    # Use one string form of the id everywhere (the log dir already used str()).
    run_name = str(model_id)

    model.summary()

    # The checkpoint callback is pointed at NNs/; create the folder up front so
    # saving the first checkpoint cannot fail on a missing directory.
    checkpoint_dir = 'NNs'
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint = ModelCheckpoint(
        checkpoint_dir + '/' + run_name + '-{epoch:03d}.h5',
        verbose=1,
        monitor='val_loss',
        save_best_only=True,
        mode='auto',
    )

    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    tb_callback = K.callbacks.TensorBoard(log_dir='./new_logs/nn_' + run_name)

    hist = model.fit(
        X_train,
        y_train,
        validation_split=validation_split,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[tb_callback, checkpoint],
    )

    return hist

In [7]:
def LSTM13_network(embedding_dim=50):
    """Build the (uncompiled) classifier used by the training cells.

    Layer stack: Input(shape=[max_len]) -> Embedding(max_words, embedding_dim)
    -> CuDNNLSTM(64) -> Dense(196) + ReLU -> Dropout(0.2) -> Dense(1) + sigmoid.

    ``max_len`` and ``max_words`` are read from the notebook globals at call
    time, so a tokenization cell must have been executed first.

    Args:
        embedding_dim: Output width of the embedding layer. Defaults to 50,
            the value that was previously hard-coded, so existing
            ``LSTM13_network()`` calls build the same network.

    Returns:
        A ``Model`` connecting the ``inputs`` layer to the sigmoid output.
    """
    inputs = Input(name='inputs', shape=[max_len])
    x = Embedding(max_words, embedding_dim, input_length=max_len)(inputs)
    x = CuDNNLSTM(64)(x)
    x = Dense(196, name='FC1')(x)
    x = Activation('relu')(x)
    x = Dropout(0.2)(x)
    x = Dense(1, name='out_layer')(x)
    outputs = Activation('sigmoid')(x)
    return Model(inputs=inputs, outputs=outputs)

In [8]:
# Build a fresh network and train it under the 'LSTM12 - 2000 words' label.
# The call is the cell's last expression, so its return value is displayed.
model = LSTM13_network()
train_model(
    model_id='LSTM12 - 2000 words',
    model=model,
    optimizer=RMSprop(),
    epochs=10,
    X_train=sequences_matrix,
    y_train=y,
    validation_split=0.2,
    batch_size=128,
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1000)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 1000, 50)          100000    
_________________________________________________________________
cu_dnnlstm_1 (CuDNNLSTM)     (None, 64)                29696     
_________________________________________________________________
FC1 (Dense)                  (None, 196)               12740     
_________________________________________________________________
activation_1 (Activation)    (None, 196)               0         
_________________________________________________________________
dropout_1 (Dropout)  

<keras.callbacks.History at 0x7f55243304e0>

In [9]:
# Re-run the tokenization with a smaller `num_words` setting (1000); the
# sequence length stays at 1000. This overwrites the globals of the same name.
max_words, max_len = 1000, 1000

tok = Tokenizer(num_words=max_words)
tok.fit_on_texts(X)

# Texts -> integer sequences -> single matrix built with maxlen=max_len.
sequences = tok.texts_to_sequences(X)
sequences_matrix = sequence.pad_sequences(sequences, maxlen=max_len)

In [10]:
# This run uses the 1000-word tokenization from the previous cell. The label
# used to be 'LSTM12 - 2000 words' (copied from the earlier run), which made
# train_model() write to the same checkpoint names and TensorBoard directory
# as that run. The label now matches the configuration being trained.
model = LSTM13_network()
train_model('LSTM12 - 1000 words', model, RMSprop(), 10, sequences_matrix, y, 0.2, 128)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1000)              0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 1000, 50)          50000     
_________________________________________________________________
cu_dnnlstm_2 (CuDNNLSTM)     (None, 64)                29696     
_________________________________________________________________
FC1 (Dense)                  (None, 196)               12740     
_________________________________________________________________
activation_3 (Activation)    (None, 196)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 196)               0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 197       
__________

<keras.callbacks.History at 0x7f55418d75f8>

In [11]:
# Third tokenization pass: `num_words` drops to 500, sequence length is still
# 1000. As before, the module-level names are overwritten in place.
max_words, max_len = 500, 1000

tok = Tokenizer(num_words=max_words)
tok.fit_on_texts(X)

# Encode the texts and assemble the matrix handed to the model.
sequences = tok.texts_to_sequences(X)
sequences_matrix = sequence.pad_sequences(sequences, maxlen=max_len)

In [12]:
# Train a new network on the current `sequences_matrix` under the
# 'LSTM13 - 500 words' label (RMSprop, 10 epochs, batch size 128).
model = LSTM13_network()
train_model(
    model_id='LSTM13 - 500 words',
    model=model,
    optimizer=RMSprop(),
    epochs=10,
    X_train=sequences_matrix,
    y_train=y,
    validation_split=0.2,
    batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1000)              0         
_________________________________________________________________
embedding_3 (Embedding)      (None, 1000, 50)          25000     
_________________________________________________________________
cu_dnnlstm_3 (CuDNNLSTM)     (None, 64)                29696     
_________________________________________________________________
FC1 (Dense)                  (None, 196)               12740     
_________________________________________________________________
activation_5 (Activation)    (None, 196)               0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 196)               0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 197       
__________

<keras.callbacks.History at 0x7f5541b39278>

In [13]:
def LSTM13_network(embedding_dim=25):
    """Build the (uncompiled) classifier with a narrower embedding.

    This redefinition replaces the earlier ``LSTM13_network`` in the notebook
    namespace. Layer stack: Input(shape=[max_len]) ->
    Embedding(max_words, embedding_dim) -> CuDNNLSTM(64) -> Dense(196) + ReLU
    -> Dropout(0.2) -> Dense(1) + sigmoid.

    ``max_len`` and ``max_words`` are read from the notebook globals at call
    time, so a tokenization cell must have been executed first.

    Args:
        embedding_dim: Output width of the embedding layer. Defaults to 25,
            the value this cell previously hard-coded, so existing
            ``LSTM13_network()`` calls build the same network.

    Returns:
        A ``Model`` connecting the ``inputs`` layer to the sigmoid output.
    """
    inputs = Input(name='inputs', shape=[max_len])
    x = Embedding(max_words, embedding_dim, input_length=max_len)(inputs)
    x = CuDNNLSTM(64)(x)
    x = Dense(196, name='FC1')(x)
    x = Activation('relu')(x)
    x = Dropout(0.2)(x)
    x = Dense(1, name='out_layer')(x)
    outputs = Activation('sigmoid')(x)
    return Model(inputs=inputs, outputs=outputs)

In [14]:
# Train the network defined in the cell above under the 'LSTM13 - 25 w2v'
# label; optimizer and fit settings are the same as in the earlier runs.
model = LSTM13_network()
train_model(
    model_id='LSTM13 - 25 w2v',
    model=model,
    optimizer=RMSprop(),
    epochs=10,
    X_train=sequences_matrix,
    y_train=y,
    validation_split=0.2,
    batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1000)              0         
_________________________________________________________________
embedding_4 (Embedding)      (None, 1000, 25)          12500     
_________________________________________________________________
cu_dnnlstm_4 (CuDNNLSTM)     (None, 64)                23296     
_________________________________________________________________
FC1 (Dense)                  (None, 196)               12740     
_________________________________________________________________
activation_7 (Activation)    (None, 196)               0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 196)               0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 197       
__________

<keras.callbacks.History at 0x7f553eb99f28>

In [15]:
# NOTE(review): the model summaries recorded for the SGD / Adam / RMSprop runs
# in this notebook show a (None, 1000, 75) embedding output, i.e. they were
# produced by a 75-dim version of this cell. The default below follows the
# saved code (50); confirm which width was intended.
def LSTM13_network(embedding_dim=50):
    """Build the (uncompiled) classifier used by the optimizer-comparison runs.

    This redefinition replaces the earlier ``LSTM13_network`` in the notebook
    namespace. Layer stack: Input(shape=[max_len]) ->
    Embedding(max_words, embedding_dim) -> CuDNNLSTM(64) -> Dense(196) + ReLU
    -> Dropout(0.2) -> Dense(1) + sigmoid.

    ``max_len`` and ``max_words`` are read from the notebook globals at call
    time, so a tokenization cell must have been executed first.

    Args:
        embedding_dim: Output width of the embedding layer. Defaults to 50,
            the value this cell previously hard-coded, so existing
            ``LSTM13_network()`` calls build the same network.

    Returns:
        A ``Model`` connecting the ``inputs`` layer to the sigmoid output.
    """
    inputs = Input(name='inputs', shape=[max_len])
    x = Embedding(max_words, embedding_dim, input_length=max_len)(inputs)
    x = CuDNNLSTM(64)(x)
    x = Dense(196, name='FC1')(x)
    x = Activation('relu')(x)
    x = Dropout(0.2)(x)
    x = Dense(1, name='out_layer')(x)
    outputs = Activation('sigmoid')(x)
    return Model(inputs=inputs, outputs=outputs)

In [21]:
# Optimizer comparison, run 1: SGD constructed with 0.1 as its first argument.
model = LSTM13_network()
train_model(
    model_id='LSTM13 - SGD',
    model=model,
    optimizer=SGD(0.1),
    epochs=10,
    X_train=sequences_matrix,
    y_train=y,
    validation_split=0.2,
    batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1000)              0         
_________________________________________________________________
embedding_9 (Embedding)      (None, 1000, 75)          37500     
_________________________________________________________________
cu_dnnlstm_9 (CuDNNLSTM)     (None, 64)                36096     
_________________________________________________________________
FC1 (Dense)                  (None, 196)               12740     
_________________________________________________________________
activation_17 (Activation)   (None, 196)               0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 196)               0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 197       
__________

<keras.callbacks.History at 0x7f5510300780>

In [22]:
# Optimizer comparison, run 2: Adam with its default constructor arguments.
model = LSTM13_network()
train_model(
    model_id='LSTM13 - Adam',
    model=model,
    optimizer=Adam(),
    epochs=10,
    X_train=sequences_matrix,
    y_train=y,
    validation_split=0.2,
    batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1000)              0         
_________________________________________________________________
embedding_10 (Embedding)     (None, 1000, 75)          37500     
_________________________________________________________________
cu_dnnlstm_10 (CuDNNLSTM)    (None, 64)                36096     
_________________________________________________________________
FC1 (Dense)                  (None, 196)               12740     
_________________________________________________________________
activation_19 (Activation)   (None, 196)               0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 196)               0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 197       
__________

<keras.callbacks.History at 0x7f54c07bae80>

In [23]:
# Optimizer comparison, run 3: RMSprop with its default constructor arguments.
model = LSTM13_network()
train_model(
    model_id='LSTM13 - RMSprop',
    model=model,
    optimizer=RMSprop(),
    epochs=10,
    X_train=sequences_matrix,
    y_train=y,
    validation_split=0.2,
    batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1000)              0         
_________________________________________________________________
embedding_11 (Embedding)     (None, 1000, 75)          37500     
_________________________________________________________________
cu_dnnlstm_11 (CuDNNLSTM)    (None, 64)                36096     
_________________________________________________________________
FC1 (Dense)                  (None, 196)               12740     
_________________________________________________________________
activation_21 (Activation)   (None, 196)               0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 196)               0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 197       
__________

<keras.callbacks.History at 0x7f5484719f98>