In [2]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings("ignore")

import numpy as np
from sklearn import *
from lstm.imdb_lstm import *

import matplotlib.pyplot as plt
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Using TensorFlow backend.


# Load Dataset
We reload the dataset with the plain-text plots and the labels that Reef generated.

In [3]:
# Name of the dataset to load; forwarded to the project's DataLoader.
dataset = 'imdb'

from data.loader import DataLoader

dl = DataLoader()

# load_data is unpacked into nine values. The first three are discarded here;
# the remaining six are the ground-truth labels and the plain text for the
# train / validation / test splits.
(
    _, _, _,
    train_ground, val_ground, test_ground,
    train_text, val_text, test_text,
) = dl.load_data(dataset=dataset)

# Training labels generated by Reef, read from a .npy file on disk.
reef_labels_path = './data/imdb_reef.npy'
train_reef = np.load(reef_labels_path)

# Train an LSTM Model
We now train a simple LSTM model with the labels generated by Reef. The following hyperparameter search is simplistic, and a more fine-tuned search and a more complex model can improve performance!

__Note that this takes ~1 hour to run on CPU__

In [3]:
# Validation metrics, one entry per (batch size, epochs) configuration.
# Entries are appended in loop order: batch size is the outer axis and the
# number of epochs is the inner axis.
f1_all = []
pr_all = []
re_all = []
val_acc_all = []


# Hyperparameter grid: candidate batch sizes and epoch counts.
bs_arr = [64,128,256]
n_epochs_arr = [5,10,25]

for bs in bs_arr:
    for n in n_epochs_arr:
        # Train on the Reef-generated labels and score the validation split.
        y_pred = lstm_simple(train_text, train_reef, val_text, val_ground, bs=bs, n=n)
        # Round model outputs to the nearest integer
        # (presumably scores in [0, 1] -> 0/1 labels; confirm against lstm_simple).
        predictions = np.round(y_pred)

        # Accuracy = matching predictions / number of validation examples.
        n_correct = np.sum(predictions == val_ground)
        n_val = np.shape(val_ground)[0]
        val_acc_all.append(n_correct/float(n_val))

        pr_all.append(metrics.precision_score(val_ground, predictions))
        re_all.append(metrics.recall_score(val_ground, predictions))
        f1_all.append(metrics.f1_score(val_ground, predictions))

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           225536    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 278,837
Trainable params: 278,837
Non-trainable params: 0
_________________________________________________________________
None
Train on 1136 samples, validate on 284 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 76.06%
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
emb

Epoch 24/25
Epoch 25/25
Accuracy: 78.52%
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, 500, 32)           225536    
_________________________________________________________________
lstm_7 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 101       
Total params: 278,837
Trainable params: 278,837
Non-trainable params: 0
_________________________________________________________________
None
Train on 1136 samples, validate on 284 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 54.93%
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 500, 32)           225536    
__________________________________________

### Validation Performance

In [4]:
# Pick the grid point with the highest validation F1.
#
# The result lists were filled with batch size as the outer loop and epoch
# count as the inner loop, so a flat index into them maps onto a
# (len(bs_arr), len(n_epochs_arr)) grid. Deriving the shape from the lists
# keeps this cell correct if either hyperparameter list is edited.
grid_shape = (len(bs_arr), len(n_epochs_arr))

# Guard against stale state: the metric lists must cover the grid exactly once.
assert len(f1_all) == grid_shape[0] * grid_shape[1], (
    'f1_all has %d entries but the hyperparameter grid has %d points; '
    're-run the search cell' % (len(f1_all), grid_shape[0] * grid_shape[1])
)

# Flat index of the best configuration (first maximum), computed once.
best_idx = np.argmax(f1_all)

# ii indexes bs_arr, jj indexes n_epochs_arr; later cells reuse both.
ii, jj = np.unravel_index(best_idx, grid_shape)

# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('Best Batch Size:  %s' % bs_arr[ii])
print('Best Epochs:  %s' % n_epochs_arr[jj])

print('Validation F1 Score:  %s' % f1_all[best_idx])
print('Validation Best Pr:  %s' % pr_all[best_idx])
print('Validation Best Re:  %s' % re_all[best_idx])

Best Batch Size:  128
Best Epochs:  5
Validation F1 Score:  0.7874564459930314
Validation Best Pr:  0.738562091503268
Validation Best Re:  0.8432835820895522


### Test Performance
We re-train the model with the hyperparameters that achieved the best validation performance, since we don't currently save the weights of the trained models.

In [5]:
# Hyperparameters selected by validation F1 (ii / jj index the search grid).
best_bs = bs_arr[ii]
best_n = n_epochs_arr[jj]

# Re-train on the Reef labels; the test split takes the place the validation
# split had during the hyperparameter search.
y_pred = lstm_simple(train_text, train_reef, test_text, test_ground, bs=best_bs, n=best_n)

# Round model outputs to the nearest integer
# (presumably scores in [0, 1] -> 0/1 labels; confirm against lstm_simple).
predictions = np.round(y_pred)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, 500, 32)           245344    
_________________________________________________________________
lstm_10 (LSTM)               (None, 100)               53200     
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 101       
Total params: 298,645
Trainable params: 298,645
Non-trainable params: 0
_________________________________________________________________
None
Train on 1136 samples, validate on 500 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 80.00%


In [6]:
# Report test-set metrics for the current `predictions` against `test_ground`.
# Single-argument print(...) emits the same text on Python 2 and Python 3
# (the original print statements are a SyntaxError on Python 3).
print('Test F1 Score:  %s' % metrics.f1_score(test_ground, predictions))
print('Test Precision:  %s' % metrics.precision_score(test_ground, predictions))
print('Test Recall:  %s' % metrics.recall_score(test_ground, predictions))

Test F1 Score:  0.7907949790794979
Test Precision:  0.7842323651452282
Test Recall:  0.7974683544303798


## [Optional] Ground Truth Performance
We can also train the same model with ground truth labels for the train set to see how far Reef labels are from the best possible performance.

In [8]:
# Baseline: same model, trained on the ground-truth training labels instead of
# the Reef labels, with a fixed batch size of 5 and 10 epochs.
# NOTE: this rebinds `y_pred` and `predictions`, so any metrics printed after
# this cell describe the ground-truth-trained model, not the Reef-trained one.
y_pred = lstm_simple(
    train_text, train_ground,
    test_text, test_ground,
    bs=5, n=10
)

# Round model outputs to the nearest integer
# (presumably scores in [0, 1] -> 0/1 labels; confirm against lstm_simple).
predictions = np.round(y_pred)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           245344    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 298,645
Trainable params: 298,645
Non-trainable params: 0
_________________________________________________________________
None
Train on 1136 samples, validate on 500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 85.00%


### Test Performance

In [9]:
# Report test-set metrics for the current `predictions` against `test_ground`.
# Single-argument print(...) emits the same text on Python 2 and Python 3
# (the original print statements are a SyntaxError on Python 3).
print('Test F1 Score:  %s' % metrics.f1_score(test_ground, predictions))
print('Test Precision:  %s' % metrics.precision_score(test_ground, predictions))
print('Test Recall:  %s' % metrics.recall_score(test_ground, predictions))

Test F1 Score:  0.8508946322067595
Test Precision:  0.8045112781954887
Test Recall:  0.9029535864978903
