# Model Evaluation Example

An example notebook showing how to evaluate a trained model on new data.

### Model Parameters

In [5]:
# Where the saved artifacts live and which model version to evaluate.
MODEL_DIR = '../models/'
MODEL_VERSION = 'wiki_tox_labels_v0'

# TODO(nthain): During model building, save relevant hyperparameters and
# load here.
# Until then, both values below must be kept in sync with the model's by hand.
BATCH_SIZE = 128
MAX_SEQUENCE_LENGTH = 1000

### Load model and tokenizer

In [18]:
from keras.models import load_model
import cPickle
import os

In [20]:
# Load the pickled tokenizer that was saved alongside the model.
# NOTE(review): unpickling can execute arbitrary code; only load tokenizer
# files that come from a trusted source.
tokenizer_path = os.path.join(MODEL_DIR, '%s_tokenizer.pkl' % MODEL_VERSION)
with open(tokenizer_path, 'rb') as tokenizer_file:
    # The `with` block closes the file handle even if unpickling fails.
    tokenizer = cPickle.load(tokenizer_file)

# Load the saved Keras model for the same model version.
model = load_model(os.path.join(MODEL_DIR, '%s_model.h5' % MODEL_VERSION))

### Load data into a numpy array

In [9]:
import numpy as np

# Load the new data as a numpy array somehow.
# Here two hand-written sentences stand in for real data: the first is
# labelled True and the second False.
dummy_text = np.array([
    'This is a fucking toxic sentence.',
    'This is a happy little sentence.',
])
dummy_labels = np.array([
    True,
    False,
])

### Tokenize the dummy data

In [10]:
# TODO(nthain): Wrap this into a function

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# Turn each text into a sequence of token ids using the loaded tokenizer,
# then pad every sequence to MAX_SEQUENCE_LENGTH.
dummy_sequences = tokenizer.texts_to_sequences(dummy_text)
dummy_data = pad_sequences(dummy_sequences, maxlen=MAX_SEQUENCE_LENGTH)

# One-hot encode the boolean labels (False -> column 0, True -> column 1).
# The ndim guard keeps this cell safe to re-run: without it, a second run
# would one-hot encode the already-encoded 2-D array and break the later
# `dummy_labels[:, 1]` slice.
# num_classes=2 guarantees two columns even if a batch happens to contain
# only negative examples (otherwise the width is inferred from the data).
if dummy_labels.ndim == 1:
    dummy_labels = to_categorical(dummy_labels, num_classes=2)

### Use the model to predict on the dummy data

In [11]:
# Run the loaded model over the padded sequences, BATCH_SIZE rows at a time.
dummy_preds = model.predict(
    dummy_data,
    batch_size=BATCH_SIZE,
)

### Evaluate AUC

In [15]:
from sklearn import metrics

def compute_auc(y_true, y_pred):
    """Compute the area under the ROC curve.

    Args:
        y_true: ground-truth labels, passed straight to `metrics.roc_curve`.
        y_pred: predicted scores, passed straight to `metrics.roc_curve`.

    Returns:
        The value of `metrics.auc` applied to the false-positive and
        true-positive rates returned by `metrics.roc_curve`.
    """
    # roc_curve also returns the decision thresholds; they are not needed here.
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(y_true, y_pred)
    roc_area = metrics.auc(false_pos_rate, true_pos_rate)
    return roc_area

In [16]:
# Score using column 1 of the labels and of the predictions.
# NOTE(review): assumes column 1 of the model output is the score for the
# positive (True) class - confirm against the model definition.
dummy_auc = compute_auc(
    dummy_labels[:, 1],
    dummy_preds[:, 1],
)
print('The model achieves an AUC of %.3f.' % dummy_auc)

The model achieves an AUC of 1.000.
