In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import getpass
from IPython.display import display
import json
import nltk
import numpy as np
import pandas as pd
import pkg_resources
import os
import random
import re
import seaborn as sns
import sklearn.metrics as metrics

import tensorflow as tf
from tensorflow.python.lib.io import file_io

In [4]:
from utils_export.dataset import Dataset, Model
from utils_export import utils_cloudml
from utils_export import utils_tfrecords

In [5]:
# Set the GCS read-cache size to 0 MB; this makes reading files from GCS
# faster. Background:
# https://github.com/tensorflow/tensorflow/issues/15530
os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'

In [6]:
# Fetch the 'punkt' tokenizer data used by nltk.word_tokenize, which the
# `tokenizer` function defined in this notebook calls.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /usr/local/google/home/msushkov/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [7]:
def tokenizer(text, lowercase=True):
  """Converts text to a list of words.

  Args:
    text: piece of text to tokenize. Either a UTF-8 encoded byte string or an
      already-decoded (unicode) string.
    lowercase: whether to include lowercasing in preprocessing (bool).

  Returns:
    A list of strings (words).
  """
  # Only raw bytes need decoding. Calling .decode() on text that is already
  # unicode raises AttributeError on Python 3 and triggers an implicit ASCII
  # encode (which fails on non-ASCII characters) on Python 2.
  if isinstance(text, bytes):
    text = text.decode('utf-8')
  words = nltk.word_tokenize(text)
  if lowercase:
    words = [w.lower() for w in words]
  return words

In [8]:
def make_test_input_fn(dataset_path,
                       model_text_feature,
                       dataset_text_feature,
                       data_label,
                       tokenizer_fn,
                       label_data_type=tf.float32,
                       max_n_examples=None,
                       random_filter_keep_rate=1.0):
    """Builds an input function that loads and prepares evaluation examples.

    Args:
      dataset_path (str): Path to the TF-Records dataset.
      model_text_feature (str): Name of the column the model reads its text
        input from; the (tokenized) text is stored under this name.
      dataset_text_feature (str): Name of the text feature in the dataset.
      data_label (str): Name of the label feature in the dataset.
      tokenizer_fn: Tokenizer function (str -> list). If falsy, each text is
        wrapped as-is in a one-element list instead.
      label_data_type: TensorFlow dtype used to parse the label feature.
      max_n_examples (int): Default number of examples to evaluate on.
      random_filter_keep_rate (float): Default value forwarded to
        `utils_tfrecords.decode_tf_records_to_pandas` (presumably the fraction
        of examples that is kept -- confirm against that helper).

    Returns:
      The input function; calling it yields a DataFrame of examples.
    """
    # Parsing spec: one scalar string (the text) and one scalar label.
    feature_spec = {
      dataset_text_feature: tf.FixedLenFeature([], dtype=tf.string),
      data_label: tf.FixedLenFeature([], dtype=label_data_type)
    }

    def test_input_fn(max_n_examples=max_n_examples,
                      random_filter_keep_rate=random_filter_keep_rate):
        """Loads the dataset and shapes it for evaluation.

        Args:
          max_n_examples (int): How many examples to evaluate on.
          random_filter_keep_rate (float): Forwarded to the TF-Records decoder.

        Returns:
          DataFrame holding the decoded features, the model text column and a
          boolean 'label' column.
        """
        examples = utils_tfrecords.decode_tf_records_to_pandas(
            feature_spec,
            dataset_path,
            max_n_examples,
            random_filter_keep_rate)

        # Without a tokenizer the raw text becomes a single-element list.
        to_tokens = tokenizer_fn if tokenizer_fn else (lambda text: [text])
        examples[model_text_feature] = [
            to_tokens(text) for text in examples[dataset_text_feature]]

        # Expose the label under a fixed name, binarized by rounding.
        examples = examples.rename(columns={data_label: 'label'})
        examples['label'] = [
            bool(round(value)) for value in list(examples['label'])]
        return examples.copy(deep=True)

    return test_input_fn

In [23]:
def print_results(results_df, model_names, threshold=0.5):
    """Print the classification results.

    For every model, prints the ROC AUC, the PR AUC and the F1 score obtained
    by binarizing the model scores at `threshold`.

    Args:
      results_df: DataFrame with the results. Needs a 'label' column plus one
        column of prediction scores per entry of `model_names`.
      model_names: List of strings representing the models for which we have results.
      threshold: Scores strictly greater than this value count as positive
        predictions for the F1 score. Defaults to 0.5.
    """
    labels = results_df['label']
    for model_name in model_names:
        print(model_name)
        model_preds = results_df[model_name]

        fpr, tpr, _ = metrics.roc_curve(labels, model_preds)
        roc_auc = metrics.auc(fpr, tpr)

        # precision_recall_curve returns (precision, recall, thresholds), in
        # that order. The PR AUC integrates precision (y) over recall (x).
        precisions, recalls, _ = metrics.precision_recall_curve(
            labels, model_preds)
        pr_auc = metrics.auc(recalls, precisions)

        model_preds_binary = (model_preds > threshold).astype(np.int_)
        f1 = metrics.f1_score(labels, model_preds_binary)

        print('\tROC AUC: {}'.format(roc_auc))
        print('\tPR AUC: {}'.format(pr_auc))
        print('\tF1: {}'.format(f1))

In [10]:
# Passed to every `Model(...)` in this notebook as `project_name`.
PROJECT_NAME = 'conversationai-models'
# Passed to every `Model(...)` in this notebook as `example_key`.
SENTENCE_KEY = 'comment_key' #Input key

# Per-user base path on GCS. Each evaluation section joins its own
# sub-directory onto it to build the output directory given to `Dataset`.
OUTPUT_DIR_BASE = os.path.join(
    'gs://conversationai-models',
    getpass.getuser(),
    'tfrecords')

## Evaluate models on Civil Comments dataset

In [175]:
# Settings for the Civil Comments evaluation. NOTE: these module-level names
# are re-assigned by the later dataset sections, so the cells of a section
# must be run after that section's own settings cell.
# Prediction key requested from the models (passed as `prediction_keys`).
LABEL_NAME_PREDICTION_MODEL = 'toxicity/logistic'
# Path pattern (note the `*` wildcard) of the TF-Record files to evaluate on.
DATASET = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/test-*.tfrecord'
# Label feature in the records; `make_test_input_fn` renames it to 'label'.
DATA_LABEL = 'toxicity'
# Raw text feature in the records.
DATASET_TEXT_FEATURE='comment_text'

# Output directory handed to `Dataset` for this evaluation.
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'civil_comments_test')

### CNN, GRU Attention Models

In [140]:
# Column under which `make_test_input_fn` stores the tokenized text.
MODEL_TEXT_FEATURE = 'tokens'
# Models to evaluate; `print_results` also uses these strings as the names of
# the prediction columns in the results DataFrame.
MODEL_NAMES = [
    'tf_cnn_civil_comments_glove:v_20190219_185541',
    'tf_gru_attention_civil_comments_glove:v_20190219_185619',
]

# The model input is a list of strings (the tokens).
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# Texts are split into (lowercased) words by `tokenizer`.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer)

In [141]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The limit is far above the 242144 examples reported in the saved logs, so
# presumably the whole test set is loaded -- confirm against Dataset.load_data.
test_dataset.load_data(10000000)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:Loaded 200000 lines.
INFO:tensorflow:End of file.


In [142]:
# Set recompute_predictions=False to save time if predictions are available.
# With True, the saved output below shows a batch prediction job being queued
# and polled for over 20 minutes.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/242144
INFO:tensorflow:Preparing train data: 10000/242144
INFO:tensorflow:Preparing train data: 20000/242144
INFO:tensorflow:Preparing train data: 30000/242144
INFO:tensorflow:Preparing train data: 40000/242144
INFO:tensorflow:Preparing train data: 50000/242144
INFO:tensorflow:Preparing train data: 60000/242144
INFO:tensorflow:Preparing train data: 70000/242144
INFO:tensorflow:Preparing train data: 80000/242144
INFO:tensorflow:Preparing train data: 90000/242144
INFO:tensorflow:Preparing train data: 100000/242144
INFO:tensorflow:Preparing train data: 110000/242144
INFO:tensorflow:Preparing train data: 120000/242144
INFO:tensorflow:Preparing train data: 130000/242144
INFO:tensorflow:Preparing train data: 140000/242144
INFO:tensorflow:Preparing train data: 150000/242144
INFO:tensorflow:Preparin



INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Prediction job completed.




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Prediction job completed.


In [143]:
civil_comments_test_df = test_dataset.show_data()

In [144]:
print_results(civil_comments_test_df, MODEL_NAMES)

tf_cnn_civil_comments_glove:v_20190219_185541
	ROC AUC: 0.9573435242534393
	PR AUC: 0.6729934425219886
tf_gru_attention_civil_comments_glove:v_20190219_185619
	ROC AUC: 0.9649161132104584
	PR AUC: 0.7486011745102973


### TF-Hub Model

In [189]:
# Column under which `make_test_input_fn` stores the model's text input.
MODEL_TEXT_FEATURE = 'text'
# Model to evaluate; `print_results` also uses this string as the name of the
# prediction column in the results DataFrame.
MODEL_NAMES = [
    'tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552',
]

model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# No tokenizer (None): `make_test_input_fn` then wraps each raw text in a
# one-element list, so the input is still a list of strings.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, None)

In [190]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)

# Rebinds `test_dataset`, replacing the dataset object of the previous section.
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The limit is far above the 242144 examples reported in the saved logs, so
# presumably the whole test set is loaded -- confirm against Dataset.load_data.
test_dataset.load_data(10000000)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:Loaded 200000 lines.
INFO:tensorflow:End of file.


In [192]:
# Set recompute_predictions=False to save time if predictions are available.
# NOTE: with True, the saved output below shows the prediction job being
# polled for more than 800 minutes.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/242144
INFO:tensorflow:Preparing train data: 10000/242144
INFO:tensorflow:Preparing train data: 20000/242144
INFO:tensorflow:Preparing train data: 30000/242144
INFO:tensorflow:Preparing train data: 40000/242144
INFO:tensorflow:Preparing train data: 50000/242144
INFO:tensorflow:Preparing train data: 60000/242144
INFO:tensorflow:Preparing train data: 70000/242144
INFO:tensorflow:Preparing train data: 80000/242144
INFO:tensorflow:Preparing train data: 90000/242144
INFO:tensorflow:Preparing train data: 100000/242144
INFO:tensorflow:Preparing train data: 110000/242144
INFO:tensorflow:Preparing train data: 120000/242144
INFO:tensorflow:Preparing train data: 130000/242144
INFO:tensorflow:Preparing train data: 140000/242144
INFO:tensorflow:Preparing train data: 150000/242144
INFO:tensorflow:Preparin



INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 25
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 27
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 30
INFO:tensorflow:

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 269
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 271
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 274
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 276
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 279
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 281
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 284
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 286
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 289
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 291
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 294
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 296
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 299

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 538
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 540
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 543
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 545
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 548
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 550
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 553
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 555
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 558
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 560
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 563
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 565
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 568

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 807
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 809
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 812
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 814
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 817
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 819
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 822
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 824
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 827
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 829
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 832
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 834
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 837

In [193]:
civil_comments_hub_df = test_dataset.show_data()

In [194]:
print_results(civil_comments_hub_df, MODEL_NAMES)

tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552
	ROC AUC: 0.9595451744696132
	PR AUC: 0.7429338592289392


## Evaluate models on Toxicity dataset

In [195]:
# Settings for the Toxicity evaluation. NOTE: this re-assigns the module-level
# names used by the previous section; run the cells below only after this one.
# Prediction key requested from the models (passed as `prediction_keys`).
LABEL_NAME_PREDICTION_MODEL = 'frac_neg/logistic'
# TF-Record file to evaluate on.
DATASET = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'
# Label feature in the records; `make_test_input_fn` rounds it to a boolean
# and exposes it as 'label'.
DATA_LABEL = 'frac_neg'
# Raw text feature in the records.
DATASET_TEXT_FEATURE='comment_text'

# Output directory handed to `Dataset` for this evaluation.
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'toxicity_test')

### CNN, GRU Attention Models

In [162]:
# Column under which `make_test_input_fn` stores the tokenized text.
MODEL_TEXT_FEATURE = 'tokens'
# Models to evaluate; `print_results` also uses these strings as the names of
# the prediction columns in the results DataFrame.
MODEL_NAMES = [
    'tf_cnn_toxicity_glove:v_20190219_185532',
    'tf_gru_attention_toxicity_glove:v_20190219_185516',
]

# The model input is a list of strings (the tokens).
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# Texts are split into (lowercased) words by `tokenizer`.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer)

In [163]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The limit is far above the 187681 examples reported in the saved logs, so
# presumably the whole test set is loaded -- confirm against Dataset.load_data.
test_dataset.load_data(10000000)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:End of file.


In [148]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:Preparing train data: 0/187681
INFO:tensorflow:Preparing train data: 10000/187681
INFO:tensorflow:Preparing train data: 20000/187681
INFO:tensorflow:Preparing train data: 30000/187681
INFO:tensorflow:Preparing train data: 40000/187681
INFO:tensorflow:Preparing train data: 50000/187681
INFO:tensorflow:Preparing train data: 60000/187681
INFO:tensorflow:Preparing train data: 70000/187681
INFO:tensorflow:Preparing train data: 80000/187681
INFO:tensorflow:Preparing train data: 90000/187681
INFO:tensorflow:Preparing train data: 100000/187681
INFO:tensorflow:Preparing train data: 110000/187681
INFO:tensorflow:Preparing train data: 120000/187681
INFO:tensorflow:Preparing train data: 130000/187681
INFO:tensorflow:Preparing train data: 140000/187681
INFO:tensorflow:Preparing train data: 150000/187681
INFO:tensorflow:Preparing train data: 160000/187681
INFO:tensorflow:Preparing train d



INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 25
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 27
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 30
INFO:tensorflow:



INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Prediction job completed.


In [149]:
toxicity_test_df1 = test_dataset.show_data()

In [150]:
print_results(toxicity_test_df1, MODEL_NAMES)

tf_cnn_toxicity_glove:v_20190219_185532
	ROC AUC: 0.951760553925346
	PR AUC: 0.8740274773143215
tf_gru_attention_toxicity_glove:v_20190219_185516
	ROC AUC: 0.9543916575133977
	PR AUC: 0.8814208812923074


### TF-Hub Model

In [196]:
# Column under which `make_test_input_fn` stores the model's text input.
MODEL_TEXT_FEATURE = 'text'
# Model to evaluate; `print_results` also uses this string as the name of the
# prediction column in the results DataFrame.
MODEL_NAMES = [
    'tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427',
]

model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# No tokenizer (None): `make_test_input_fn` then wraps each raw text in a
# one-element list, so the input is still a list of strings.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, None)

In [197]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)

# Rebinds `test_dataset`, replacing the dataset object of the previous section.
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The limit is far above the 187681 examples reported in the saved logs, so
# presumably the whole test set is loaded -- confirm against Dataset.load_data.
test_dataset.load_data(10000000)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:End of file.


In [199]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/187681
INFO:tensorflow:Preparing train data: 10000/187681
INFO:tensorflow:Preparing train data: 20000/187681
INFO:tensorflow:Preparing train data: 30000/187681
INFO:tensorflow:Preparing train data: 40000/187681
INFO:tensorflow:Preparing train data: 50000/187681
INFO:tensorflow:Preparing train data: 60000/187681
INFO:tensorflow:Preparing train data: 70000/187681
INFO:tensorflow:Preparing train data: 80000/187681
INFO:tensorflow:Preparing train data: 90000/187681
INFO:tensorflow:Preparing train data: 100000/187681
INFO:tensorflow:Preparing train data: 110000/187681
INFO:tensorflow:Preparing train data: 120000/187681
INFO:tensorflow:Preparing train data: 130000/187681
INFO:tensorflow:Preparing train data: 140000/187681
INFO:tensorflow:Preparing train data: 150000/187681
INFO:tensorflow:Preparin



INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 25
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 27
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 30
INFO:tensorflow:

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 269
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 271
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 274
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 276
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 279
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 281
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 284
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 286
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 289
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 291
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 294
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 296
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 299

In [200]:
toxicity_test_df2 = test_dataset.show_data()

In [201]:
print_results(toxicity_test_df2, MODEL_NAMES)

tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427
	ROC AUC: 0.9270843170934745
	PR AUC: 0.8155815559085313


## Evaluate models on Many Communities dataset (full)

In [23]:
# Settings for the full Many Communities evaluation. NOTE: this re-assigns the
# module-level names used by the previous section; run the cells below only
# after this one.
# Prediction key requested from the models (passed as `prediction_keys`).
LABEL_NAME_PREDICTION_MODEL = 'removed/logistic'
# TF-Record file to evaluate on.
DATASET = 'gs://conversationai-models/resources/transfer_learning_data/many_communities/20181105_answers_all_columns_nthain.tfrecord'
# Label feature in the records; `make_test_input_fn` renames it to 'label'.
# The input functions of this section parse it as tf.int64.
DATA_LABEL = 'removed'
# Raw text feature in the records.
DATASET_TEXT_FEATURE='comment_text'

# Output directory handed to `Dataset` for this evaluation.
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'many_communities_test')

### CNN, GRU Attention Models

In [14]:
# Column under which `make_test_input_fn` stores the tokenized text.
MODEL_TEXT_FEATURE = 'tokens'
# Models to evaluate; `print_results` also uses these strings as the names of
# the prediction columns in the results DataFrame.
# NOTE(review): the saved results printed further below for this section list
# different model versions than the ones active here, so those numbers are
# stale relative to this cell -- re-run before relying on them.
MODEL_NAMES = [
    'tf_cnn_many_communities_glove:v_20190219_185551_gpu_p100_4',
    # Earlier GRU-attention versions, kept disabled:
    #'tf_gru_attention_many_communities:v20190322_142800_507893_1556085643',
    #'tf_gru_attention_many_communities:v20190315_161037_23271_1555129264',
    'tf_gru_attention_many_communities:v20190705_004839_507000_1562364428_gpu_p100_4',
]

# The model input is a list of strings (the tokens).
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# The label of this dataset is parsed as an integer feature (tf.int64)
# instead of the default tf.float32.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer, label_data_type=tf.int64)

In [15]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# NOTE(review): this limit (100000000) is ten times the 10000000 used by the
# other full-dataset sections; presumably both just mean "load everything".
test_dataset.load_data(100000000)

Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecordDataset`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To constru



INFO:tensorflow:End of file.


In [16]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/1000




INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Prediction job completed.




INFO:tensorflow:Prediction job completed.


In [318]:
many_communities_test_df = test_dataset.show_data()

In [319]:
print_results(many_communities_test_df, MODEL_NAMES)

tf_cnn_many_communities_glove:v_20190219_185551
	ROC AUC: 0.7476941464055139
	PR AUC: 0.07604839414024091
tf_gru_attention_many_communities:v20190315_161037_23271_1555129264
	ROC AUC: 0.7215269560475308
	PR AUC: 0.06656538517176142


### TF-Hub Model

In [24]:
# Column under which `make_test_input_fn` stores the model's text input.
MODEL_TEXT_FEATURE = 'text'
# Model to evaluate; `print_results` also uses this string as the name of the
# prediction column in the results DataFrame.
MODEL_NAMES = [
    'tf_hub_classifier_many_communities:v20190219_185602_316000_1553563221_gpu_v100_4',
]

model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# No tokenizer (None): `make_test_input_fn` then wraps each raw text in a
# one-element list. The label is parsed as an integer feature (tf.int64).
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, None, label_data_type=tf.int64)

In [25]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)

# Rebinds `test_dataset`, replacing the dataset object of the previous section.
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The saved log below shows more than 2.5 million lines being loaded, still
# below this limit.
test_dataset.load_data(10000000)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:Loaded 200000 lines.
INFO:tensorflow:Loaded 300000 lines.
INFO:tensorflow:Loaded 400000 lines.
INFO:tensorflow:Loaded 500000 lines.
INFO:tensorflow:Loaded 600000 lines.
INFO:tensorflow:Loaded 700000 lines.
INFO:tensorflow:Loaded 800000 lines.
INFO:tensorflow:Loaded 900000 lines.
INFO:tensorflow:Loaded 1000000 lines.
INFO:tensorflow:Loaded 1100000 lines.
INFO:tensorflow:Loaded 1200000 lines.
INFO:tensorflow:Loaded 1300000 lines.
INFO:tensorflow:Loaded 1400000 lines.
INFO:tensorflow:Loaded 1500000 lines.
INFO:tensorflow:Loaded 1600000 lines.
INFO:tensorflow:Loaded 1700000 lines.
INFO:tensorflow:Loaded 1800000 lines.
INFO:tensorflow:Loaded 1900000 lines.
INFO:tensorflow:Loaded 2000000 lines.
INFO:tensorflow:Loaded 2100000 lines.
INFO:tensorflow:Loaded 2200000 lines.
INFO:tensorflow:Loaded 2300000 lines.
INFO:tensorflow:Loaded 2400000 lines.
INFO:tensorflow:Loaded 2500000 l

In [None]:
# Set recompute_predictions=False to save time if predictions are available.
# NOTE(review): this cell has no execution count in the saved notebook, i.e.
# it was not run in the saved session.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

In [16]:
many_communities_tfhub_test_df = test_dataset.show_data()

In [None]:
print_results(many_communities_tfhub_test_df, MODEL_NAMES)

## Evaluate models on Many Communities subset

In [11]:
# Settings for the Many Communities subset evaluation. NOTE: this re-assigns
# module-level names used by the previous sections; run the cells below only
# after this one.
# Prediction key requested from the models (passed as `prediction_keys`).
LABEL_NAME_PREDICTION_MODEL = 'label/logistic'
# Validation and test TF-Record files.
# NOTE(review): the double dot in '..tfrecord' looks unusual -- confirm that
# it matches the actual object names on GCS.
DATASET_VALID = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord'
DATASET_TEST = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord'
# Label feature in the records.
DATA_LABEL = 'label'
# Raw text feature in the records (other sections use 'comment_text').
DATASET_TEXT_FEATURE='text'

### Pessimistic

In [12]:
# Output directories handed to `Dataset` for the validation and the test
# split of the "pessimistic" setting.
OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/valid')
OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/test')

#### CNN, GRU Attention Models

In [13]:
# Column under which `make_test_input_fn` stores the tokenized text.
MODEL_TEXT_FEATURE = 'tokens'
# Models to evaluate; `print_results` also uses these strings as the names of
# the prediction columns in the results DataFrame.
MODEL_NAMES = [
    'tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1',
    'tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1',
]

# The model input is a list of strings (the tokens).
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# One input function per split; both tokenize with `tokenizer` and parse the
# label as an integer feature (tf.int64).
valid_input_fn = make_test_input_fn(
    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer, label_data_type=tf.int64)

test_input_fn = make_test_input_fn(
    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer, label_data_type=tf.int64)

In [14]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)

valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
# The limit is far above the 4080 examples that the saved prediction log
# reports for the first dataset processed, so presumably everything is loaded.
valid_dataset.load_data(100000000)

Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecordDataset`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To constru



INFO:tensorflow:End of file.


In [15]:
# Need to set seed before loading data to be able to reload same data in the future
# (re-seeded here so this load does not depend on the validation load above).
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
# The limit is far above the 4104 examples that the saved prediction log
# reports for the second dataset processed, so presumably everything is loaded.
test_dataset.load_data(100000000)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:End of file.


In [16]:
# Add the predictions of `model` to the validation split first, then to the
# test split. Pass recompute_predictions=False instead to save time if
# predictions are already available.
for eval_dataset in (valid_dataset, test_dataset):
    eval_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:Preparing train data: 0/4080




INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Prediction job completed.




INFO:tensorflow:Prediction job completed.
INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:Preparing train data: 0/4104




INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Prediction job completed.




INFO:tensorflow:Prediction job completed.


In [24]:
# Report the metrics of every model in MODEL_NAMES on the validation split.
valid_data = valid_dataset.show_data()
print_results(valid_data, MODEL_NAMES)

tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1
	ROC AUC: 0.8233381391772395
	PR AUC: 0.8062951511107903
	F1: 0.7607565011820331
tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1
	ROC AUC: 0.8303615196078432
	PR AUC: 0.8125045070656154
	F1: 0.7703703703703705


In [25]:
# Report the metrics of every model in MODEL_NAMES on the test split.
test_data = test_dataset.show_data()
print_results(test_data, MODEL_NAMES)

tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1
	ROC AUC: 0.7981477681641835
	PR AUC: 0.7900106468171257
	F1: 0.7378091872791519
tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1
	ROC AUC: 0.8074846866462235
	PR AUC: 0.7951370231895221
	F1: 0.7507100720996286


#### TF-Hub Model

In [26]:
# The TF-Hub classifier is fed through the 'text' feature.
MODEL_TEXT_FEATURE = 'text'
MODEL_NAMES = [
    'tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1',
]

# The model's only input feature, encoded with the LIST_STRING spec.
model_input_spec = {}
model_input_spec[MODEL_TEXT_FEATURE] = utils_tfrecords.EncodingFeatureSpec.LIST_STRING

model = Model(
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME,
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY)

# Validation and test input functions share every setting except the dataset
# path. No tokenizer function is supplied (None) for this model.
valid_input_fn, test_input_fn = (
    make_test_input_fn(
        dataset_path,
        MODEL_TEXT_FEATURE,
        DATASET_TEXT_FEATURE,
        DATA_LABEL,
        None,
        label_data_type=tf.int64)
    for dataset_path in (DATASET_VALID, DATASET_TEST))

In [27]:
# Seed Python's `random` module before loading so that the same data can be
# reloaded in the future.
random.seed(2018)

valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
# 10 ** 8 == 100000000; presumably a cap on the number of examples to load that
# is large enough to read the whole split -- TODO confirm in Dataset.load_data.
valid_dataset.load_data(10 ** 8)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:End of file.


In [28]:
# Seed Python's `random` module before loading so that the same data can be
# reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
# 10 ** 8 == 100000000; presumably a cap on the number of examples to load that
# is large enough to read the whole split -- TODO confirm in Dataset.load_data.
test_dataset.load_data(10 ** 8)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:End of file.


In [29]:
# Add the predictions of `model` to the validation split first, then to the
# test split. Pass recompute_predictions=False instead to save time if
# predictions are already available.
for eval_dataset in (valid_dataset, test_dataset):
    eval_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/4080




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Prediction job completed.
INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/4104




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Prediction job completed.


In [30]:
# Report the metrics of every model in MODEL_NAMES on the validation split.
valid_data = valid_dataset.show_data()
print_results(valid_data, MODEL_NAMES)

tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1
	ROC AUC: 0.8612435121107267
	PR AUC: 0.851153195076283
	F1: 0.7937575030012005


In [31]:
# Report the metrics of every model in MODEL_NAMES on the test split.
test_data = test_dataset.show_data()
print_results(test_data, MODEL_NAMES)

tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1
	ROC AUC: 0.8434673869262717
	PR AUC: 0.8326080326940988
	F1: 0.779380468195791


### Optimistic

In [32]:
# Directories for the TF-Records of the optimistic setting, one for the
# validation split and one for the test split, both under OUTPUT_DIR_BASE.
OUTPUT_DIR_VALID, OUTPUT_DIR_TEST = (
    os.path.join(OUTPUT_DIR_BASE, relative_dir)
    for relative_dir in (
        'many_communities_40_per_8_shot/optimistic/valid',
        'many_communities_40_per_8_shot/optimistic/test',
    ))

#### CNN, GRU Attention Models

In [33]:
# The CNN and GRU-attention models are fed through the 'tokens' feature.
MODEL_TEXT_FEATURE = 'tokens'
MODEL_NAMES = [
    'tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1',
    'tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1',
]

# The models' only input feature, encoded with the LIST_STRING spec.
model_input_spec = {}
model_input_spec[MODEL_TEXT_FEATURE] = utils_tfrecords.EncodingFeatureSpec.LIST_STRING

model = Model(
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME,
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY)

# Validation and test input functions share every setting except the dataset
# path. `tokenizer` is passed as the tokenizer function.
valid_input_fn, test_input_fn = (
    make_test_input_fn(
        dataset_path,
        MODEL_TEXT_FEATURE,
        DATASET_TEXT_FEATURE,
        DATA_LABEL,
        tokenizer,
        label_data_type=tf.int64)
    for dataset_path in (DATASET_VALID, DATASET_TEST))

In [34]:
# Seed Python's `random` module before loading so that the same data can be
# reloaded in the future.
random.seed(2018)

valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
# 10 ** 8 == 100000000; presumably a cap on the number of examples to load that
# is large enough to read the whole split -- TODO confirm in Dataset.load_data.
valid_dataset.load_data(10 ** 8)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:End of file.


In [35]:
# Seed Python's `random` module before loading so that the same data can be
# reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
# 10 ** 8 == 100000000; presumably a cap on the number of examples to load that
# is large enough to read the whole split -- TODO confirm in Dataset.load_data.
test_dataset.load_data(10 ** 8)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:End of file.


In [36]:
# Add the predictions of `model` to the validation split first, then to the
# test split. Pass recompute_predictions=False instead to save time if
# predictions are already available.
for eval_dataset in (valid_dataset, test_dataset):
    eval_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:Preparing train data: 0/4080




INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Prediction job completed.




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Prediction job completed.
INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:Preparing train data: 0/4104




INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Prediction job completed.




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Prediction job completed.


In [37]:
# Report the metrics of every model in MODEL_NAMES on the validation split.
valid_data = valid_dataset.show_data()
print_results(valid_data, MODEL_NAMES)

tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1
	ROC AUC: 0.8304709727028066
	PR AUC: 0.8191225889787218
	F1: 0.7564259485924112
tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1
	ROC AUC: 0.8293254998077663
	PR AUC: 0.8181913933482414
	F1: 0.7652214022140222


In [38]:
# Report the metrics of every model in MODEL_NAMES on the test split.
test_data = test_dataset.show_data()
print_results(test_data, MODEL_NAMES)

tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1
	ROC AUC: 0.8043942295635125
	PR AUC: 0.79754755517453
	F1: 0.7305737109658679
tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1
	ROC AUC: 0.8156875904836816
	PR AUC: 0.8081941065311745
	F1: 0.7558876811594204


#### TF-Hub Model

In [39]:
# The TF-Hub classifier is fed through the 'text' feature.
MODEL_TEXT_FEATURE = 'text'
MODEL_NAMES = [
    'tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1',
]

# The model's only input feature, encoded with the LIST_STRING spec.
model_input_spec = {}
model_input_spec[MODEL_TEXT_FEATURE] = utils_tfrecords.EncodingFeatureSpec.LIST_STRING

model = Model(
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME,
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY)

# Validation and test input functions share every setting except the dataset
# path. No tokenizer function is supplied (None) for this model.
valid_input_fn, test_input_fn = (
    make_test_input_fn(
        dataset_path,
        MODEL_TEXT_FEATURE,
        DATASET_TEXT_FEATURE,
        DATA_LABEL,
        None,
        label_data_type=tf.int64)
    for dataset_path in (DATASET_VALID, DATASET_TEST))

In [None]:
# Seed Python's `random` module before loading so that the same data can be
# reloaded in the future.
random.seed(2018)

valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
# 10 ** 8 == 100000000; presumably a cap on the number of examples to load that
# is large enough to read the whole split -- TODO confirm in Dataset.load_data.
valid_dataset.load_data(10 ** 8)

In [None]:
# Seed Python's `random` module before loading so that the same data can be
# reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
# 10 ** 8 == 100000000; presumably a cap on the number of examples to load that
# is large enough to read the whole split -- TODO confirm in Dataset.load_data.
test_dataset.load_data(10 ** 8)

In [None]:
# Add the predictions of `model` to the validation split first, then to the
# test split. Pass recompute_predictions=False instead to save time if
# predictions are already available.
for eval_dataset in (valid_dataset, test_dataset):
    eval_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

In [None]:
# Report the results of every model in MODEL_NAMES on the validation split.
valid_data = valid_dataset.show_data()
print_results(valid_data, MODEL_NAMES)

In [None]:
# Report the results of every model in MODEL_NAMES on the test split.
test_data = test_dataset.show_data()
print_results(test_data, MODEL_NAMES)

### Finetuned

In [None]:
# TODO(msushkov): Add the evaluation of the finetuned models.