In [1]:
import glob
import logging
import os
import random
import json

import numpy as np
import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
import random
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm_notebook, trange
from tensorboardX import SummaryWriter

from pytorch_transformers import (WEIGHTS_NAME, BertConfig, BertForSequenceClassification, BertTokenizer,
                                  XLMConfig, XLMForSequenceClassification, XLMTokenizer, 
                                  XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer,
                                  RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer)

from pytorch_transformers import AdamW, WarmupLinearSchedule

# from make_features import (convert_examples_to_features,
#                         output_modes, processors)

%run make_features.ipynb

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


In [2]:
data_identifier = 'reddit_data'

In [3]:
args = {
    'data_dir': '{0}/'.format(data_identifier),
    'model_type':  'bert',
    'model_name': 'bert-base-uncased',
    'checkpoint_folder': False,
    'task_name': 'binary',
    'output_dir': 'outputs-{0}/'.format(data_identifier),
    'cache_dir': 'cache-{0}/'.format(data_identifier),
    'do_train': True,
    'do_eval': True,
    'fp16': True,
    'fp16_opt_level': 'O1',
    'max_seq_length': 128,
    'output_mode': 'classification',
    'train_batch_size': 8,
    'eval_batch_size': 8,

    'gradient_accumulation_steps': 1,
    'num_train_epochs': 1,
    'weight_decay': 0,
    'learning_rate': 4e-5,
    'adam_epsilon': 1e-8,
    'warmup_steps': 0,
    'max_grad_norm': 1.0,

    'logging_steps': 50,
    'evaluate_during_training': False,
    'save_steps': 1000,
    'eval_all_checkpoints': True,

    'overwrite_output_dir': False,
    'reprocess_input_data': False,
}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
MODEL_CLASSES = {
    'bert': (BertConfig, BertForSequenceClassification, BertTokenizer)
}

config_class, model_class, tokenizer_class = MODEL_CLASSES[args['model_type']]

In [5]:
config = config_class.from_pretrained(args['model_name'], num_labels=2, finetuning_task=args['task_name'])
tokenizer = tokenizer_class.from_pretrained(args['model_name'])

INFO:pytorch_transformers.modeling_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/rajat/.cache/torch/pytorch_transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c
INFO:pytorch_transformers.modeling_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": "binary",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "vocab_size": 30522
}

INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/b

In [6]:
if args['checkpoint_folder']:
    model = model_class.from_pretrained(args['checkpoint_folder'])
else:
    model = model_class.from_pretrained(args['model_name'])

INFO:pytorch_transformers.modeling_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/rajat/.cache/torch/pytorch_transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c
INFO:pytorch_transformers.modeling_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "vocab_size": 30522
}

INFO:pytorch_transformers.modeling_utils:loading weights file https://s3.amazonaws.com/models.huggingface.co/b

In [7]:
model.to(device);

In [8]:
task = args['task_name']

processor = processors[task]()
label_list = processor.get_labels()
num_labels = len(label_list)

In [9]:
def load_and_cache_examples(task, tokenizer, evaluate=False):
    processor = processors[task]()
    output_mode = args['output_mode']
    
    mode = 'dev' if evaluate else 'train'
    cached_features_file = os.path.join(args['data_dir'], f"cached_{mode}_{args['model_name']}_{args['max_seq_length']}_{task}")
    
    if os.path.exists(cached_features_file) and not args['reprocess_input_data']:
        logger.info("Loading features from cached file %s", cached_features_file)
        features = torch.load(cached_features_file)
               
    else:
        logger.info("Creating features from dataset file at %s", args['data_dir'])
        label_list = processor.get_labels()
        examples = processor.get_dev_examples(args['data_dir']) if evaluate else processor.get_train_examples(args['data_dir'])
        
        features = convert_examples_to_features(examples, label_list, args['max_seq_length'], tokenizer, output_mode,
            cls_token_at_end=bool(args['model_type'] in ['xlnet']),            # xlnet has a cls token at the end
            cls_token=tokenizer.cls_token,
            sep_token=tokenizer.sep_token,
            cls_token_segment_id=2 if args['model_type'] in ['xlnet'] else 0,
            pad_on_left=bool(args['model_type'] in ['xlnet']),                 # pad on the left for xlnet
            pad_token_segment_id=4 if args['model_type'] in ['xlnet'] else 0)
        
        logger.info("Saving features into cached file %s", cached_features_file)
        torch.save(features, cached_features_file)
        
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    if output_mode == "classification":
        all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)
    elif output_mode == "regression":
        all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)

    dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
    return dataset


In [10]:
def train(train_dataset, model, tokenizer):
    tb_writer = SummaryWriter()
    
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args['train_batch_size'])
    
    t_total = len(train_dataloader) // args['gradient_accumulation_steps'] * args['num_train_epochs']
    
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args['weight_decay']},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args['learning_rate'], eps=args['adam_epsilon'])
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args['warmup_steps'], t_total=t_total)
    
    if args['fp16']:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args['fp16_opt_level'])
        
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args['num_train_epochs'])
    logger.info("  Total train batch size  = %d", args['train_batch_size'])
    logger.info("  Gradient Accumulation steps = %d", args['gradient_accumulation_steps'])
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args['num_train_epochs']), desc="Epoch")
    
    for _ in train_iterator:
        epoch_iterator = tqdm_notebook(train_dataloader, desc="Iteration")
        for step, batch in enumerate(epoch_iterator):
            model.train()
            batch = tuple(t.to(device) for t in batch)
            inputs = {'input_ids':      batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2] if args['model_type'] in ['bert', 'xlnet'] else None,  # XLM don't use segment_ids
                      'labels':         batch[3]}
            outputs = model(**inputs)
            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
            print("\r%f" % loss, end='')

            if args['gradient_accumulation_steps'] > 1:
                loss = loss / args['gradient_accumulation_steps']

            if args['fp16']:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args['max_grad_norm'])
                
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args['max_grad_norm'])

            tr_loss += loss.item()
            if (step + 1) % args['gradient_accumulation_steps'] == 0:
                scheduler.step()  # Update learning rate schedule
                optimizer.step()
                model.zero_grad()
                global_step += 1

                if args['logging_steps'] > 0 and global_step % args['logging_steps'] == 0:
                    # Log metrics
                    if args['evaluate_during_training']:  # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar('eval_{}'.format(key), value, global_step)
                    tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args['logging_steps'], global_step)
                    logging_loss = tr_loss

                if args['save_steps'] > 0 and global_step % args['save_steps'] == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(args['output_dir'], 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    logger.info("Saving model checkpoint to %s", output_dir)


    return global_step, tr_loss / global_step


In [11]:
from sklearn.metrics import mean_squared_error, matthews_corrcoef, confusion_matrix
from scipy.stats import pearsonr

def get_mismatched(labels, preds):
    mismatched = labels != preds
    examples = processor.get_dev_examples(args['data_dir'])
    wrong = [i for (i, v) in zip(examples, mismatched) if v]
    
    return wrong

def get_eval_report(labels, preds):
    mcc = matthews_corrcoef(labels, preds)
    tn, fp, fn, tp = confusion_matrix(labels, preds).ravel()
    return {
        "mcc": mcc,
        "tp": tp,
        "tn": tn,
        "fp": fp,
        "fn": fn
    }, get_mismatched(labels, preds)

def compute_metrics(task_name, preds, labels):
    assert len(preds) == len(labels)
    return get_eval_report(labels, preds)

def evaluate(model, tokenizer, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_output_dir = args['output_dir']

    results = {}
    EVAL_TASK = args['task_name']

    eval_dataset = load_and_cache_examples(EVAL_TASK, tokenizer, evaluate=True)
    if not os.path.exists(eval_output_dir):
        os.makedirs(eval_output_dir)


    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args['eval_batch_size'])

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args['eval_batch_size'])
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    for batch in tqdm_notebook(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {'input_ids':      batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2] if args['model_type'] in ['bert', 'xlnet'] else None,  # XLM don't use segment_ids
                      'labels':         batch[3]}
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    if args['output_mode'] == "classification":
        preds = np.argmax(preds, axis=1)
    elif args['output_mode'] == "regression":
        preds = np.squeeze(preds)
    result, wrong = compute_metrics(EVAL_TASK, preds, out_label_ids)
    results.update(result)

    output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return results, wrong


In [12]:
def exec():
    if args['do_train']:
        train_dataset = load_and_cache_examples(task, tokenizer)
        logger.info("Dataset conversion completed")
        global_step, tr_loss = train(train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
    model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
    model_to_save.save_pretrained(args['output_dir'])
    tokenizer.save_pretrained(args['output_dir'])
    torch.save(args, os.path.join(args['output_dir'], 'training_args.bin'))


In [13]:
def evaluate_exec():
    results = {}
    if args['do_eval']:
        checkpoints = [args['output_dir']]
    if args['eval_all_checkpoints']:
        checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args['output_dir'] + '/**/' + WEIGHTS_NAME, recursive=True)))
        logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
    logger.info("Evaluate the following checkpoints: %s", checkpoints)
    for checkpoint in checkpoints:
        global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
        model = model_class.from_pretrained(checkpoint)
        model.to(device)
        result, wrong_preds = evaluate(model, tokenizer, prefix=global_step)
        result = dict((k + '_{}'.format(global_step), v) for k, v in result.items())
        results.update(result)

In [14]:
exec()

INFO:__main__:Loading features from cached file reddit_data/cached_train_bert-base-uncased_128_binary
INFO:__main__:Dataset conversion completed
INFO:__main__:***** Running training *****
INFO:__main__:  Num examples = 90557
INFO:__main__:  Num Epochs = 1
INFO:__main__:  Total train batch size  = 8
INFO:__main__:  Gradient Accumulation steps = 1
INFO:__main__:  Total optimization steps = 11320
Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


HBox(children=(IntProgress(value=0, description='Iteration', max=11320, style=ProgressStyle(description_width=…

0.656945



0.601708Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
0.415931Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
0.294262Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
0.324888Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0
0.281435Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0
0.004579

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-1000


0.015517

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-2000


0.003544

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-3000


0.000884

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-4000


0.326151Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0
0.004397

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-5000


0.005731

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-6000


0.281844Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0
0.001150

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-7000


0.001115

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-8000


0.003173

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-9000


0.001503

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-10000


0.215560Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0
0.000848

INFO:__main__:Saving model checkpoint to outputs-reddit_data/checkpoint-11000


0.000353

Epoch: 100%|██████████| 1/1 [2:29:25<00:00, 8965.91s/it]
INFO:__main__: global_step = 11320, average loss = 0.08321949306572808





In [15]:
evaluate_exec()

INFO:__main__:Evaluate the following checkpoints: ['outputs-reddit_data/checkpoint-1000', 'outputs-reddit_data/checkpoint-10000', 'outputs-reddit_data/checkpoint-11000', 'outputs-reddit_data/checkpoint-2000', 'outputs-reddit_data/checkpoint-3000', 'outputs-reddit_data/checkpoint-4000', 'outputs-reddit_data/checkpoint-5000', 'outputs-reddit_data/checkpoint-6000', 'outputs-reddit_data/checkpoint-7000', 'outputs-reddit_data/checkpoint-8000', 'outputs-reddit_data/checkpoint-9000', 'outputs-reddit_data']
INFO:__main__:Creating features from dataset file at reddit_data/
100%|██████████| 11320/11320 [00:12<00:00, 927.39it/s]
INFO:__main__:Saving features into cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 1000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 1000 *****
INFO:__main__:  fn = 516
INFO:__main__:  fp = 10
INFO:__main__:  mcc = 0.9100358384518535
INFO:__main__:  tn = 5958
INFO:__main__:  tp = 4836





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 10000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 10000 *****
INFO:__main__:  fn = 100
INFO:__main__:  fp = 65
INFO:__main__:  mcc = 0.9707702876206661
INFO:__main__:  tn = 5903
INFO:__main__:  tp = 5252





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 11000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 11000 *****
INFO:__main__:  fn = 93
INFO:__main__:  fp = 75
INFO:__main__:  mcc = 0.9702296348112021
INFO:__main__:  tn = 5893
INFO:__main__:  tp = 5259





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 2000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 2000 *****
INFO:__main__:  fn = 181
INFO:__main__:  fp = 44
INFO:__main__:  mcc = 0.9603603350987361
INFO:__main__:  tn = 5924
INFO:__main__:  tp = 5171





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 3000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 3000 *****
INFO:__main__:  fn = 119
INFO:__main__:  fp = 117
INFO:__main__:  mcc = 0.9581793030861288
INFO:__main__:  tn = 5851
INFO:__main__:  tp = 5233





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 4000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 4000 *****
INFO:__main__:  fn = 120
INFO:__main__:  fp = 75
INFO:__main__:  mcc = 0.9654611016078426
INFO:__main__:  tn = 5893
INFO:__main__:  tp = 5232





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 5000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 5000 *****
INFO:__main__:  fn = 89
INFO:__main__:  fp = 153
INFO:__main__:  mcc = 0.9572047622127761
INFO:__main__:  tn = 5815
INFO:__main__:  tp = 5263





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 6000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 6000 *****
INFO:__main__:  fn = 115
INFO:__main__:  fp = 81
INFO:__main__:  mcc = 0.9652743169994108
INFO:__main__:  tn = 5887
INFO:__main__:  tp = 5237





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 7000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 7000 *****
INFO:__main__:  fn = 123
INFO:__main__:  fp = 64
INFO:__main__:  mcc = 0.9668970477908907
INFO:__main__:  tn = 5904
INFO:__main__:  tp = 5229





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 8000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 8000 *****
INFO:__main__:  fn = 124
INFO:__main__:  fp = 51
INFO:__main__:  mcc = 0.9690487947763742
INFO:__main__:  tn = 5917
INFO:__main__:  tp = 5228





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation 9000 *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results 9000 *****
INFO:__main__:  fn = 98
INFO:__main__:  fp = 72
INFO:__main__:  mcc = 0.9698782038495046
INFO:__main__:  tn = 5896
INFO:__main__:  tp = 5254





INFO:__main__:Loading features from cached file reddit_data/cached_dev_bert-base-uncased_128_binary
INFO:__main__:***** Running evaluation reddit_data *****
INFO:__main__:  Num examples = 11320
INFO:__main__:  Batch size = 8


HBox(children=(IntProgress(value=0, description='Evaluating', max=1415, style=ProgressStyle(description_width=…

INFO:__main__:***** Eval results reddit_data *****
INFO:__main__:  fn = 93
INFO:__main__:  fp = 75
INFO:__main__:  mcc = 0.9702296348112021
INFO:__main__:  tn = 5893
INFO:__main__:  tp = 5259



