In [1]:
import os
import torch
from transformers import Trainer, TrainingArguments, TrainerCallback
from transformers import BertTokenizerFast, BertForTokenClassification
from transformers import set_seed
from datasets import load_from_disk, load_dataset
from torch.nn import functional as F
import numpy as np
from nltk import sent_tokenize
# Expose only GPU 0 to this process.
# NOTE(review): this is set after `import torch`; presumably it still takes
# effect because CUDA has not been initialised yet at this point — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"
# Pin the current CUDA device only when one is actually usable, so the
# notebook can still be run on a CPU-only machine (the evaluation cell below
# never moves the model or its inputs to the GPU).
if torch.cuda.is_available():
    torch.cuda.set_device(0)

In [2]:
from pathlib import Path
# Display every entry found directly inside the 'test' directory.
[*Path('test').glob('*')]

[PosixPath('test/logs'), PosixPath('test/checkpoint-1000')]

In [17]:
# Load the token-classification weights saved under test/checkpoint-9000/.
# NOTE(review): the directory listing recorded earlier in this notebook shows
# only 'test/checkpoint-1000' — confirm this checkpoint exists before re-running.
model = BertForTokenClassification.from_pretrained(
    pretrained_model_name_or_path='test/checkpoint-9000/',
)

In [18]:
# Tokenizer for the 'bert-base-cased' vocabulary.
# NOTE(review): presumably the same vocabulary the checkpoint was trained with — confirm.
tokenizer = BertTokenizerFast.from_pretrained(
    pretrained_model_name_or_path='bert-base-cased',
)

In [19]:
# Load the dataset previously saved to ../data/rocstories/ (the evaluation
# cell reads its 'val' split).
dataset = load_from_disk(
    dataset_path='../data/rocstories/',
)

In [20]:
from random import shuffle
from sklearn.metrics import accuracy_score
from scipy.stats import kendalltau
from collections import defaultdict
from tqdm.auto import tqdm

# Per-story scores ('tau' and 'acc'); the summary cells below average them.
metrics = defaultdict(list)

# When True, print each story, its shuffled form and the model's predictions.
VERBOSE = True

# Evaluate on the first 1% of the validation split only.
val_split = dataset['val']
eval_subset = val_split.select(list(range(len(val_split) // 100)))

for entry in tqdm(eval_subset):
    # Sentences of the story, collected from the 'sentence*' fields; used for
    # display only.
    # NOTE(review): keys are ordered by their LAST character, which assumes
    # single-character suffixes (e.g. sentence1..sentence5) — confirm against
    # the dataset schema.
    sentence_keys = sorted(
        (key for key in entry if key.startswith('sentence')),
        key=lambda x: x[-1],
    )
    sents = [entry[key] for key in sentence_keys]

    # 'text' already holds the shuffled story with a literal [CLS] marker in
    # front of every sentence; 'so_targets' is the reference ordering it is
    # scored against (presumably the original position of each shuffled
    # sentence — confirm against the dataset builder).
    shuffled_text = entry['text']
    shuffled_idx = np.array(entry['so_targets'])
    if VERBOSE:
        print(' '.join(sents))
        print()
        print(shuffled_text)
        print('-')

    # add_special_tokens=False: the only [CLS] tokens are the ones written in
    # the text itself, one per sentence.
    inputs = tokenizer(shuffled_text, add_special_tokens=False, return_tensors='pt')
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): flattening assumes one logit per token (num_labels == 1);
    # otherwise the mask below would not line up with the logits — confirm.
    logits = outputs['logits'].reshape(-1)
    input_ids = inputs['input_ids'].reshape(-1)
    # Keep only the scores emitted at the [CLS] positions.
    target_logits = logits[input_ids == tokenizer.cls_token_id]
    scores = target_logits.reshape(-1).detach().numpy()
    # Double argsort turns the raw scores into ranks (0 = lowest score).
    predicted_idx = np.argsort(np.argsort(scores))
    # If fewer [CLS] positions were found than targets, compare only the
    # leading targets so both arrays have the same length.
    if shuffled_idx.shape[0] > target_logits.shape[0]:
        shuffled_idx = shuffled_idx[:target_logits.shape[0]]

    tau, p = kendalltau(shuffled_idx, predicted_idx)
    acc = accuracy_score(shuffled_idx, predicted_idx)
    metrics['tau'].append(tau)
    metrics['acc'].append(acc)
    if VERBOSE:
        print('Acc: ', acc)
        print('Tau: ', tau)
        print('-')
        print('Logits: ', scores)
        print('Pred: ', predicted_idx)
        print('True: ', shuffled_idx)
        # Rebuild the predicted story from the same [CLS] markers that were
        # scored, so segment i always corresponds to score i. The piece before
        # the first marker has no score and is dropped.
        segments = [seg.strip() for seg in shuffled_text.split(tokenizer.cls_token)[1:]]
        if len(segments) == len(scores):
            ordered = [segments[i] for i in np.argsort(scores)]
            print('Predicted Text Order: ', ' '.join(f'{tokenizer.cls_token} {seg}' for seg in ordered))
        else:
            # Say why the line is missing instead of silently skipping it.
            print(f'Predicted Text Order: <skipped: {len(segments)} segments vs {len(scores)} scores>')
        print('\n------\n')

  0%|          | 0/58 [00:00<?, ?it/s]

Angie wanted to surprise her kids with a trip to the zoo. She packed up the things they would need for the day. The kids got in the car and she told them she had a surprise. She pulled into the zoo parking lot. The kids were excited to see that they were at the zoo.

[CLS] She pulled into the zoo parking lot. [CLS] Angie wanted to surprise her kids with a trip to the zoo. [CLS] She packed up the things they would need for the day. [CLS] The kids got in the car and she told them she had a surprise. [CLS] The kids were excited to see that they were at the zoo.
-
Acc:  0.6
Tau:  0.7999999999999999
-
Logits:  [ 2.3482354 -0.0656156  1.24684    3.1359527  3.629693 ]
Pred:  [2 0 1 3 4]
True:  [3 0 1 2 4]
Predicted Text Order:  [CLS] Angie wanted to surprise her kids with a trip to the zoo. [CLS] She packed up the things they would need for the day. [CLS] She pulled into the zoo parking lot. [CLS] The kids got in the car and she told them she had a surprise. [CLS] The kids were excited to see

In [21]:
# Average of the per-story accuracy values collected by the evaluation loop.
np.asarray(metrics['acc']).mean()

0.6862068965517242

In [22]:
# Average of the per-story Kendall tau values collected by the evaluation loop.
# NOTE(review): a single NaN tau (e.g. from a degenerate story) would make
# this mean NaN — confirm none occur on the evaluated subset.
np.asarray(metrics['tau']).mean()

0.786206896551724