## Evaluate the classifier-based model (top-1 / top-5 accuracy on the CADEC test set)

In [2]:
import os
import sys
import random
import evaluate

# Insert the parent directory (abspath of '../') into the import path at
# position 1 -- presumably so the `src.*` packages imported further down resolve.
p = os.path.abspath('../')
sys.path.insert(1, p)

# Hugging Face environment switches, set before `transformers` is imported below.
os.environ["TOKENIZERS_PARALLELISM"] = "true"
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"

import transformers
# Restrict the transformers library's own logging to error-level messages.
transformers.logging.set_verbosity_error()

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
import torch.nn.functional as F

import matplotlib.pyplot as plt

from datasets import Dataset, load_dataset, Split

import seaborn as sns
sns.set_theme(color_codes=True)

from transformers import AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, Trainer, TrainingArguments

from src.contrastive_transformers.collators import TextCollator
from src.contrastive_transformers.datasets import AutoAugmentDataset
from src.contrastive_transformers.trainers import ContrastiveTrainer
from src.contrastive_transformers.losses import SupConLoss

from src.utils.utils import * 

# Seed every random number generator used in this notebook with the same value
# so that repeated runs start from the same state.
seed = 7631
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(seed)

# Ask cuDNN to stick to deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Corpora pulled from the Hugging Face Hub.
smm4h20 = load_dataset('KevinSpaghetti/smm4h20')
cadec = load_dataset('KevinSpaghetti/cadec')

# Distinct preferred terms (PT) found in the locally processed MedDRA ontology.
# Stored as `ontology_pts`: the name `all_pts` is used below for the Hub dataset,
# and reusing it here would silently discard this array.
meddra_ontology = pd.read_csv('./data/processed/meddra_ontology.csv')
ontology_pts = meddra_ontology['term_PT'].unique()

# Distinct lowest-level terms (LLT) and the LLT -> PT lookup built from the
# (term_LLT, term_PT) pairs of the examples file.
llt_pt_examples = pd.read_csv('./data/processed/llt_pt_examples.csv')
all_llts = llt_pt_examples['term_LLT'].unique()

llt_to_pt_mapping = dict(llt_pt_examples[['term_LLT', 'term_PT']].itertuples(index=False))

# Label vocabulary: `pt_vocab` maps term -> label id, `index_to_label` is the
# reverse lookup (label id -> term). Both come from the same Hub dataset.
all_pts = load_dataset('KevinSpaghetti/all_pts', split=Split.ALL)
pt_vocab = dict(zip(all_pts['term'], all_pts['label']))
index_to_label = dict(zip(all_pts['label'], all_pts['term']))
print(len(pt_vocab))

Using custom data configuration KevinSpaghetti--smm4h20-9b0cef5ffb10261f
Found cached dataset parquet (/home/kevinds/.cache/huggingface/datasets/KevinSpaghetti___parquet/KevinSpaghetti--smm4h20-9b0cef5ffb10261f/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/2 [00:00<?, ?it/s]

Using custom data configuration KevinSpaghetti--cadec-d97aca76af8be810
Found cached dataset parquet (/home/kevinds/.cache/huggingface/datasets/KevinSpaghetti___parquet/KevinSpaghetti--cadec-d97aca76af8be810/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
# Load the classifier checkpoint from disk. The size of the classification head
# and both label lookup tables are derived from `pt_vocab` (term -> label id).
checkpoint_dir = './models/cadec/pretraining-classifier/checkpoint-6496'
label_to_id = pt_vocab
id_to_label = {label_id: term for term, label_id in pt_vocab.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint_dir,
    num_labels=len(label_to_id),
    label2id=label_to_id,
    id2label=id_to_label,
    cache_dir='./cache/',
)

In [13]:
# Move the model to the selected device and put it in evaluation mode.
model.to(device)
model.eval()

# Tokenizer of the base model named below.
model_name = 'allenai/scibert_scivocab_uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)


def attach_label(example):
    """Return the integer label for an example's `term_PT`, looked up in `pt_vocab`."""
    return {'label': pt_vocab[example['term_PT']]}


# Swap the textual `term_PT` column for its integer `label` in both splits.
train = cadec['train'].map(attach_label, remove_columns=['term_PT'])
test = cadec['test'].map(attach_label, remove_columns=['term_PT'])

# NOTE(review): presumably `seen` holds the test examples whose label also occurs
# in `train` and `unseen` holds the rest -- confirm against src.utils.utils.
seen, unseen = get_seen_unseen_split(train, test, label_col='label')

# Accuracy figures collected by the evaluation cells below, keyed by subset name.
results = {}

Loading cached processed dataset at /home/kevinds/.cache/huggingface/datasets/KevinSpaghetti___parquet/KevinSpaghetti--cadec-d97aca76af8be810/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-c76b96fcef591309.arrow
Loading cached processed dataset at /home/kevinds/.cache/huggingface/datasets/KevinSpaghetti___parquet/KevinSpaghetti--cadec-d97aca76af8be810/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-3c03f1d5f6eda4ca.arrow
Loading cached processed dataset at /home/kevinds/.cache/huggingface/datasets/KevinSpaghetti___parquet/KevinSpaghetti--cadec-d97aca76af8be810/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-e10d86cac4d975b4.arrow
Loading cached processed dataset at /home/kevinds/.cache/huggingface/datasets/KevinSpaghetti___parquet/KevinSpaghetti--cadec-d97aca76af8be810/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-e5a77c47dec9701b.arrow


In [14]:
def evaluate_topk_accuracy(dataset, model, tokenizer, device, ks=(1, 5), max_length=32):
    """Score `model` on `dataset` with one "accuracyk" metric per value in `ks`.

    Each row is tokenized on its own (padded to `max_length`), passed through the
    model without gradient tracking, and the indices of the highest logits are
    handed to the metrics together with the row's reference label.

    Args:
        dataset: iterable of rows providing an 'ade' text field and a 'label' id.
        model: sequence classifier whose output exposes 'logits'.
        tokenizer: tokenizer matching `model`.
        device: device the tokenized inputs are moved to (the model must already
            be on it).
        ks: prediction widths to evaluate; one metric instance is loaded per value.
        max_length: padding length passed to the tokenizer.

    Returns:
        dict mapping 'top<k>' to the 'accuracy' value reported by the metric.

    NOTE(review): the exact scoring rule lives in the Hub metric
    "KevinSpaghetti/accuracyk"; presumably a row counts as correct when its
    reference is among the k predicted ids -- confirm against the metric card.
    """
    ks = tuple(ks)
    widest = max(ks)
    # Separate metric instances so each k accumulates its own predictions.
    metrics = {k: evaluate.load("KevinSpaghetti/accuracyk") for k in ks}

    with torch.no_grad():
        for row in tqdm(dataset):
            model_inputs = tokenizer(
                row['ade'],
                padding='max_length',
                max_length=max_length,
                return_tensors='pt',
            ).to(device)
            logits = model(**model_inputs).get('logits').cpu()

            # One top-k call serves every k: indices come back ordered from the
            # highest logit down, so the first k columns are the top-k ids.
            ranked = torch.topk(logits, widest, dim=-1).indices.numpy()

            for k, metric in metrics.items():
                metric.add_batch(predictions=ranked[:, :k], references=[row['label']])

    return {f'top{k}': metric.compute()['accuracy'] for k, metric in metrics.items()}


# Accuracy over the complete test set.
results['complete'] = evaluate_topk_accuracy(test, model, tokenizer, device)

100%|████████████████████████████████████████| 1121/1121 [00:10<00:00, 110.50it/s]


In [15]:
# Top-1 / top-5 accuracy on the `seen` subset of the test set.
accuracy_1 = evaluate.load("KevinSpaghetti/accuracyk")
accuracy_5 = evaluate.load("KevinSpaghetti/accuracyk")

with torch.no_grad():
    for example in tqdm(seen):
        # One example at a time, padded to 32 tokens.
        encoded = tokenizer(
            example['ade'],
            padding='max_length',
            max_length=32,
            return_tensors='pt',
        ).to(device)
        scores = model(**encoded).get('logits').cpu()
        gold = [example['label']]

        # Best single class id (axis kept) and the five highest-scoring class ids.
        best_one = np.argmax(scores, axis=-1, keepdims=True)
        best_five = np.argpartition(scores, -5, axis=-1)[:, -5:]

        accuracy_1.add_batch(predictions=best_one, references=gold)
        accuracy_5.add_batch(predictions=best_five, references=gold)

seen_scores = {}
seen_scores['top1'] = accuracy_1.compute()['accuracy']
seen_scores['top5'] = accuracy_5.compute()['accuracy']
results['seen'] = seen_scores

100%|████████████████████████████████████████| 1083/1083 [00:09<00:00, 110.92it/s]


In [16]:
# Top-1 / top-5 accuracy on the `unseen` subset of the test set.
accuracy_1 = evaluate.load("KevinSpaghetti/accuracyk")
accuracy_5 = evaluate.load("KevinSpaghetti/accuracyk")

with torch.no_grad():
    for example in tqdm(unseen):
        # One example at a time, padded to 32 tokens.
        encoded = tokenizer(
            example['ade'],
            padding='max_length',
            max_length=32,
            return_tensors='pt',
        ).to(device)
        scores = model(**encoded).get('logits').cpu()
        gold = [example['label']]

        # Best single class id (axis kept) and the five highest-scoring class ids.
        best_one = np.argmax(scores, axis=-1, keepdims=True)
        best_five = np.argpartition(scores, -5, axis=-1)[:, -5:]

        accuracy_1.add_batch(predictions=best_one, references=gold)
        accuracy_5.add_batch(predictions=best_five, references=gold)

unseen_scores = {}
unseen_scores['top1'] = accuracy_1.compute()['accuracy']
unseen_scores['top5'] = accuracy_5.compute()['accuracy']
results['unseen'] = unseen_scores

100%|█████████████████████████████████████████████| 38/38 [00:01<00:00, 28.24it/s]


In [17]:
# Show the top-1 / top-5 accuracies collected for the complete, seen and unseen subsets.
results

{'complete': {'top1': 0.8305084745762712, 'top5': 0.903657448706512},
 'seen': {'top1': 0.8485687903970452, 'top5': 0.9187442289935365},
 'unseen': {'top1': 0.3157894736842105, 'top5': 0.47368421052631576}}