In [None]:
import os
import json
import csv
import pandas as pd

import torch
import datasets

from pathlib import Path

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from evaluate import load

In [None]:
def write_header_in_csv(file, fields: list):
    with open(file, 'w', encoding='utf-8-sig', newline='') as csvFile:
        writer = csv.DictWriter(
            csvFile, fieldnames=fields, delimiter=',', escapechar='\\')

        writer.writeheader()

def append_in_csv(file, data: dict):
    with open(file, 'a', encoding='utf-8-sig', newline='') as csvFile:
        writer = csv.DictWriter(
            csvFile, fieldnames=data.keys(), delimiter=',', escapechar='\\')

        writer.writerow(data)

In [None]:
def label_encode(data):
    labels = {'contradiction': 0, 'entailment': 1, 'neutral': 2}
    key = data['gold_label']
    data = {'label': labels[key]}

    return data

def tokenize(data):
    data = tokenizer(
        data['sentence1'],
        data['sentence2'],
        max_length=512,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

    return data

def compute_metrics(y):
    logits, labels = y
    preds = logits.argmax(axis=1)
    metrics = 'precision', 'recall', 'f1'
    result = {}

    result.update(
        load('accuracy').compute(
            predictions=preds,
            references=labels
        )
    )

    for metric in metrics:
        result.update(
            load(metric).compute(
                predictions=preds,
                references=labels, 
                average='macro'
            )
        )

    return result

In [None]:
NAME_DATA = 'mednli'
NAME_MODELS = (
    'cross-encoder/nli-deberta-v3-base',
    'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext',
    'sentence-transformers/stsb-roberta-base-v2',
    'microsoft/deberta-v3-base'
)

PATH_BASE = Path('../')
PATH_MODELS = PATH_BASE/'models'

In [None]:
models = os.listdir(PATH_MODELS)
modelDict = {}

for M in NAME_MODELS:
    modelDict[M] = []
    key = M.split('/')[-1]
    for P in models:
        if not (NAME_DATA in P and os.path.isdir(PATH_MODELS/P)):
            continue
        if key == ''.join(P.split('_')[1:][:-1]):
            modelDict[M].append(P)

modelDict

{'cross-encoder/nli-deberta-v3-base': ['mednli_nli-deberta-v3-base_1675166374',
  'mednli_nli-deberta-v3-base_1675180105',
  'mednli_nli-deberta-v3-base_1675241894'],
 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext': ['mednli_BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext_1675257677',
  'mednli_BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext_1675307496'],
 'sentence-transformers/stsb-roberta-base-v2': ['mednli_stsb-roberta-base-v2_1675258266'],
 'microsoft/deberta-v3-base': ['mednli_deberta-v3-base_1675300074']}

In [None]:
for NAME_MODEL, models in modelDict.items():
    dataset = datasets.load_dataset(
        'bigbio/mednli', 
        'mednli_source',
        data_dir=PATH_BASE/'data',
        split='test'
    )

    tokenizer = AutoTokenizer.from_pretrained(NAME_MODEL)

    dataset = dataset.map(label_encode)
    dataset = dataset.map(tokenize, batched=True)

    for NAME_MODEL in models:
        PATH_MODEL = PATH_MODELS/NAME_MODEL

        model = AutoModelForSequenceClassification.from_pretrained(
            PATH_MODEL/'weights', 
            local_files_only=True
        )

        # cols = list(dataset.column_names)
        cols = ['sentence1', 'sentence2', 'label', 'pred']
        write_header_in_csv(PATH_MODEL/'pred.csv', cols)
        
        model.eval()
        for data in dataset:
            features = tokenizer(
                [[data['sentence1'], data['sentence2']]],
                padding=True, 
                truncation=True, 
                return_tensors="pt"
            )
            
            with torch.no_grad():
                score = model(**features).logits
                
            pred = score.argmax(dim=1).numpy()
            data = dict(
                sentence1=data['sentence1'],
                sentence2=data['sentence2'],
                label=data['label'],
                pred=pred[0]
            )

            append_in_csv(PATH_MODEL/'pred.csv', data)

Using custom data configuration mednli_source-03971dcdb462331f
Found cached dataset mednli (C:/Users/wowns/.cache/huggingface/datasets/bigbio___mednli/mednli_source-03971dcdb462331f/1.0.0/f18e31046c4bf68bbdc505fbf3dab924a4ea60082a7f41346e3a84b54f453535)


OSError: Can't load tokenizer for '..\models\mednli_nli-deberta-v3-base_1675166374\weights'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '..\models\mednli_nli-deberta-v3-base_1675166374\weights' is the correct path to a directory containing all relevant files for a DebertaV2TokenizerFast tokenizer.

In [8]:
with open(PATH_MODEL/'args.json', 'r') as f:
    args = json.load(f)
    
args = TrainingArguments(**args)
trainer = Trainer(model, args, compute_metrics=compute_metrics)

result = trainer.evaluate(dataset)
df = pd.DataFrame.from_dict([result])

df.to_csv(PATH_MODEL/'result.csv', index=False, encoding='utf-8')
df

PyTorch: setting up devices
Both warmup_ratio and warmup_steps given, warmup_steps will override any effect of warmup_ratio during training
The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence1, sentence1_binary_parse, pairID, sentence2_parse, gold_label, sentence1_parse, sentence2_binary_parse, sentence2. If sentence1, sentence1_binary_parse, pairID, sentence2_parse, gold_label, sentence1_parse, sentence2_binary_parse, sentence2 are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1422
  Batch size = 32


Unnamed: 0,eval_loss,eval_accuracy,eval_precision,eval_recall,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second
0,0.499047,0.824191,0.824744,0.824191,0.824258,1464.8049,0.971,0.031


In [29]:
sum(result) / len(result)

tensor([0.8242])