In [1]:
import polars as pl

df = pl.read_csv("fever_test.adversarial.csv")
df.head()

ID,CID,PREMISE,HYPOTHESIS,ALTERNATIVE HYPOTHESIS (only if you're not using the original one),LABEL,NEW HYPHOTESIS,NEW LABEL,CHANGE TYPE,BASE MODEL,LARGE1 MODEL,LARGE2 MODEL,DIFFICULTY SCORE
i64,i64,str,str,str,str,str,str,str,str,str,str,str
0,58846,"""Johnny Galecki . He is known f…","""Johnny Galecki has been in at …",,"""NEUTRAL""","""The number of sitcoms from Fra…","""ENTAILMENT""","""math""",,,,
1,172460,"""Matteo Renzi ( [ matˈtɛːo ˈrɛn…","""Matteo Renzi served as Prime M…",,"""ENTAILMENT""","""Matteo Renzi was the president…","""NEUTRAL""","""coreference""",,,,
2,181201,"""Southpaw is a 2015 American sp…","""Southpaw was released 2011.""",,"""CONTRADICTION""","""Among the movies that premiere…","""ENTAILMENT""","""rephrasing, math""",,,,
3,174024,"""Pink Floyd . The final Pink Fl…","""The Endless River is Pink Floy…",,"""CONTRADICTION""","""counting Pink Floyd's albums f…","""ENTAILMENT""","""ranking""",,,,
4,118068,"""Beatles Day . This day is cons…","""Liverpool is unrelated to The …","""Beatles Day was celebrated in …","""CONTRADICTION""","""Beatles Day was celebrated 20 …","""ENTAILMENT""","""math""",,,,


In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

MODELS = {}
TOKENIZERS = {}

model_name_base = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
model_name_large = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
model_name_large_2 = "Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli"
MAP = {
    'base' : model_name_base,
    'large1': model_name_large,
    'large2': model_name_large_2,
}

for model_name in MAP.values():
    print(f"> loading {model_name}")
    TOKENIZERS[model_name] = AutoTokenizer.from_pretrained(model_name)
    MODELS[model_name] = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)


def inference(model_name, premise, hypothesis):
    model_input = TOKENIZERS[model_name](premise, hypothesis, truncation=False, return_tensors="pt")
    output = MODELS[model_name](model_input["input_ids"].to(device))  # device = "cuda:0" or "cpu"
    prediction = torch.softmax(output["logits"][0], -1).tolist()
    label_names = ["ENTAILMENT", "NEUTRAL", "CONTRADICTION"]
    return {name: round(float(pred) * 100, 1) for pred, name in zip(prediction, label_names)}

> loading MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
> loading MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli
> loading Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli




OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 

In [20]:
for i, elem in enumerate(df.iter_rows(named=True)):
    result = {'base': None, 'large1': None, 'large2': None}
    print()
    print(f"SAMPLE {elem['ID']} - LABEL: {elem['LABEL']}")
    for model_id, model_name in MAP.items(): 
        prediction = inference(model_name, elem['PREMISE'], elem['NEW HYPOTHESIS'])
        print(prediction)
        result[model_id] = max(prediction, key=prediction.get)
    print(f"{int(result['base']==elem['LABEL'])},{int(result['large1']==elem['LABEL'])},{int(result['large2']==elem['LABEL'])}")
    with open("scores.log", 'w+') as fout:
        print(f"{int(result['base']==elem['LABEL'])},{int(result['large1']==elem['LABEL'])},{int(result['large2']==elem['LABEL'])}",file=fout)

0,1,1
0,0,0
0,0,0
0,0,1
0,0,0
0,1,0
0,0,0
0,1,0
0,0,0
1,1,1
1,1,1
1,0,0
0,1,0
0,0,0
0,1,1
0,1,1
0,0,0
0,1,1
0,0,0
0,0,0
0,1,0
0,0,0
0,0,0
0,0,1
0,1,1
0,1,0
0,1,0
1,1,1
0,1,1
0,1,1
0,0,1
