In [3]:
import os
import torch
import polars as pl
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the raw CSV, report how many rows it holds, and show its columns
# as the cell's output.
df = pl.read_csv("data/llama3_raw_03.06.2024.csv")
print("Total samples", df.height)
df.columns

Total samples 210


['cid',
 'premise',
 'hypothesis',
 'label',
 'Generated Entailment Hypothesis',
 'Entailment Correct?',
 'Entailment Adversarial?',
 'Generated Neutral Hypothesis',
 'Neutral Correct?',
 'Neutral Adversarial',
 'Generated Contradiction Hypothesis',
 'Contradiction Correct?',
 'Contradiction Adversarial']

In [4]:
# Add all-null placeholder columns: for every NLI label, one "fools" flag per
# model followed by that label's difficulty score (same names and order as
# listing them by hand).
placeholder_names = []
for label_name in ("ENTAILMENT", "NEUTRAL", "CONTRADICTION"):
    for model_tag in ("BASE", "LARGE1", "LARGE2"):
        placeholder_names.append(f"new {label_name} hypothesis fools {model_tag}")
    placeholder_names.append(f"{label_name} difficulty score")

df = df.with_columns([pl.lit(None).alias(name) for name in placeholder_names])
df.columns

['cid',
 'premise',
 'hypothesis',
 'label',
 'Generated Entailment Hypothesis',
 'Entailment Correct?',
 'Entailment Adversarial?',
 'Generated Neutral Hypothesis',
 'Neutral Correct?',
 'Neutral Adversarial',
 'Generated Contradiction Hypothesis',
 'Contradiction Correct?',
 'Contradiction Adversarial',
 'new ENTAILMENT hypothesis fools BASE',
 'new ENTAILMENT hypothesis fools LARGE1',
 'new ENTAILMENT hypothesis fools LARGE2',
 'ENTAILMENT difficulty score',
 'new NEUTRAL hypothesis fools BASE',
 'new NEUTRAL hypothesis fools LARGE1',
 'new NEUTRAL hypothesis fools LARGE2',
 'NEUTRAL difficulty score',
 'new CONTRADICTION hypothesis fools BASE',
 'new CONTRADICTION hypothesis fools LARGE1',
 'new CONTRADICTION hypothesis fools LARGE2',
 'CONTRADICTION difficulty score']

In [5]:
# Registries filled below; both are keyed by the full hub model name.
MODELS, TOKENIZERS = {}, {}

model_name_base = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
model_name_large = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
model_name_large_2 = "Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli"

# Short model id -> hub model name. The upper-cased ids match the suffixes of
# the "... fools BASE/LARGE1/LARGE2" columns.
MAP = dict(
    base=model_name_base,
    large1=model_name_large,
    large2=model_name_large_2,
)

# NOTE(review): every checkpoint is moved to `device` and kept there at the
# same time; the recorded output of this cell ends in a CUDA out-of-memory
# error, so loading one model at a time may be necessary on smaller GPUs.
for model_name in MAP.values():
    print(f"> loading {model_name}")
    TOKENIZERS[model_name] = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    MODELS[model_name] = loaded_model.to(device)


# Class order assumed when a checkpoint's config does not name its classes.
_DEFAULT_LABEL_NAMES = ["ENTAILMENT", "NEUTRAL", "CONTRADICTION"]


def _label_names(model):
    """Return the upper-cased class names of `model`, ordered by logit index.

    The checkpoint's own ``config.id2label`` is used when it names exactly the
    three NLI classes; otherwise the hard-coded default order is returned.
    """
    id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
    try:
        ordered = sorted(id2label.items(), key=lambda item: int(item[0]))
    except (TypeError, ValueError):
        return list(_DEFAULT_LABEL_NAMES)
    names = [str(name).upper() for _, name in ordered]
    if sorted(names) == sorted(_DEFAULT_LABEL_NAMES):
        return names
    return list(_DEFAULT_LABEL_NAMES)


def inference(model_name, premise, hypothesis):
    """Score one premise/hypothesis pair with a loaded NLI model.

    Args:
        model_name: Key into the module-level ``TOKENIZERS`` / ``MODELS`` dicts.
        premise: Premise text.
        hypothesis: Hypothesis text.

    Returns:
        Dict mapping "ENTAILMENT", "NEUTRAL" and "CONTRADICTION" to the softmax
        probability expressed as a percentage rounded to one decimal place.

    Raises:
        KeyError: If ``model_name`` has not been loaded.
    """
    tokenizer = TOKENIZERS[model_name]
    model = MODELS[model_name]

    encoded = tokenizer(premise, hypothesis, truncation=False, return_tensors="pt")
    # Forward every tensor the tokenizer produced (attention mask included),
    # not just the input ids, and place them on the same device as the model.
    encoded = {key: tensor.to(device) for key, tensor in encoded.items()}

    # Pure inference: skip autograd bookkeeping so no graph is kept in memory.
    with torch.no_grad():
        output = model(**encoded)

    probabilities = torch.softmax(output["logits"][0], -1).tolist()
    return {
        name: round(float(prob) * 100, 1)
        for prob, name in zip(probabilities, _label_names(model))
    }

def predict(model_name, premise, hypothesis, label):
    """Return 1 when the model's top class differs from `label`, else 0.

    The top class is the highest-scoring key of the dict returned by
    `inference`; the comparison with `label` is an exact (case-sensitive)
    inequality, so `label` must use the same spelling as those keys.
    """
    scores = inference(model_name, premise, hypothesis)
    # On ties the first key in dict order wins.
    top_label, _ = max(scores.items(), key=lambda item: item[1])
    return 1 if top_label != label else 0

> loading MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
> loading MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli
> loading Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli




OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 

In [None]:
# For each generated-hypothesis kind: (target label, column holding the
# generated hypothesis, column holding the manual correctness check).
GENERATED_HYPOTHESES = [
    ("ENTAILMENT", "Generated Entailment Hypothesis", "Entailment Correct?"),
    ("NEUTRAL", "Generated Neutral Hypothesis", "Neutral Correct?"),
    ("CONTRADICTION", "Generated Contradiction Hypothesis", "Contradiction Correct?"),
]

# Column name -> one flag per row (1 = model fooled, 0 = not fooled,
# None = hypothesis not marked correct, so it is not evaluated).
fool_flags = {}
for model_id, model_name in MAP.items():
    print(f"> using model {model_id} :: {model_name}")
    for target_label, hypothesis_col, correct_col in GENERATED_HYPOTHESES:
        flag_col = f"new {target_label} hypothesis fools {model_id.upper()}"
        fool_flags[flag_col] = [
            # Score the *generated* hypothesis against the label it was
            # generated for, not the dataset's original hypothesis/label.
            predict(model_name, row["premise"], row[hypothesis_col], target_label)
            # Accept both the string 'TRUE' and a parsed boolean True.
            if str(row[correct_col]).strip().upper() == "TRUE"
            else None
            for row in df.iter_rows(named=True)
        ]

# Dicts yielded by `iter_rows` are copies, so assigning into them never
# reaches the frame; write the collected flags back as real columns instead.
df = df.with_columns(
    [pl.Series(flag_col, flags, dtype=pl.Int64) for flag_col, flags in fool_flags.items()]
)

In [None]:
# Difficulty score per label = sum of the three per-model "fools" flags
# for that label (BASE + LARGE1 + LARGE2).
difficulty_exprs = []
for label_name in ("ENTAILMENT", "NEUTRAL", "CONTRADICTION"):
    fools_base = pl.col(f"new {label_name} hypothesis fools BASE")
    fools_large1 = pl.col(f"new {label_name} hypothesis fools LARGE1")
    fools_large2 = pl.col(f"new {label_name} hypothesis fools LARGE2")
    total = fools_base + fools_large1 + fools_large2
    difficulty_exprs.append(total.alias(f"{label_name} difficulty score"))

df = df.with_columns(difficulty_exprs)
df.columns