In [1]:
import torch
import polars as pl
from transformers import AutoTokenizer, AutoModelForSequenceClassification
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("> using", device)
LABEL_NAMES = ["ENTAILMENT", "NEUTRAL", "CONTRADICTION"]
input_file_1 = "data/gpt4o_raw_03.06.2024.csv"
input_file_2 = "data/GPT-4o_raw_single_class_prompt.csv"
output_name = "data/gpt4o-03.04_and_single_class_prompt.csv"
df1 = pl.read_csv(input_file_1)
df2 = pl.read_csv(input_file_2)
df = pl.concat([df1, df2])
print("DF1 samples", df1.height)
print("DF2 samples", df2.height)
print("Total samples", df.height)
df.columns

  from .autonotebook import tqdm as notebook_tqdm


> using cuda
DF1 samples 17
DF2 samples 29
Total samples 46


['cid',
 'premise',
 'hypothesis',
 'label',
 'Generated ENTAILMENT Hypothesis',
 'ENTAILMENT Correct?',
 'ENTAILMENT Adversarial?',
 'Generated NEUTRAL Hypothesis',
 'NEUTRAL Correct?',
 'NEUTRAL Adversarial',
 'Generated CONTRADICTION Hypothesis',
 'CONTRADICTION Correct?',
 'CONTRADICTION Adversarial']

# Compute the scores
Check whether the samples generated by LLama3 that are *correct* (i.e., checked by a human), are also *difficult* for the model (i.e., the model fails to classify them correctly).

This section only writes on another `.csv` file the results.

In [2]:
MODELS = {}
TOKENIZERS = {}

model_name_base = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
model_name_large = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
model_name_large_2 = "Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli"
MAP = {
    'base' : model_name_base,
    'large1': model_name_large,
    'large2': model_name_large_2,
}

for model_name in MAP.values():
    print(f"> loading {model_name}")
    TOKENIZERS[model_name] = AutoTokenizer.from_pretrained(model_name, cache_dir='./.hf_cache')
    MODELS[model_name] = AutoModelForSequenceClassification.from_pretrained(model_name, cache_dir='./.hf_cache').to(device)


def inference(model_name, premise, hypothesis):
    model_input = TOKENIZERS[model_name](premise, hypothesis, truncation=False, return_tensors="pt")
    output = MODELS[model_name](model_input["input_ids"].to(device))  # device = "cuda:0" or "cpu"
    prediction = torch.softmax(output["logits"][0], -1).tolist()
    return {name: round(float(pred) * 100, 1) for pred, name in zip(prediction, LABEL_NAMES)}

def predict(model_name, premise, hypothesis, label):
    prediction = inference(model_name, premise, hypothesis)
    predicted = max(prediction, key=prediction.get)
    return int(predicted != label)

> loading MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli


    Found GPU1 NVIDIA GeForce GT 710 which is of cuda capability 3.5.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is 3.7.
    


> loading MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli
> loading Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli




In [3]:
from tqdm import tqdm
from pprint import pprint
res = {}
for label in LABEL_NAMES:
    for model_id in MAP.keys():
        res[f"new {label} hypothesis fools {model_id.upper()}"] = []
#pprint(res)

for model_id, model_name in MAP.items():
    for i, elem in tqdm(enumerate(df.iter_rows(named=True)), desc=f'> model {model_id}', total=df.height):
        for label in LABEL_NAMES:
            # filters empty cells
            if elem[f'Generated {label} Hypothesis'] == '':
                continue
            if elem[f'{label} Correct?'] == True:
                int_flag = predict(model_name, elem['premise'], elem['hypothesis'], elem['label'])
                res[f'new {label} hypothesis fools {model_id.upper()}'].append(int_flag)
            else:
                res[f'new {label} hypothesis fools {model_id.upper()}'].append(None)
#pprint(res)

> model base: 100%|██████████| 46/46 [00:01<00:00, 32.81it/s]
> model large1: 100%|██████████| 46/46 [00:02<00:00, 18.74it/s]
> model large2: 100%|██████████| 46/46 [00:02<00:00, 18.61it/s]


In [4]:
print(res)

{'new ENTAILMENT hypothesis fools BASE': [0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], 'new ENTAILMENT hypothesis fools LARGE1': [0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], 'new ENTAILMENT hypothesis fools LARGE2': [0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], 'new NEUTRAL hypothesis fools BASE': [0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, None, None, None, None, None, None, None, None, None, None, None, 1, 0, 0, 0, 0, 0, 1, 0, 0, None, None, None, None, None, None, None, None, None, None], 'new NEUTRAL hypothesis fools LARGE1': [0, 0

In [5]:
for label in LABEL_NAMES:
    for model_id in MAP.keys():
        df = df.with_columns(
            pl.Series(
                name=f"new {label} hypothesis fools {model_id.upper()}", 
                values=res[f"new {label} hypothesis fools {model_id.upper()}"]
            ),
        )
df.columns
df.head()

cid,premise,hypothesis,label,Generated ENTAILMENT Hypothesis,ENTAILMENT Correct?,ENTAILMENT Adversarial?,Generated NEUTRAL Hypothesis,NEUTRAL Correct?,NEUTRAL Adversarial,Generated CONTRADICTION Hypothesis,CONTRADICTION Correct?,CONTRADICTION Adversarial,new ENTAILMENT hypothesis fools BASE,new ENTAILMENT hypothesis fools LARGE1,new ENTAILMENT hypothesis fools LARGE2,new NEUTRAL hypothesis fools BASE,new NEUTRAL hypothesis fools LARGE1,new NEUTRAL hypothesis fools LARGE2,new CONTRADICTION hypothesis fools BASE,new CONTRADICTION hypothesis fools LARGE1,new CONTRADICTION hypothesis fools LARGE2
i64,str,str,str,str,bool,bool,str,bool,bool,str,bool,bool,i64,i64,i64,i64,i64,i64,i64,i64,i64
184052,"""Gangs of New York . The screen…","""Kenneth Lonergan is a writer o…","""ENTAILMENT""","""Kenneth Lonergan, who contribu…",True,False,"""Kenneth Lonergan, known for hi…",True,False,"""Gangs of New York was co-writt…",True,False,0,0,0,0,0,0,0,0,0
217178,"""Pakistan Movement . The Pakist…","""The Pakistan Movement was the …","""NEUTRAL""","""The Pakistan Movement involved…",True,False,"""The Pakistan Movement was prim…",True,False,"""The Pakistan Movement had no i…",True,False,0,0,0,0,0,0,0,0,0
169984,"""The Brat Pack is a nickname gi…","""The Brat Pack is a nickname gi…","""CONTRADICTION""","""The Brat Pack consisted of act…",True,False,"""Members of the Brat Pack were …",True,False,"""The Brat Pack refers to a grou…",True,False,1,1,1,1,1,1,1,1,1
120315,"""Bret Easton Ellis . Ellis also…","""Bret Easton Ellis barely wrote…","""CONTRADICTION""","""Bret Easton Ellis is the write…",True,False,"""Bret Easton Ellis's work on Th…",True,False,"""The Canyons, a 2013 film, was …",True,False,1,1,1,1,1,1,1,1,1
145735,"""New York Knicks . The Knicks c…","""The New York Knicks are in the…","""ENTAILMENT""","""The New York Knicks are part o…",True,False,"""The New York Knicks have won m…",True,False,"""The New York Knicks are a foot…",True,False,0,0,0,0,0,0,0,0,0


In [6]:
new_df = df.with_columns(
    (
        pl.col("new ENTAILMENT hypothesis fools BASE")   +  
        pl.col("new ENTAILMENT hypothesis fools LARGE1") + 
        pl.col("new ENTAILMENT hypothesis fools LARGE2")
    ).alias("ENTAILMENT difficulty score"),
    (
        pl.col("new NEUTRAL hypothesis fools BASE")   +  
        pl.col("new NEUTRAL hypothesis fools LARGE1") + 
        pl.col("new NEUTRAL hypothesis fools LARGE2")
    ).alias("NEUTRAL difficulty score"),
    (
        pl.col("new CONTRADICTION hypothesis fools BASE")   +  
        pl.col("new CONTRADICTION hypothesis fools LARGE1") + 
        pl.col("new CONTRADICTION hypothesis fools LARGE2")
    ).alias("CONTRADICTION difficulty score"),
)
new_df.head()

cid,premise,hypothesis,label,Generated ENTAILMENT Hypothesis,ENTAILMENT Correct?,ENTAILMENT Adversarial?,Generated NEUTRAL Hypothesis,NEUTRAL Correct?,NEUTRAL Adversarial,Generated CONTRADICTION Hypothesis,CONTRADICTION Correct?,CONTRADICTION Adversarial,new ENTAILMENT hypothesis fools BASE,new ENTAILMENT hypothesis fools LARGE1,new ENTAILMENT hypothesis fools LARGE2,new NEUTRAL hypothesis fools BASE,new NEUTRAL hypothesis fools LARGE1,new NEUTRAL hypothesis fools LARGE2,new CONTRADICTION hypothesis fools BASE,new CONTRADICTION hypothesis fools LARGE1,new CONTRADICTION hypothesis fools LARGE2,ENTAILMENT difficulty score,NEUTRAL difficulty score,CONTRADICTION difficulty score
i64,str,str,str,str,bool,bool,str,bool,bool,str,bool,bool,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
184052,"""Gangs of New York . The screen…","""Kenneth Lonergan is a writer o…","""ENTAILMENT""","""Kenneth Lonergan, who contribu…",True,False,"""Kenneth Lonergan, known for hi…",True,False,"""Gangs of New York was co-writt…",True,False,0,0,0,0,0,0,0,0,0,0,0,0
217178,"""Pakistan Movement . The Pakist…","""The Pakistan Movement was the …","""NEUTRAL""","""The Pakistan Movement involved…",True,False,"""The Pakistan Movement was prim…",True,False,"""The Pakistan Movement had no i…",True,False,0,0,0,0,0,0,0,0,0,0,0,0
169984,"""The Brat Pack is a nickname gi…","""The Brat Pack is a nickname gi…","""CONTRADICTION""","""The Brat Pack consisted of act…",True,False,"""Members of the Brat Pack were …",True,False,"""The Brat Pack refers to a grou…",True,False,1,1,1,1,1,1,1,1,1,3,3,3
120315,"""Bret Easton Ellis . Ellis also…","""Bret Easton Ellis barely wrote…","""CONTRADICTION""","""Bret Easton Ellis is the write…",True,False,"""Bret Easton Ellis's work on Th…",True,False,"""The Canyons, a 2013 film, was …",True,False,1,1,1,1,1,1,1,1,1,3,3,3
145735,"""New York Knicks . The Knicks c…","""The New York Knicks are in the…","""ENTAILMENT""","""The New York Knicks are part o…",True,False,"""The New York Knicks have won m…",True,False,"""The New York Knicks are a foot…",True,False,0,0,0,0,0,0,0,0,0,0,0,0


In [7]:
new_df.write_csv(output_name)
print("> Written stats csv.")

> Written stats csv.
