In [None]:
%pip install transformers accelerate bitsandbytes

In [None]:
# Open the file "../results/Human Annotation of LLaVA+ Rationales.xlsx" and open the sheet "LLAVA-1.5 Rationales-val 500 se"

# Its columns look like this: reindex	question	correct_answer	predicted_answer	is_correct	generated_rationale	image_path	vf_questions	vf_answers_GPT	hypothesis	alternative_hypotheses	gen_rationale_mask	alt_ent_prob	strict_sim

# We want a new column called ent_prob that uses the following functions:
import os
import re
import ast
import nltk
import torch
import json
import hashlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from tqdm import tqdm

# Load the entailment verifier in 8-bit. The quantization settings go through an
# explicit BitsAndBytesConfig because passing `load_in_8bit=True` directly to
# `from_pretrained` is deprecated in recent transformers releases. `device_map`
# is an argument of `from_pretrained`, not of BitsAndBytesConfig, so it is
# passed separately.
# The `globals()` guards keep a re-run of this cell from reloading the
# tokenizer and the model when they already exist in the kernel.
if 'nli_tokenizer' not in globals():
    # The tokenizer is loaded from the google/flan-t5-xxl checkpoint, while the
    # weights below come from the verifier checkpoint.
    nli_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
if 'nli_model' not in globals():
    nli_model = AutoModelForSeq2SeqLM.from_pretrained(
        "soumyasanyal/nli-entailment-verifier-xxl",
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
    )
    
def calc_support_prob(premise, hypothesis, nli_model=nli_model, nli_tokenizer=nli_tokenizer):
    """Return the model's probability that `premise` supports `hypothesis`.

    The verifier is asked a yes/no question about the pair. At the first
    decoder position, the logit of the first token of 'Yes' and the logit of
    the first token of 'No' are soft-maxed against each other, and the 'Yes'
    share is returned as a Python float.

    Args:
        premise: text inserted after "Premise:" in the prompt.
        hypothesis: text inserted after "Hypothesis:" in the prompt.
        nli_model: seq2seq model called with `input_ids` and `decoder_input_ids`.
        nli_tokenizer: tokenizer used for the prompt and for 'Yes' / 'No'.

    Returns:
        float: softmax weight of 'Yes' over {'Yes', 'No'}.
    """
    query = f"Premise: {premise}\nHypothesis: {hypothesis}\nGiven the premise, is the hypothesis correct?\nAnswer:"
    encoded = nli_tokenizer(query, return_tensors='pt').input_ids

    # Only the first token id of each answer word is compared.
    yes_token = nli_tokenizer('Yes').input_ids[0]
    no_token = nli_tokenizer('No').input_ids[0]

    with torch.no_grad():
        # A single all-zero decoder step: one column per batch row.
        decoder_start = torch.zeros((encoded.size(0), 1), dtype=torch.long)
        first_step = nli_model(encoded, decoder_input_ids=decoder_start).logits[:, 0]

        # Column 0 = 'Yes', column 1 = 'No'; cast to float before the softmax.
        yes_no = torch.stack([first_step[:, yes_token], first_step[:, no_token]], dim=1).float()
        shares = torch.nn.functional.softmax(yes_no, dim=1)
        entail_share = shares[:, 0].item()

    return entail_share

def generate_mask(generated_rationale, predicted_answer):
    """Replace every standalone occurrence of the predicted answer with ``<mask>``.

    Matching is case-insensitive and never fires inside a longer word.

    Args:
        generated_rationale: rationale text to mask.
        predicted_answer: answer to hide; converted with ``str()`` so numeric
            answers work.

    Returns:
        The rationale with each match replaced by ``"<mask>"``. An empty or
        whitespace-only answer leaves the rationale unchanged.
    """
    predicted_answer = str(predicted_answer)
    if not predicted_answer.strip():
        # A blank answer would compile to a pattern that matches at every word
        # boundary (or every inter-word space) and scatter masks over the text.
        return generated_rationale

    # Anchor with \b only on the sides where the answer itself begins/ends with
    # a word character. \b next to punctuation ("$5", "50%") requires a word
    # character on the other side, so such answers would otherwise never match
    # in ordinary text.
    lead = r'\b' if re.match(r'\w', predicted_answer[0]) else ''
    trail = r'\b' if re.match(r'\w', predicted_answer[-1]) else ''
    pattern = re.compile(lead + re.escape(predicted_answer) + trail, re.IGNORECASE)
    return pattern.sub("<mask>", generated_rationale)

def evaluate_support(data, nli_model, nli_tokenizer, use_mask=True, use_pieces=False, hypothesis_col='hypothesis', threshold=0.5, verbose=True):
    """Score how strongly each row's rationale supports its hypothesis.

    Args:
        data: DataFrame with the premise column selected below and `hypothesis_col`.
        nli_model: model forwarded to `calc_support_prob`.
        nli_tokenizer: tokenizer forwarded to `calc_support_prob`.
        use_mask: read the `*_mask` variant of the premise column.
        use_pieces: read `concat_rationale_pieces[_mask]` instead of
            `generated_rationale` / `gen_rationale_mask`.
        hypothesis_col: name of the column holding the hypothesis.
        threshold: a row counts as supported when its probability is strictly
            greater than this value.
        verbose: print premise, hypothesis and probability for every
            unsupported row.

    Returns:
        list[dict]: one ``{'entail_prob': float, 'support': bool}`` per row,
        in row order.
    """
    # The premise column does not depend on the row, so pick it once.
    if use_pieces:
        premise_col = 'concat_rationale_pieces_mask' if use_mask else 'concat_rationale_pieces'
    else:
        premise_col = 'gen_rationale_mask' if use_mask else 'generated_rationale'

    support_scores = []
    for _, row in data.iterrows():
        premise = row[premise_col]
        hypothesis = row[hypothesis_col]
        entail_prob = calc_support_prob(premise, hypothesis, nli_model=nli_model, nli_tokenizer=nli_tokenizer)
        support = entail_prob > threshold
        # Log exactly the rows reported as unsupported. The previous
        # `entail_prob < threshold` check skipped rows sitting on the threshold
        # even though they are marked unsupported.
        if verbose and not support:
            print(f"Premise: {premise}")
            print(f"Hypothesis: {hypothesis}")
            print(f"Probability: {entail_prob}")
        support_scores.append({
            'entail_prob': entail_prob,
            'support': support
        })
    return support_scores

# Register `progress_apply` on pandas objects so the row-wise scoring below
# reports progress.
tqdm.pandas()

# Workbook and sheet that hold the annotated rationales.
sheet_name = "LLAVA-1.5 Rationales-val 500 se"
file_path = "../results/Human Annotation of LLaVA+ Rationales.xlsx"
data = pd.read_excel(file_path, sheet_name=sheet_name)

# New column `ent_prob`: for each row, the entailment probability with the
# `gen_rationale_mask` text as premise and the `hypothesis` text as hypothesis.
data['ent_prob'] = data.progress_apply(
    lambda record: calc_support_prob(
        premise=record['gen_rationale_mask'],
        hypothesis=record['hypothesis'],
        nli_model=nli_model,
        nli_tokenizer=nli_tokenizer,
    ),
    axis=1,
)

data

In [None]:
# Write `data` to 'ent_prob_human_anno.xlsx' in the working directory,
# leaving out the DataFrame index.
data.to_excel('ent_prob_human_anno.xlsx', index=False)

In [None]:
# NOTE(review): pandas is imported again here, presumably so this cell can be
# the starting point after a kernel restart — confirm.
import pandas as pd

# Read back 'ent_prob_human_anno.xlsx', the same file name the preceding cell
# writes with `to_excel`, and rebind `data` to the result.
data = pd.read_excel('ent_prob_human_anno.xlsx')
data

In [None]:
# Function to convert scientific notation to fixed-point notation
def convert_to_fixed_point(alt_ent_prob):
    """Rewrite a stringified list of numbers in fixed-point form with 4 decimals.

    Args:
        alt_ent_prob: string holding a Python list literal such as
            ``"[1e-05, 0.5]"``. Elements may be numbers or numeric strings, so
            passing the function its own output is harmless.

    Returns:
        ``str()`` of the list of formatted strings, e.g.
        ``"['0.0000', '0.5000']"``.

    Raises:
        ValueError, SyntaxError: if the text is not a plain Python literal or
            an element cannot be read as a number.
    """
    # Imported locally so the function does not depend on an import made in
    # another cell.
    import ast

    # literal_eval only accepts literals, unlike eval, which would execute any
    # expression found in the spreadsheet cell.
    numbers = ast.literal_eval(alt_ent_prob)
    # float() makes the call idempotent: a second pass receives strings, which
    # the ':.4f' format spec would otherwise reject.
    formatted_numbers = [f"{float(num):.4f}" for num in numbers]
    # Convert the list back to string format
    return str(formatted_numbers)

# Apply the conversion function to the column, overwriting `alt_ent_prob` in
# place with the value returned by convert_to_fixed_point for each cell.
# NOTE(review): the strict_sim cell further down compares the values of this
# column against 0.5, so any rounding done here happens before that
# comparison (e.g. 0.49996 formatted to 4 decimals reads 0.5000) — confirm
# this is intended.
data['alt_ent_prob'] = data['alt_ent_prob'].apply(convert_to_fixed_point)

# Display the updated data
data

In [None]:
# Function to determine strict_sim
def calculate_strict_sim(row, threshold=0.5):
    """Tell whether only the main hypothesis is supported for this row.

    Args:
        row: mapping / Series with ``'ent_prob'`` (a number) and
            ``'alt_ent_prob'`` (a list of numbers or numeric strings, or the
            string form of such a list).
        threshold: support cut-off; defaults to 0.5.

    Returns:
        bool: True when ``ent_prob >= threshold`` and every alternative
        probability is strictly below ``threshold`` (vacuously true for an
        empty list of alternatives).

    Raises:
        ValueError, SyntaxError: if a string ``alt_ent_prob`` is not a plain
            Python literal.
    """
    # Imported locally so the function does not depend on an import made in
    # another cell.
    import ast

    alternatives = row['alt_ent_prob']
    if isinstance(alternatives, str):
        # literal_eval parses the list without executing arbitrary
        # expressions from the spreadsheet, which eval would do.
        alternatives = ast.literal_eval(alternatives)
    alt_ent_prob = [float(item) for item in alternatives]

    # bool() keeps the result a plain Python bool even when ent_prob is a
    # NumPy scalar (the comparison would otherwise leak np.bool_).
    return bool(row['ent_prob'] >= threshold) and all(item < threshold for item in alt_ent_prob)

# Update the strict_sim column: calculate_strict_sim is called once per row
# (axis=1) and its result replaces whatever the column held before.
data['strict_sim'] = data.apply(calculate_strict_sim, axis=1)

# Display the updated DataFrame
data

In [None]:
# Write the current `data` frame to 'ent_prob_human_anno_v2.xlsx', leaving out
# the DataFrame index. The file name differs from the earlier
# 'ent_prob_human_anno.xlsx' export, so that file is not overwritten.
data.to_excel('ent_prob_human_anno_v2.xlsx', index=False)