In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "microsoft/phi-4"

# Pick the accelerator to run on. Precedence matches the previous behaviour:
# MPS wins over CUDA, and CPU is the fallback when neither is available.
# `torch.backends.mps.is_available()` is used because it is the documented
# availability check and also exists on PyTorch versions that lack
# `torch.mps.is_available`.
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Load the tokenizer and the causal LM once; the model is moved to DEVICE so
# that later cells can feed it tensors placed on the same device.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)

In [None]:
import os, sys

# Put the parent of the current working directory on the import path
# (presumably so the project-local packages imported below resolve when the
# notebook is run from its own folder -- confirm against the repo layout).
dir2 = os.path.abspath("")
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

from complexity_estimation.tokenwise_entropy import TokenwiseEntropy
from utils.prompt import get_sys_prompt, get_user_prompt
import pandas as pd
import ast
import csv


def estimate_dataset(
    df,
    model,
    tokenizer,
    get_subject_from_row,
    get_question_from_row,
    get_options_from_row,
    verify_answer,
):
    """Ask the model each question in ``df`` and record entropy and correctness.

    For every row a system/user chat prompt is built, exactly one new token is
    generated, the generated ids are passed to ``TokenwiseEntropy.calculate``
    and the last generated token is decoded and handed to ``verify_answer``.

    Args:
        df: DataFrame with one question per row. It is modified in place: the
            columns ``entropy_ans_correct`` and ``entropy_ans_value`` are
            (re)written.
        model: Causal LM exposing ``generate``.
        tokenizer: Tokenizer exposing ``apply_chat_template``, ``__call__``,
            ``decode`` and ``eos_token_id``.
        get_subject_from_row: ``row -> subject`` passed to ``get_sys_prompt``.
        get_question_from_row: ``row -> question`` passed to ``get_user_prompt``.
        get_options_from_row: ``row -> options`` passed to ``get_user_prompt``.
        verify_answer: ``(row, decoded_answer) -> result`` stored per row in
            ``entropy_ans_correct``.

    Returns:
        The same ``df`` object, with the two result columns filled per row.
    """
    entropy_estimator = TokenwiseEntropy(llm_model=model, device=DEVICE)

    # Results are collected per row and written to the frame once at the end.
    # Assigning ``df[col] = value`` inside the loop would overwrite the whole
    # column on every iteration and leave only the last row's result.
    correctness_per_row = []
    entropy_per_row = []

    for _, row in df.iterrows():
        sys_prompt = get_sys_prompt(get_subject_from_row(row))
        user_prompt = get_user_prompt(get_question_from_row(row), get_options_from_row(row))
        messages = [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt},
        ]
        formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(DEVICE)

        outputs = model.generate(**inputs, max_new_tokens=1, pad_token_id=tokenizer.eos_token_id)

        # NOTE(review): the value is stored exactly as returned by the
        # estimator; if that is a tensor, convert it to a float before export.
        entropy_per_row.append(entropy_estimator.calculate(outputs))

        # One prompt per call, so take batch element 0 and its last token as a
        # 1-D id sequence, which is what ``decode`` expects.
        answer = tokenizer.decode(outputs[0, -1:], skip_special_tokens=True)

        correctness_per_row.append(verify_answer(row, answer))

    df["entropy_ans_correct"] = correctness_per_row
    df["entropy_ans_value"] = entropy_per_row

    return df


DATASET = "../data/mmlu_pro_stem.tsv"
OUTPUT_DATASET = "../data/mmlu_pro_stem_w_entropy.tsv"


def _is_correct_answer(row, model_answer):
    """Return True when the model's reply matches the row's reference answer.

    The reply is compared as an integer against ``answer_index + 1`` (the
    ``+ 1`` implies the model is expected to reply with a 1-based option
    number). A reply that is not an integer (empty string, a letter, a word
    fragment, ...) counts as incorrect instead of raising, so a single
    malformed generation cannot abort the whole evaluation run.
    """
    try:
        predicted = int(model_answer)
    except ValueError:
        return False
    return int(row["answer_index"]) + 1 == predicted


df = pd.read_csv(DATASET, sep="\t", header=0)

processed_df = estimate_dataset(
    df=df,
    model=model,
    tokenizer=tokenizer,
    get_subject_from_row=lambda row: row["base_cluster"],
    get_question_from_row=lambda row: row["question"],
    # The options column holds a Python-literal list serialized as text.
    get_options_from_row=lambda row: ast.literal_eval(row["options"]),
    verify_answer=_is_correct_answer,
)
processed_df.to_csv(OUTPUT_DATASET, sep="\t", quoting=csv.QUOTE_NONE)