# In [None]:
# -------------------------------
# Phi-4-mini-flash-reasoning: per-question inference over a CSV, with an optional Hugging Face access token
# -------------------------------

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import gc
import time
import os

# -------------------------------
# 0. Memory cleanup
# -------------------------------
# Start from a clean slate: run Python's garbage collector, then hand any
# cached-but-unused CUDA memory back to the driver when a GPU is present.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# -------------------------------
# 1. Load dataset
# -------------------------------
# Read the evaluation set, preferring UTF-8 and falling back to ISO-8859-1
# when the file contains bytes that are not valid UTF-8.
final_datasheet = "finalDataset.csv"
try:
    data = pd.read_csv(final_datasheet, encoding="utf-8")
except UnicodeDecodeError:
    print("UTF-8 failed, trying ISO-8859-1...")
    data = pd.read_csv(final_datasheet, encoding="ISO-8859-1")

# Result columns, filled in row by row during evaluation.
data = data.assign(ModelAnswer="", Correct="")

# -------------------------------
# 2. Model name & optional token
# -------------------------------
model_name = "microsoft/Phi-4-mini-flash-reasoning"

# The token is optional. An empty or whitespace-only HF_TOKEN is normalised to
# None so that it is treated exactly like an unset variable: the warning below
# fires and no bogus credential is ever forwarded to the Hub.
huggingface_token = (os.getenv("HF_TOKEN") or "").strip() or None  # optional

if huggingface_token is None:
    print("⚠️ No Hugging Face token found. Will try to download public model without token.")

# -------------------------------
# 3. Load tokenizer and model
# -------------------------------
# `token` is the current keyword for Hub authentication; the older
# `use_auth_token` spelling is deprecated in transformers. The kwarg is only
# passed when a token is actually available.
token_kwargs = {"token": huggingface_token} if huggingface_token else {}
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, **token_kwargs)

print("Loading model...")
# Target device for input tensors: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): float16 is requested unconditionally; on a CPU-only host this
# may be slow or unsupported for some ops — confirm before running without a GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    **token_kwargs,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# -------------------------------
# 4. Generate function
# -------------------------------
def generate_answer(prompt, max_new_tokens=100):
    """Generate the model's completion for ``prompt`` using greedy decoding.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt: Full text prompt to feed to the model.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Defaults to 100.

    Returns:
        The decoded completion only (prompt excluded), with special tokens
        removed and surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # Greedy decoding: `temperature` is only meaningful when sampling, so
        # it is deliberately not passed together with do_sample=False.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # position instead of string-replacing the decoded prompt: decoding does
    # not always reproduce the prompt text exactly, which would leave the
    # prompt inside the "answer".
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# -------------------------------
# 5. Loop through dataset
# -------------------------------
start_time = time.time()
total_questions = len(data)
correct_answers = 0

# `position` is a 1-based row counter for progress reporting; `idx` is the
# DataFrame index label used for writing results back.
for position, (idx, row) in enumerate(data.iterrows(), start=1):
    prompt = f"""
You are a person from India with deep knowledge and lived experience of Indian culture.
Now, answer the following question using your expertise in Indian culture by identifying the specific cultural element being referred to.
Respond only with the name of the cultural element (e.g., Indian) — no additional text, questions, or explanations.

Question: {row['Corrected Question']}
"""
    model_answer = generate_answer(prompt)

    # Missing answers are read by pandas as NaN (a float), which has no
    # .strip(); normalise to a lowercase string first.
    raw_expected = row['Answer']
    expected_answer = "" if pd.isna(raw_expected) else str(raw_expected).strip().lower()

    # An empty expected answer must never count as correct: "" is a substring
    # of every string.
    prediction_correctness = bool(expected_answer) and expected_answer in model_answer.lower()
    if prediction_correctness:
        correct_answers += 1

    data.at[idx, 'ModelAnswer'] = model_answer
    data.at[idx, 'Correct'] = str(prediction_correctness)

    print(f"Progress: {position}/{total_questions} | Correct so far: {correct_answers}")

# -------------------------------
# 6. Summary & save
# -------------------------------
# Guard against an empty dataset so the division cannot raise and the
# (empty) results file is still written.
accuracy = (correct_answers / total_questions) * 100 if total_questions else 0.0
print(f"Total correct: {correct_answers}/{total_questions}")
print(f"Accuracy: {accuracy:.2f}%")

# Persist the input rows together with the model's answers and verdicts.
output_file = "model_answers_results_phi_mini.csv"
data.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")
print(f"Total runtime: {time.time() - start_time:.2f} seconds")

# -------------------------------
# 7. Cleanup
# -------------------------------
# Run Python's garbage collector, then release PyTorch's cached CUDA memory
# when a GPU is present.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

