<a href="https://colab.research.google.com/github/NextME14/BuddhismEval/blob/main/BuddhismEval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Dependencies

In [None]:
# Third-party packages used by the cells below: requests (HTTP calls to the
# API), pandas (result tables) and openpyxl (Excel writer engine for pandas).
!pip install --quiet requests pandas openpyxl


# Set Up Your API Key

In [None]:
import os
from getpass import getpass

# Prompt for the key at run time instead of pasting it into this cell, so the
# secret is never stored in the notebook source when it is saved or shared.
# A key that is already present in the environment is reused without prompting.
if not os.environ.get("OPENROUTER_API_KEY"):
    os.environ["OPENROUTER_API_KEY"] = getpass("OpenRouter API key: ")


# Upload Your JSONL

In [None]:
from google.colab import files

# Opens the Colab file picker and keeps the returned value in `uploaded`.
uploaded = files.upload()  # click “Choose Files” and pick your .jsonl
# The evaluator cell reads /content/english_eval.jsonl, so the uploaded file
# must be named english_eval.jsonl for that path to exist afterwards.
# NOTE(review): assumes Colab stores uploads under /content — confirm.


# Evaluator

In [None]:
import json, time, os, pandas as pd
from collections import defaultdict
import requests

# ─── CONFIG ─────────────────────
# Chat-completions endpoint, and the bearer token taken from the environment
# (None when OPENROUTER_API_KEY is not set).
API_URL = "https://openrouter.ai/api/v1/chat/completions"
API_KEY = os.environ.get("OPENROUTER_API_KEY")

# Input dataset (one JSON object per line) and the report file name pattern;
# "{mode}" is substituted by evaluate().
DATA_PATH = "/content/english_eval.jsonl"
OUTPUT_TEMPLATE = "/content/English_evaluation_claud_{mode}prompt.xlsx"
# ─────────────────────────────────

def init_results():
    """Create a fresh, empty accumulator for one evaluation run.

    Returns a dict holding the per-question records ('questions'), three
    zero-defaulting counters ('correct', 'incorrect', 'missed'), a per-label
    table whose entries start with all three counts at 0
    ('q_label_breakdown'), and the wall-clock start time ('start_time').
    """
    def _blank_counts():
        return {'correct': 0, 'incorrect': 0, 'missed': 0}

    state = {'questions': []}
    for bucket in ('correct', 'incorrect', 'missed'):
        state[bucket] = defaultdict(int)
    state['q_label_breakdown'] = defaultdict(_blank_counts)
    state['start_time'] = time.time()
    return state

def load_data(path):
    """Read a JSON Lines file into a list of parsed objects.

    Blank lines are ignored.  A line that is not valid JSON is reported on
    stdout and skipped; it does not abort the load.
    """
    records = []
    with open(path, encoding="utf-8") as handle:
        for raw in handle:
            stripped = raw.strip()
            if stripped:
                try:
                    parsed = json.loads(stripped)
                except json.JSONDecodeError as err:
                    print(f"[load_data] skipping bad line: {err}")
                else:
                    records.append(parsed)
    return records

def apply_prompt(entry):
    """Render the instruction prompt for one three-option question.

    Reads entry['question'] and the first three items of entry['options']
    and returns a single string wrapped in [INST] ... [/INST] markers that
    asks for the final answer as 1, 2 or 3.
    """
    question = entry['question']
    options = entry['options']
    first, second, third = options[0], options[1], options[2]
    segments = (
        "[INST] You are a Theravada Buddhist scholar with deep knowledge of the Dhammapada.\n",
        "Analyze the question step-by-step internally, but only output the final answer (1, 2, or 3).\n\n",
        f"Question: {question}\n\n",
        "Options:\n",
        f"1. {first}\n",
        f"2. {second}\n",
        f"3. {third}\n\n",
        "Your final answer (1, 2, or 3): [/INST]",
    )
    return "".join(segments)

def send_api_request(prompt, timeout=60):
    """POST one single-turn chat-completion request and return the decoded JSON.

    Args:
        prompt: text sent as the content of the single user message.
        timeout: seconds to wait for the server before giving up.  requests
            applies no timeout by default, so without this a stalled
            connection would block the whole evaluation run indefinitely.

    Returns:
        The parsed JSON response body, or None when the request failed
        (network error, timeout, non-2xx status, or a body that is not
        valid JSON).  Failures are printed, never raised.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "anthropic/claude-3-haiku",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
        "max_tokens": 150
    }
    try:
        r = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        r.raise_for_status()
        return r.json()
    except Exception as e:
        # Deliberately broad: one failed call must not abort a long run;
        # the caller treats None as "no answer" for this question.
        print(f"[API error] {e}")
        return None

def predict_option(entry):
    """Ask the model to answer one multiple-choice entry and parse its reply.

    Args:
        entry: question record; passed to apply_prompt() to build the prompt.

    Returns:
        A (choice, text) tuple.  ``choice`` is 1, 2 or 3 when the stripped
        reply starts with that digit, otherwise None.  ``text`` is the
        stripped reply, or None when no usable reply was received (request
        failed, response lacked the expected structure, or the reply was
        empty).
    """
    prompt = apply_prompt(entry)
    resp = send_api_request(prompt)
    if not resp:
        return None, None

    try:
        content = resp["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Error payloads or partial responses may lack the expected nesting;
        # treat them as "no answer" rather than crashing the whole run.
        return None, None
    if not isinstance(content, str):
        # A null content field would otherwise fail on .strip().
        return None, None

    txt = content.strip()
    for opt in ('1', '2', '3'):
        if txt.startswith(opt):
            return int(opt), txt
    # Keep the raw reply so the report can tell an unparseable answer apart
    # from a request that returned nothing.
    return None, txt or None

def process_question(item, results):
    """Score one question and fold the outcome into ``results``.

    A per-question record is appended to results['questions'], and exactly
    one of the 'correct' / 'incorrect' / 'missed' counters is incremented,
    both overall (under the 'total' key) and for the question's label.
    A question counts as missed when its correct answer is not among its
    options (the model is not queried in that case) or when
    predict_option() yields no choice.
    """
    tag = str(item.get('q_label', 'Unknown'))
    record = {
        'id': item['id'],
        'question': item['question'],
        'options': item['options'],
        'correct_answer': item['correct_answer'],
        'q_label': tag,
        'response': None,
        'result': 'missed',
        'reasoning': ''
    }

    def bump(outcome):
        # Overall counter first, then the per-label one.
        results[outcome]['total'] += 1
        results['q_label_breakdown'][tag][outcome] += 1

    # 1-based position of the right answer, or None if it is not an option.
    try:
        expected = item['options'].index(item['correct_answer']) + 1
    except ValueError:
        expected = None

    if expected is not None:
        choice, rationale = predict_option(item)
        record['reasoning'] = rationale or "No response"
        if choice is not None:
            record['response'] = choice
            record['result'] = 'correct' if choice == expected else 'incorrect'

    bump(record['result'])
    results['questions'].append(record)

def generate_report(path, results):
    """Write the evaluation results to a three-sheet Excel workbook.

    Sheets written:
        'Details'        - one row per record in results['questions'].
        'Summary'        - total/correct/incorrect/missed counts, overall
                           accuracy and elapsed seconds since
                           results['start_time'].
        'Label Analysis' - per-label counts and accuracy, sorted by label.

    Args:
        path: destination .xlsx file.  Its parent directory is created when
            missing; a bare file name (no directory part) is also accepted.
        results: accumulator with the keys created by init_results().
    """
    df_det = pd.DataFrame(results['questions'])
    total = len(results['questions'])
    n_correct = results['correct']['total']
    # With no questions there is no meaningful accuracy; report "N/A" (as the
    # per-label table does) instead of dividing by zero.
    overall_accuracy = f"{n_correct/total*100:.2f}%" if total else "N/A"
    summary = {
        'Metric': ["Total", "Correct", "Incorrect", "Missed", "Accuracy", "Time(s)"],
        'Value': [
            total,
            n_correct,
            results['incorrect']['total'],
            results['missed']['total'],
            overall_accuracy,
            f"{time.time()-results['start_time']:.2f}"
        ]
    }
    df_sum = pd.DataFrame(summary)

    breakdown = []
    for lbl, st in sorted(results['q_label_breakdown'].items()):
        c, i, m = st['correct'], st['incorrect'], st['missed']
        tot = c + i + m
        breakdown.append({
            'Label': lbl,
            'Correct': c, 'Incorrect': i, 'Missed': m,
            'Accuracy': f"{c/tot*100:.2f}%" if tot else "N/A"
        })
    df_bd = pd.DataFrame(breakdown)

    # os.path.dirname() is '' for a bare file name and os.makedirs('') raises,
    # so only create the directory when there is one to create.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with pd.ExcelWriter(path, engine='openpyxl') as w:
        df_det.to_excel(w, sheet_name='Details', index=False)
        df_sum.to_excel(w, sheet_name='Summary', index=False)
        df_bd.to_excel(w, sheet_name='Label Analysis', index=False)

def evaluate():
    """Run the full benchmark: load, score every question, write the report.

    Reads the dataset at DATA_PATH, passes each record to
    process_question(), prints progress after every 10th question, writes
    the workbook for mode "long", and returns the path it was written to.
    """
    print(" Loading data…")
    dataset = load_data(DATA_PATH)
    n_questions = len(dataset)
    print(f"→ {n_questions} questions loaded.")

    tally = init_results()
    done = 0
    for record in dataset:
        process_question(record, tally)
        done += 1
        if done % 10 == 0:
            print(f"  • {done}/{n_questions} done")

    report_path = OUTPUT_TEMPLATE.format(mode="long")
    generate_report(report_path, tally)
    print(f" Report saved to {report_path}")
    return report_path

# Kick off the evaluation when this cell/script is executed directly.
if __name__ == "__main__":
    evaluate()


# Download Your Report

In [None]:
from google.colab import files
# This path must stay in sync with OUTPUT_TEMPLATE formatted with mode="long",
# which is the file evaluate() writes.
files.download("/content/English_evaluation_claud_longprompt.xlsx")
