In [None]:
# ================================================================
# 🦙 Zero-Shot Llama 3 with V1 Prompts
# ================================================================

import os
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from sklearn.metrics import accuracy_score, f1_score, cohen_kappa_score, classification_report
from tqdm import tqdm
from huggingface_hub import login


In [None]:

# ------------------------------------------------
# 1. Setup & Login
# ------------------------------------------------
# Accept the model license first:
#   https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
#
# SECURITY: never paste an access token into the notebook. The token that was
# previously hard-coded in this cell must be treated as leaked and revoked at
# https://huggingface.co/settings/tokens.
#
# The token is read from the HF_TOKEN environment variable. When the variable
# is unset or empty, HF_TOKEN is None and login() falls back to its
# interactive prompt; code that later passes token=HF_TOKEN then relies on the
# credentials stored by login().
HF_TOKEN = os.environ.get("HF_TOKEN") or None
login(token=HF_TOKEN)



In [None]:
# ------------------------------------------------
# 1. Paths and Device
# ------------------------------------------------
# Both locations can be overridden without editing the notebook by setting the
# BLOOM_TEST_PATH / BLOOM_SAVE_DIR environment variables. The defaults are the
# original hard-coded local paths, so existing setups keep working unchanged.
# NOTE(review): the default output folder is named "bart_zero_shot" although
# this notebook evaluates Llama 3 - confirm that this is intended.
test_path = os.environ.get(
    "BLOOM_TEST_PATH",
    r"E:\Shahnawaz Qureshi\NTNU Bloom-Project\Bloom-project\CLO_Classification\data\test.csv",
)
save_dir = os.environ.get(
    "BLOOM_SAVE_DIR",
    r"E:\Shahnawaz Qureshi\NTNU Bloom-Project\Bloom-project\CLO_Classification\models\bart_zero_shot",
)

# Create the output folder up front so the final report can be written to it.
os.makedirs(save_dir, exist_ok=True)

# Informational only: this value is printed here and is not passed to the
# model or pipeline in this cell.
device_id = 0 if torch.cuda.is_available() else -1
print(f"Using device ID: {device_id} (0=GPU, -1=CPU)")

In [None]:

# ------------------------------------------------
# 2. Load Llama 3 (4-bit quantized to reduce memory use)
# ------------------------------------------------
print("🚀 Loading Llama 3...")

# 4-bit NF4 weights with double quantization; computation runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
# Use the end-of-sequence token for padding as well.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # let the library decide where the weights are placed
    token=HF_TOKEN,
)

# Text-generation pipeline used for classification.
# Greedy decoding (do_sample=False) replaces the former
# "do_sample=True, temperature=0.01" setting: sampling without a fixed seed is
# only *nearly* deterministic, so reported metrics could differ between runs.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=15,  # keep it short (just the label)
    do_sample=False,
    return_full_text=False,  # return only the completion, not the prompt
)

In [None]:


# ------------------------------------------------
# 3. V1 Prompt Library
# ------------------------------------------------
# Five prompt styles with three templates each. Every template contains a
# single "{text}" placeholder that is filled with the learning objective.
# The apostrophe in "Bloom’s" had been corrupted by a wrong text decoding
# (it appeared as a three-character garbage sequence); it is restored here so
# the model is not sent mis-encoded characters.
PROMPTS_DIRECT = [
    "Classify the following learning objective according to Bloom’s Taxonomy level (Remember, Understand, Apply, Analyze, Evaluate, Create): {text}",
    "Identify which Bloom’s taxonomy category best represents the learning objective below: {text}",
    "Determine the Bloom’s cognitive level that this learning objective belongs to: {text}",
]
PROMPTS_CONTEXTUAL = [
    "You are an education expert evaluating learning objectives. Based on Bloom’s taxonomy, decide which cognitive level (Remember, Understand, Apply, Analyze, Evaluate, Create) best describes this statement: {text}",
    "As a teacher reviewing course outcomes, identify the Bloom’s taxonomy level demonstrated in this learning objective: {text}",
    "You are an educational researcher mapping outcomes to Bloom’s levels. Which category does this belong to? {text}",
]
PROMPTS_VERB = [
    "Bloom’s taxonomy associates action verbs with cognitive levels. Determine the correct Bloom level for the following learning objective, based on its main verb and meaning: {text}",
    "Analyze the main verb in this learning objective and identify which Bloom’s level it represents: {text}",
    "Considering verbs like define, explain, apply, analyze, evaluate, and create, classify this learning objective: {text}",
]
# NOTE(review): these templates ask for a brief explanation before the label;
# confirm that this is compatible with how the answers are generated and parsed.
PROMPTS_REFLECTIVE = [
    "Explain briefly what type of thinking this learning objective requires (e.g., recall, comprehension, application, critical analysis, evaluation, creativity), then state its Bloom’s taxonomy level: {text}",
    "Think like an instructor. Describe what mental process this objective involves, and then choose the Bloom’s taxonomy level: {text}",
    "Reflect on the cognitive process behind this statement and select the Bloom’s taxonomy category (Remember, Understand, Apply, Analyze, Evaluate, Create): {text}",
]
PROMPTS_DOMAIN = [
    "In the context of computer science education, identify the Bloom’s taxonomy level of this learning objective: {text}",
    "In business and management studies, determine the Bloom’s taxonomy category that best matches this outcome: {text}",
    "For engineering and technical courses, classify this learning objective according to Bloom’s taxonomy: {text}",
]

# Style name -> list of templates for that style.
PROMPT_LIBRARY = {
    "direct": PROMPTS_DIRECT,
    "contextual": PROMPTS_CONTEXTUAL,
    "verb_focused": PROMPTS_VERB,
    "reflective": PROMPTS_REFLECTIVE,
    "domain_specific": PROMPTS_DOMAIN
}

# The six Bloom's Taxonomy levels used as class labels.
label_cols = ['Remember', 'Understand', 'Apply', 'Analyze', 'Evaluate', 'Create']


In [None]:

# ------------------------------------------------
# 4. The Evaluation Loop
# ------------------------------------------------
def _extract_label(generated_text, labels):
    """Map free-form model output to one of ``labels``.

    The label that is mentioned FIRST in the text wins, regardless of the
    order of ``labels``. Matching is case-insensitive and uses the label with
    any trailing "e" removed as a stem, so inflected answers such as
    "Analyzing", "Evaluating" or "Creating" are recognised just like
    "Applying" or "Understanding".

    Returns the matching label, or "unknown" when none is mentioned.
    """
    lowered = generated_text.lower()
    best_label, best_pos = "unknown", len(lowered) + 1
    for label in labels:
        pos = lowered.find(label.lower().rstrip("e"))
        if pos != -1 and pos < best_pos:
            best_label, best_pos = label, pos
    return best_label


# Note: If running on Kaggle without uploaded data, create a dummy csv to test first!
if os.path.exists(test_path):
    test_df = pd.read_csv(test_path)
else:
    print("⚠️ WARNING: DATASET NOT FOUND. Update 'test_path' variable.")
    test_df = pd.DataFrame()  # Empty to prevent crash if path is wrong

# One summary row (accuracy, weighted F1) per prompt style.
results_summary = []

for style, prompts_list in PROMPT_LIBRARY.items():
    print(f"\n{'='*20} Testing Style: {style.upper()} {'='*20}")

    preds, trues = [], []

    # row_pos is the positional counter; the DataFrame index label is ignored
    # so the template rotation also works for non-integer or filtered indexes.
    for row_pos, (_, row) in enumerate(tqdm(test_df.iterrows(), total=len(test_df))):
        lo_text = str(row["Learning_outcome"])

        # Ground truth: the first label column whose value is 1.
        gold_labels = [col for col in label_cols if row.get(col) == 1]
        true_label = gold_labels[0] if gold_labels else "unknown"

        # --- PREPARE PROMPT ---
        # 1. Rotate through the templates of this style, one per row.
        template = prompts_list[row_pos % len(prompts_list)]

        # 2. Insert the learning objective into the template.
        user_content = template.format(text=lo_text)

        # 3. Format as a chat; the system instruction asks for a bare label.
        messages = [
            {"role": "system", "content": "You are a classifier. Answer with ONLY the category name from Bloom's Taxonomy. Do not explain."},
            {"role": "user", "content": user_content}
        ]

        # NOTE(review): the templated string may already contain the
        # beginning-of-text token, which the pipeline's tokenizer could add a
        # second time - verify against the installed transformers version.
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # --- GENERATE ---
        outputs = generator(prompt)
        generated_text = outputs[0]['generated_text'].strip()

        # --- CLEAN OUTPUT ---
        # e.g. "The answer is Create." -> "Create"
        pred_label = _extract_label(generated_text, label_cols)

        preds.append(pred_label)
        trues.append(true_label)

    # --- METRICS ---
    acc = accuracy_score(trues, preds)
    f1 = f1_score(trues, preds, average='weighted', zero_division=0)

    print(f"   >>> Accuracy: {acc:.4f} | F1: {f1:.4f}")

    # Per-level precision / recall / F1 / support table.
    report = classification_report(trues, preds, labels=label_cols, output_dict=True, zero_division=0)
    per_level_df = pd.DataFrame(report).transpose()
    per_level_df = per_level_df.loc[label_cols, ['precision', 'recall', 'f1-score', 'support']]
    display(per_level_df)

    results_summary.append({
        "Model": "Llama-3-8B",
        "Prompt_Style": style,
        "Accuracy": acc,
        "F1_Score": f1
    })


In [None]:

# ------------------------------------------------
# 5. Final Report
# ------------------------------------------------
# Rank the prompt styles by accuracy (best first), show the table and save it
# as a CSV file inside save_dir. The banner text previously contained
# mis-decoded characters in place of the trophy emoji; this is repaired here.
df_results = pd.DataFrame(results_summary).sort_values(by="Accuracy", ascending=False)
print("\n🏆 FINAL LLAMA REPORT")
display(df_results)
df_results.to_csv(os.path.join(save_dir, "llama_v1_results.csv"), index=False)