# 🤖 Claude Haiku Fine-Tune Demo

In [None]:
import json

# Load evaluation samples.
# The file is read as JSON Lines: one JSON document per line. The code below
# reads the "prompt" and "expected" keys of every record.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default, and whitespace-only lines (for example a trailing empty
# line at the end of the file) are skipped because json.loads raises
# JSONDecodeError on them.
with open("../data/eval_data.jsonl", "r", encoding="utf-8") as f:
    eval_samples = [json.loads(line) for line in f if line.strip()]

# Preview: print each sample's prompt and expected answer, followed by a
# 50-character separator rule. Samples are numbered from 1 for display.
for i, sample in enumerate(eval_samples):
    print(f"Sample {i+1}:")
    print("Prompt:", sample["prompt"])
    print("Expected:", sample["expected"])
    print("-" * 50)


In [None]:
# Simulate predictions from base vs fine-tuned Haiku
def mock_model_predict(prompt, model="base"):
    """Return a canned answer standing in for a real model call.

    The prompt is matched case-insensitively against a fixed list of
    keywords, checked in order; the first keyword found decides the answer.

    Args:
        prompt: Prompt text to answer.
        model: "finetuned" selects the fine-tuned answer; any other value
            (including the default "base") selects the base answer.

    Returns:
        The canned answer string for the first matching keyword, or None
        when the prompt contains none of the known keywords.
    """
    # (keyword, fine-tuned answer, base answer) -- order matters: "diagnosis"
    # wins when a prompt contains both keywords.
    canned_answers = (
        ("diagnosis", "Post-traumatic stress disorder", "stress"),
        ("claim value", "$10,000 CAD", "$7,500"),
    )
    lowered = prompt.lower()
    for keyword, tuned_answer, base_answer in canned_answers:
        if keyword in lowered:
            return tuned_answer if model == "finetuned" else base_answer
    # No keyword matched: there is no canned answer for this prompt.
    return None

# Compare predictions: for every evaluation sample, print the prompt, the
# expected answer, and the canned output of both mock variants, followed by a
# 60-character separator rule. Prompts are numbered from 1 for display.
for number, sample in enumerate(eval_samples, start=1):
    prompt_text = sample["prompt"]
    base_pred = mock_model_predict(prompt_text, model="base")
    fine_pred = mock_model_predict(prompt_text, model="finetuned")

    print(f"Prompt {number}: {prompt_text}")
    # Label/value rows are printed with print's default single-space separator.
    rows = (
        ("Expected:", sample["expected"]),
        ("Base Model:", base_pred),
        ("Fine-Tuned:", fine_pred),
    )
    for label, value in rows:
        print(label, value)
    print("=" * 60)


In [None]:
from sklearn.metrics import accuracy_score

# Score both mock variants against the reference answers.
# accuracy_score is called with its defaults on the raw answer values, so a
# prediction presumably only counts when it equals the expected value exactly.
# NOTE(review): mock_model_predict returns None for prompts without a known
# keyword -- confirm accuracy_score accepts that mix of None and strings.
expected = [record["expected"] for record in eval_samples]
prompts = [record["prompt"] for record in eval_samples]
base_preds = [mock_model_predict(text, "base") for text in prompts]
fine_preds = [mock_model_predict(text, "finetuned") for text in prompts]

base_acc = accuracy_score(expected, base_preds)
fine_acc = accuracy_score(expected, fine_preds)

# Report both scores rounded to two decimals.
print("📊 Accuracy Comparison:")
for label, score in (("Base Model", base_acc), ("Fine-Tuned", fine_acc)):
    print(f"{label} Accuracy: {score:.2f}")
