# Demo

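This demo shows how to run flexible, customizable evaluation tasks: first load a model (through an OpenAI-compatible API or Hugging Face), then run one or more evaluators against it.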

In [None]:
from eval.benchs import ExampleQAEvaluator, get_all_evaluator_classes, load_evaluator
from eval.llms import HuggingFace, OpenAIAPI

## 1. Model Loading

In [None]:
# API-backed model; no `base_url` is passed, so OpenAIAPI's own default
# endpoint is used.
gpt4o = OpenAIAPI(model_name="gpt4o", api_key="your_api_key")

# API-backed model served from a custom endpoint via `base_url`.
# Note: SiliconFlow provides free API keys for many models.
api_model = OpenAIAPI(
    model_name="Qwen/Qwen2-1.5B-Instruct",
    api_key="your_api_key",
    base_url="https://api.siliconflow.cn/v1",
)

# Model handled by the HuggingFace wrapper; a chat template is applied and
# the given system message is supplied to it.
hf_model = HuggingFace(
    model_name_or_path="Qwen/Qwen2-0.5B-Instruct",
    apply_chat_template=True,
    system_message="You are a helpful assistant.",
)

## 2. Evaluator Loading and Evaluation

In [None]:
# Three equivalent ways to obtain an evaluator and run it against `api_model`.

# Method 1: Directly Use The Evaluator Class
# Instantiate a known evaluator class with the model, then run it.
# NOTE(review): `num_batches=2` presumably caps the run at two batches to keep
# the demo quick -- confirm against the evaluator's documentation.
evaluator = ExampleQAEvaluator(api_model, num_batches=2)
evaluator.evaluate()

# Method 2: Load The Evaluator Class By Its Name
# `load_evaluator` takes the class name as a string and returns the class
# itself, which is then instantiated with the model like any other evaluator.
evaluator_class = load_evaluator("UHGSelectiveEvaluator")
evaluator_from_str = evaluator_class(api_model)
evaluator_from_str.evaluate()

# Method 3: Iterate All Evaluator Classes To Batch Evaluate
# Every class returned by `get_all_evaluator_classes()` is instantiated with
# the same model and evaluated in turn. Note that the loop variable rebinds
# `evaluator_class` from Method 2.
for evaluator_class in get_all_evaluator_classes():
    evaluator_item = evaluator_class(api_model)
    evaluator_item.evaluate()