In [1]:
from datasets import load_dataset
from coolprompt import PromptTuner
from langchain_ollama import ChatOllama

# --- Configuration ----------------------------------------------------------
N_EXAMPLES = 40            # leading rows of the train split handed to tuner.run
GENERATE_NUM_SAMPLES = 20  # value passed as generate_num_samples in every run
START_PROMPT = "Summarize the text"

# --- Models -----------------------------------------------------------------
# Two Ollama chat models: the qwen2.5 1.5b tag is given to PromptTuner as the
# target model, the 3b tag as the system model.
target_model = ChatOllama(model="qwen2.5:1.5b-instruct")
system_model = ChatOllama(model="qwen2.5:3b-instruct")

tuner = PromptTuner(target_model=target_model, system_model=system_model)

# --- Data -------------------------------------------------------------------
samsum = load_dataset("knkarthick/samsum")

# Slice rows first, then pick the columns: this reads only the first
# N_EXAMPLES rows instead of pulling a whole column and slicing it afterwards.
# The resulting lists are the same as samsum["train"][col][:N_EXAMPLES].
train_head = samsum["train"][:N_EXAMPLES]
dataset = train_head["dialogue"]
targets = train_head["summary"]

# --- G-Eval runs --------------------------------------------------------------
# Keyword arguments shared by all three G-Eval runs below; each call adds only
# the arguments that actually differ, so the variation is visible at a glance.
geval_run_kwargs = dict(
    task="generation",
    dataset=dataset,
    target=targets,
    method="hype",
    metric="geval",
    generate_num_samples=GENERATE_NUM_SAMPLES,
    verbose=2,
)

# Run 1: criterion passed as a plain string.
result = tuner.run(
    start_prompt=START_PROMPT,
    geval_criteria="relevance",
    **geval_run_kwargs,
)

# Run 2: same criterion, passed as a one-element list.
result2 = tuner.run(
    start_prompt=START_PROMPT,
    geval_criteria=["relevance"],
    **geval_run_kwargs,
)

# Custom G-Eval template keyed by criterion name. This is a plain (non-f)
# string, so {metric_ceil}, {request} and {response} stay as literal
# placeholders here — presumably they are filled in by the evaluator; confirm
# against the coolprompt documentation.
custom = {
    "creativity": """You will be given a response to a question.
    Rate the creativity on a scale from 1 to {metric_ceil}.
    Return ONLY a single number.

    Source: {request}
    Response: {response}

    Creativity score (number only):"""
}

# Run 3: custom "creativity" criterion scored with the template above.
# NOTE(review): start_prompt is an empty string here, unlike runs 1 and 2 —
# confirm this is intentional.
result3 = tuner.run(
    start_prompt="",
    geval_criteria=["creativity"],
    geval_custom_templates=custom,
    **geval_run_kwargs,
)

[2025-11-08 17:16:46,941] [INFO] [assistant.__init__] - Validating the target model
[2025-11-08 17:16:46,942] [INFO] [assistant.__init__] - Validating the system model
[2025-11-08 17:16:46,942] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-11-08 17:16:46,942] [INFO] [assistant.__init__] - Validating the system model
[2025-11-08 17:16:46,942] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-11-08 17:16:50,461] [INFO] [assistant.run] - Validating args for PromptTuner running
[2025-11-08 17:16:50,462] [INFO] [evaluator.__init__] - Evaluator successfully initialized with geval metric
[2025-11-08 17:16:50,461] [INFO] [assistant.run] - Validating args for PromptTuner running
[2025-11-08 17:16:50,462] [INFO] [evaluator.__init__] - Evaluator successfully initialized with geval metric
[2025-11-08 17:16:58,111] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-11-08 17:16:58,112] [INFO] [assistant.run] - Method: hype, Task: gen

In [5]:
# METEOR-scored run of the summarisation prompt.
# Depends on `tuner`, `dataset` and `targets` from the first cell, and rebinds
# `result2`, which the first cell also assigns (G-Eval run) — the earlier value
# is no longer reachable under that name once this cell has executed.
_meteor_run_kwargs = {
    "start_prompt": "Summarize the text",
    "task": "generation",
    "dataset": dataset,
    "target": targets,
    "method": "hype",
    "metric": "meteor",
    "generate_num_samples": 20,
    "verbose": 2,
}
result2 = tuner.run(**_meteor_run_kwargs)

[2025-11-08 12:16:00,756] [INFO] [assistant.run] - Validating args for PromptTuner running
[nltk_data] Downloading package wordnet to /home/asd480/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/asd480/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/asd480/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[2025-11-08 12:16:02,816] [INFO] [evaluator.__init__] - Evaluator successfully initialized with meteor metric
[2025-11-08 12:16:05,406] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-11-08 12:16:05,407] [INFO] [assistant.run] - Method: hype, Task: generation
[2025-11-08 12:16:05,407] [INFO] [assistant.run] - Metric: meteor, Validation size: 0.25
[2025-11-08 12:16:05,408] [INFO] [assistant.run] - Dataset: 30 samples
[2025-11-08 12:16:05,408] [INFO] [assistant.run] - Target: 30 samples
[2025-11-08 12:16:05,409] [IN