In [None]:
import os
from getpass import getpass

# Resolve the OpenAI credential without ever hardcoding it in the notebook.
# If the key is not already exported in the environment, ask for it
# interactively (getpass does not echo the input) instead of installing a
# placeholder string that would only surface later as an authentication error.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    api_key = getpass("OpenAI API key: ")
    os.environ["OPENAI_API_KEY"] = api_key

from langchain_openai import ChatOpenAI
from datasets import load_dataset
from coolprompt.assistant import PromptTuner
from coolprompt.utils.prompt_freezer import split_prompt, merge_prompt


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Single chat model used both as the target model and as the system
# (optimizer) model in the runs below. temperature=0 keeps generations as
# repeatable as the API allows.
model = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)

# Number of SAMSum training examples used by every optimization run in this
# notebook. Kept tiny so the demo stays fast and cheap; raise it for a more
# meaningful evaluation.
NUM_SAMPLES = 5

samsum = load_dataset("knkarthick/samsum")
# Slice the rows first and only then pick the columns, so that only the
# first NUM_SAMPLES rows are materialized instead of the whole column.
train_head = samsum["train"][:NUM_SAMPLES]
dataset = train_head["dialogue"]
targets = train_head["summary"]

# Evaluation rubric passed to the G-Eval metric (metric="geval") below.
geval_steps = [
    "1. Compare the assistant's summary with the reference and list the key facts each one mentions.",
    "2. Check whether the assistant introduces any unsupported information or misses essential events.",
    "3. Decide if the assistant's summary is concise and coherent while covering all required details.",
]


In [3]:
# Prompt with one <freeze>...</freeze> fragment that the optimizers must keep
# verbatim; split_prompt returns the optimizable text and the frozen text.
start_prompt = "Summarize the text <freeze>in exactly 4 sentences.</freeze>"
optimizable, frozen = split_prompt(start_prompt)

# Show both halves of the split, one labelled line each.
for part_label, part_text in (("Optimizable", optimizable), ("Frozen", frozen)):
    print(f"{part_label}: {part_text}")


[2025-12-20 15:34:05,994] [DEBUG] [prompt_freezer.split_prompt] - Found 1 frozen part(s). Frozen content: in exactly 4 sentences....


Optimizable: Summarize the text
Frozen: in exactly 4 sentences.


## HyPE


In [4]:
# HyPE optimization of the frozen-tag prompt, scored with G-Eval using the
# rubric in geval_steps.
tuner = PromptTuner(target_model=model, system_model=model)

hype_run_options = {
    "start_prompt": start_prompt,
    "task": "generation",
    "dataset": dataset,
    "target": targets,
    "method": "hype",
    "metric": "geval",
    "geval_evaluation_steps": geval_steps,
    "verbose": 2,
}
final_prompt = tuner.run(**hype_run_options)


[2025-12-20 15:34:06,019] [INFO] [assistant.__init__] - Validating the target model
[2025-12-20 15:34:06,020] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-20 15:34:06,021] [INFO] [assistant.run] - Validating args for PromptTuner running
[2025-12-20 15:34:06,022] [INFO] [evaluator.__init__] - Evaluator successfully initialized with geval metric


[2025-12-20 15:34:09,050] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-20 15:34:09,052] [INFO] [assistant.run] - Method: hype, Task: generation
[2025-12-20 15:34:09,053] [INFO] [assistant.run] - Metric: geval, Validation size: 0.25
[2025-12-20 15:34:09,054] [INFO] [assistant.run] - Dataset: 5 samples
[2025-12-20 15:34:09,055] [INFO] [assistant.run] - Target: 5 samples
[2025-12-20 15:34:09,056] [INFO] [hype.hype_optimizer] - Running HyPE optimization...
[2025-12-20 15:34:09,056] [DEBUG] [hype.hype_optimizer] - Start prompt:
Summarize the text <freeze>in exactly 4 sentences.</freeze>
[2025-12-20 15:34:09,056] [INFO] [hype.hype_optimizer] - Found 1 frozen part(s) in prompt
[2025-12-20 15:34:12,615] [INFO] [hype.hype_optimizer] - HyPE optimization completed
[2025-12-20 15:34:12,616] [DEBUG] [hype.hype_optimizer] - Raw HyPE output:
[PROMPT_START]Please provide a concise summary of the given text, ensuring that the response is clear and focused. The summary must cap

## Results


In [5]:
# Report the prompt and metric before/after optimization, plus whether the
# frozen fragment appears verbatim in the optimized prompt.
hype_report = [
    f"Initial: {start_prompt}",
    f"Final: {final_prompt}",
    f"Initial score: {tuner.init_metric:.4f}",
    f"Final score: {tuner.final_metric:.4f}",
    f"Frozen in final_prompt {frozen in final_prompt}",
]
print("\n".join(hype_report))

Initial: Summarize the text <freeze>in exactly 4 sentences.</freeze>
Final: Please provide a concise summary of the given text, ensuring that the response is clear and focused. The summary must capture the main ideas without unnecessary elaboration and should be structured effectively. Importantly, the summary should be composed in exactly 4 sentences.
Initial score: 0.6000
Final score: 0.6500
Frozen in final_prompt True


In [6]:
# Reflective optimization of the same frozen-tag prompt, scored with METEOR.
# A fresh PromptTuner is created so init_metric / final_metric read afterwards
# belong to this run only.
tuner = PromptTuner(target_model=model, system_model=model)

# geval_evaluation_steps is intentionally not passed: it is the rubric for
# metric="geval", and this run is scored with METEOR (the distill + meteor
# run in this notebook omits it as well).
final_prompt = tuner.run(
    start_prompt=start_prompt,
    task="generation",
    dataset=dataset,
    target=targets,
    method="reflective",
    metric="meteor",
    verbose=2,
)

[2025-12-20 15:34:36,672] [INFO] [assistant.__init__] - Validating the target model
[2025-12-20 15:34:36,672] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-20 15:34:36,673] [INFO] [assistant.run] - Validating args for PromptTuner running


[nltk_data] Downloading package wordnet to /home/asd480/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/asd480/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/asd480/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[2025-12-20 15:34:38,107] [INFO] [evaluator.__init__] - Evaluator successfully initialized with meteor metric
[2025-12-20 15:34:41,391] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-20 15:34:41,395] [INFO] [assistant.run] - Method: reflective, Task: generation
[2025-12-20 15:34:41,399] [INFO] [assistant.run] - Metric: meteor, Validation size: 0.25
[2025-12-20 15:34:41,400] [INFO] [assistant.run] - Dataset: 5 samples
[2025-12-20 15:34:41,401] [INFO] [assistant.run] - Target: 5 samples
[2025-12-20 15:34:41,402] [INFO] [evoluter.__init__] - Found frozen parts in initial prompt. LLM will preserve them 

In [7]:
# Summarize the latest run: prompts, scores, and whether the frozen fragment
# appears verbatim in the optimized prompt.
frozen_preserved = frozen in final_prompt
print(
    f"Initial: {start_prompt}",
    f"Final: {final_prompt}",
    f"Initial score: {tuner.init_metric:.4f}",
    f"Final score: {tuner.final_metric:.4f}",
    f"Frozen preserved: {frozen_preserved}",
    sep="\n",
)

Initial: Summarize the text <freeze>in exactly 4 sentences.</freeze>
Final: Please summarize the given text in exactly 4 sentences.
Initial score: 0.3596
Final score: 0.3843
Frozen preserved: True


## DistillPrompt


In [8]:
# DistillPrompt optimization of the frozen-tag prompt, scored with METEOR
# over three epochs.
distill_run_options = dict(
    start_prompt=start_prompt,
    task="generation",
    dataset=dataset,
    target=targets,
    method="distill",
    metric="meteor",
    num_epochs=3,
    verbose=1,
)
tuner = PromptTuner(target_model=model, system_model=model)
final_prompt_distill = tuner.run(**distill_run_options)

# Re-derive the frozen fragment from the start prompt so this cell does its
# own preservation check; the optimizable half is not needed here.
_unused_optimizable, frozen_check = split_prompt(start_prompt)
frozen_kept = frozen_check in final_prompt_distill

print(
    f"Initial: {start_prompt}",
    f"Final: {final_prompt_distill}",
    f"Initial score: {tuner.init_metric:.4f}",
    f"Final score: {tuner.final_metric:.4f}",
    f"Frozen preserved: {frozen_kept}",
    sep="\n",
)


[2025-12-20 15:41:22,740] [INFO] [assistant.__init__] - Validating the target model
[2025-12-20 15:41:22,741] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-20 15:41:22,742] [INFO] [assistant.run] - Validating args for PromptTuner running
[nltk_data] Downloading package wordnet to /home/asd480/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/asd480/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/asd480/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[2025-12-20 15:41:23,917] [INFO] [evaluator.__init__] - Evaluator successfully initialized with meteor metric
[2025-12-20 15:41:27,129] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-20 15:41:27,130] [INFO] [assistant.run] - Method: distill, Task: generation
[2025-12-20 15:41:27,130] [INFO] [assistant.run] - Metric: meteor, Validation s

Initial: Summarize the text <freeze>in exactly 4 sentences.</freeze>
Final: Condense the text while preserving essential details and organization in exactly 4 sentences. Ensure all key information is maintained and the structure remains clear, focusing on brevity and coherence without unnecessary elaboration. Do not alter any frozen fragments and include each one only once, exactly as written.
Initial score: 0.5205
Final score: 0.3964
Frozen preserved: True


## Multiple freeze tags


In [9]:
# Prompt carrying two separate frozen fragments.
multi_freeze_prompt = (
    "Summarize the text <freeze>in exactly 4 sentences</freeze>. "
    "<freeze>Start your response with 'Summary:' prefix.</freeze>"
)

# split_prompt returns the optimizable text and the frozen text.
split_result = split_prompt(multi_freeze_prompt)
optimizable_multi, frozen_multi = split_result
print(f"Optimizable: {optimizable_multi}", f"Frozen: {frozen_multi}", sep="\n")


Optimizable: Summarize the text .
Frozen: in exactly 4 sentences Start your response with 'Summary:' prefix.


In [10]:
# HyPE optimization of the two-fragment prompt, scored with G-Eval.
tuner = PromptTuner(target_model=model, system_model=model)

hype_multi_options = dict(
    start_prompt=multi_freeze_prompt,
    task="generation",
    dataset=dataset,
    target=targets,
    method="hype",
    metric="geval",
    geval_evaluation_steps=geval_steps,
    verbose=1,
)
final_prompt_multi = tuner.run(**hype_multi_options)

# Print the original and optimized prompts so both frozen fragments can be
# compared by eye.
for prompt_label, prompt_text in (
    ("Initial", multi_freeze_prompt),
    ("Final", final_prompt_multi),
):
    print(f"{prompt_label}: {prompt_text}")

[2025-12-20 15:43:51,639] [INFO] [assistant.__init__] - Validating the target model
[2025-12-20 15:43:51,640] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-20 15:43:51,640] [INFO] [assistant.run] - Validating args for PromptTuner running
[2025-12-20 15:43:51,641] [INFO] [evaluator.__init__] - Evaluator successfully initialized with geval metric
[2025-12-20 15:43:54,177] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-20 15:43:54,178] [INFO] [assistant.run] - Method: hype, Task: generation
[2025-12-20 15:43:54,178] [INFO] [assistant.run] - Metric: geval, Validation size: 0.25
[2025-12-20 15:43:54,179] [INFO] [assistant.run] - Dataset: 5 samples
[2025-12-20 15:43:54,179] [INFO] [assistant.run] - Target: 5 samples
[2025-12-20 15:43:54,180] [INFO] [hype.hype_optimizer] - Running HyPE optimization...
[2025-12-20 15:43:54,180] [INFO] [hype.hype_optimizer] - Found 2 frozen part(s) in prompt
[2025-12-20 15:43:55,485] [INFO] [hype.hype_optimi

Initial: Summarize the text <freeze>in exactly 4 sentences</freeze>. <freeze>Start your response with 'Summary:' prefix.</freeze>
Final: Please provide a concise summary of the text, ensuring that the response is structured in exactly 4 sentences. Additionally, Start your response with 'Summary:' prefix.


In [11]:
# DistillPrompt optimization of the two-fragment prompt, scored with METEOR.
tuner = PromptTuner(target_model=model, system_model=model)

# geval_evaluation_steps is intentionally not passed: it is the rubric for
# metric="geval", and this run is scored with METEOR (the single-fragment
# distill + meteor run in this notebook omits it as well).
final_prompt_multi = tuner.run(
    start_prompt=multi_freeze_prompt,
    task="generation",
    dataset=dataset,
    target=targets,
    method="distill",
    metric="meteor",
    verbose=1,
)

# Print the original and optimized prompts so both frozen fragments can be
# compared by eye.
print(f"Initial: {multi_freeze_prompt}")
print(f"Final: {final_prompt_multi}")

[2025-12-20 15:44:15,587] [INFO] [assistant.__init__] - Validating the target model
[2025-12-20 15:44:15,587] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-20 15:44:15,588] [INFO] [assistant.run] - Validating args for PromptTuner running
[nltk_data] Downloading package wordnet to /home/asd480/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/asd480/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/asd480/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[2025-12-20 15:44:16,577] [INFO] [evaluator.__init__] - Evaluator successfully initialized with meteor metric
[2025-12-20 15:44:18,604] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-20 15:44:18,604] [INFO] [assistant.run] - Method: distill, Task: generation
[2025-12-20 15:44:18,605] [INFO] [assistant.run] - Metric: meteor, Validation s

Initial: Summarize the text <freeze>in exactly 4 sentences</freeze>. <freeze>Start your response with 'Summary:' prefix.</freeze>
Final: Summarize the text in exactly 4 sentences. Start your response with 'Summary:' prefix.


## Validation


In [12]:
from coolprompt.utils.prompt_freezer import validate_freeze_tags

# Well-formed prompts: every <freeze> has a matching </freeze>.
test1 = "Summarize the text <freeze>in exactly 3 sentences</freeze>"
test2 = "Summarize the text <freeze>in english</freeze> and make it <freeze>concise</freeze>"
# Malformed prompts: an unclosed opening tag, an extra closing tag, and a
# second opening tag that is never closed.
test3 = "Summarize the text <freeze>in 3 sentences"
test4 = "Summarize the text <freeze>in english</freeze></freeze>"
test5 = "Summarize the text <freeze>in english</freeze> <freeze>concise"

# Each entry pairs a prompt with whether validation is expected to succeed.
tests = [
    (test1, True),
    (test2, True),
    (test3, False),
    (test4, False),
    (test5, False),
]

for test, should_pass in tests:
    # A ValueError from validate_freeze_tags means the prompt was rejected.
    try:
        validate_freeze_tags(test)
    except ValueError:
        was_accepted = False
    else:
        was_accepted = True
    # The check passes when the observed outcome matches the expectation.
    result = "PASS" if was_accepted == should_pass else "FAIL"
    print(f"{result}: {test}")


PASS: Summarize the text <freeze>in exactly 3 sentences</freeze>
PASS: Summarize the text <freeze>in english</freeze> and make it <freeze>concise</freeze>
PASS: Summarize the text <freeze>in 3 sentences
PASS: Summarize the text <freeze>in english</freeze></freeze>
PASS: Summarize the text <freeze>in english</freeze> <freeze>concise


In [13]:
# split_prompt is expected to reject a prompt whose <freeze> tag is never
# closed; the raised message is echoed so the reader can see the diagnostic.
bad_prompt = "Text <freeze>frozen"
try:
    split_prompt(bad_prompt)
except ValueError as err:
    print(f"PASS: {err}")
else:
    print("FAIL: split_prompt should raise ValueError")


PASS: Found 1 opening tags <freeze> but 0 closing tags </freeze>. Each opening tag must have closing tag.
