In [None]:
import os
from getpass import getpass

# Resolve the OpenAI API key: prefer the OPENAI_API_KEY environment variable,
# otherwise ask for it interactively. getpass does not echo the input, so the
# key ends up neither in the notebook source nor in the saved cell output
# (a hardcoded placeholder string would only fail later with an opaque
# authentication error).
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    api_key = getpass("Enter your OpenAI API key: ")
    # Export it so that clients reading OPENAI_API_KEY from the environment see it.
    os.environ["OPENAI_API_KEY"] = api_key

from langchain_openai import ChatOpenAI
from datasets import load_dataset
from coolprompt.assistant import PromptTuner
from coolprompt.utils.prompt_freezer import split_prompt, merge_prompt


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Chat model shared by the tuner cells below; temperature is pinned to 0.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# SAMSum: the first five training dialogues are the inputs and the matching
# reference summaries are the targets.
samsum = load_dataset("knkarthick/samsum")
dataset, targets = (
    samsum["train"][column][:5] for column in ("dialogue", "summary")
)

# Evaluation steps handed to the "geval" metric via geval_evaluation_steps.
geval_steps = [
    "1. Compare the assistant's summary with the reference and list the key facts each one mentions.",
    "2. Check whether the assistant introduces any unsupported information or misses essential events.",
    "3. Decide if the assistant's summary is concise and coherent while covering all required details.",
]


In [4]:
# The text wrapped in <freeze>...</freeze> is the part that must stay as-is;
# split_prompt returns the optimizable text and the frozen text separately.
start_prompt = "Summarize the text <freeze>in exactly 4 sentences.</freeze>"
optimizable, frozen = split_prompt(start_prompt)
print(
    f"Optimizable: {optimizable}",
    f"Frozen: {frozen}",
    sep="\n",
)


[2025-12-07 21:59:14,642] [DEBUG] [prompt_freezer.split_prompt] - Found 1 frozen part(s). Frozen content: in exactly 4 sentences....


Optimizable: Summarize the text
Frozen: in exactly 4 sentences.


## HyPE


In [5]:
# The same chat model acts as both the target model and the system model.
tuner = PromptTuner(
    target_model=model,
    system_model=model,
)

# Optimize the freeze-tagged start prompt with the "hype" method, scored by
# the "geval" metric using the evaluation steps defined earlier.
final_prompt = tuner.run(
    start_prompt=start_prompt,
    dataset=dataset,
    target=targets,
    task="generation",
    method="hype",
    metric="geval",
    geval_evaluation_steps=geval_steps,
    verbose=2,
)


[2025-12-07 21:59:16,169] [INFO] [assistant.__init__] - Validating the target model
[2025-12-07 21:59:16,170] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-07 21:59:16,171] [INFO] [assistant.run] - Validating args for PromptTuner running
[2025-12-07 21:59:16,172] [INFO] [evaluator.__init__] - Evaluator successfully initialized with geval metric
[2025-12-07 21:59:27,024] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-07 21:59:27,025] [INFO] [assistant.run] - Method: hype, Task: generation
[2025-12-07 21:59:27,025] [INFO] [assistant.run] - Metric: geval, Validation size: 0.25
[2025-12-07 21:59:27,025] [INFO] [assistant.run] - Dataset: 5 samples
[2025-12-07 21:59:27,026] [INFO] [assistant.run] - Target: 5 samples
[2025-12-07 21:59:27,026] [INFO] [hype.hype_optimizer] - Running HyPE optimization...
[2025-12-07 21:59:27,027] [DEBUG] [hype.hype_optimizer] - Start prompt:
Summarize the text <freeze>in exactly 4 sentences.</freeze>
[2025-12

## Results


In [6]:
# Report the prompts, both metric values, and whether the frozen text still
# appears verbatim in the optimized prompt.
print(
    f"Initial: {start_prompt}",
    f"Final: {final_prompt}",
    f"Initial score: {tuner.init_metric:.4f}",
    f"Final score: {tuner.final_metric:.4f}",
    f"Frozen in final_prompt {frozen in final_prompt}",
    sep="\n",
)

Initial: Summarize the text <freeze>in exactly 4 sentences.</freeze>
Final: You are an expert system for **automatic text summarization**. Read the following input passage carefully and produce a **concise, coherent summary** that preserves the passage’s key ideas, main arguments, and most important supporting points while omitting minor details, examples, and repetitions. Write the summary in clear, neutral prose that is understandable to a general audience, maintaining the original meaning without adding new information, opinions, or external knowledge. Ensure the output is a **fixed-length extractive–abstractive summary** that fully covers the core content of the source passage, with each sentence flowing logically to the next and no bullet points, headings, or meta-commentary. in exactly 4 sentences.
Initial score: 0.9000
Final score: 0.8500
Frozen in final_prompt True


In [16]:
# Fresh tuner for the "reflective" method, scored with the "meteor" metric.
tuner = PromptTuner(
    target_model=model,
    system_model=model,
)

# NOTE(review): geval_evaluation_steps is passed although metric is "meteor";
# presumably it is ignored for non-geval metrics - confirm and drop if so.
final_prompt = tuner.run(
    start_prompt=start_prompt,
    dataset=dataset,
    target=targets,
    task="generation",
    method="reflective",
    metric="meteor",
    geval_evaluation_steps=geval_steps,
    verbose=2,
)

[2025-12-07 18:07:19,974] [INFO] [assistant.__init__] - Validating the target model
[2025-12-07 18:07:19,975] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-07 18:07:19,977] [INFO] [assistant.run] - Validating args for PromptTuner running
[nltk_data] Downloading package wordnet to /home/asd480/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/asd480/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/asd480/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[2025-12-07 18:07:21,233] [INFO] [evaluator.__init__] - Evaluator successfully initialized with meteor metric
[2025-12-07 18:07:23,129] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-07 18:07:23,131] [INFO] [assistant.run] - Method: reflective, Task: generation
[2025-12-07 18:07:23,132] [INFO] [assistant.run] - Metric: meteor, Validatio

In [17]:
# Same report as for the previous run, now for the reflective result.
report_lines = (
    f"Initial: {start_prompt}",
    f"Final: {final_prompt}",
    f"Initial score: {tuner.init_metric:.4f}",
    f"Final score: {tuner.final_metric:.4f}",
    f"Frozen preserved: {frozen in final_prompt}",
)
print("\n".join(report_lines))

Initial: Summarize the text <freeze>in exactly 4 sentences.</freeze>
Final: Summarize the text concisely and accurately, ensuring key points are effectively captured using active voice and specific language for concise summarization. in exactly 4 sentences.
Initial score: 0.4900
Final score: 0.3316
Frozen preserved: True


## DistillPrompt


In [17]:
# Fresh tuner for the "distill" method, scored with "meteor" over 3 epochs.
tuner = PromptTuner(
    target_model=model,
    system_model=model,
)

final_prompt_distill = tuner.run(
    start_prompt=start_prompt,
    dataset=dataset,
    target=targets,
    task="generation",
    method="distill",
    metric="meteor",
    num_epochs=3,
    verbose=1,
)

# Re-derive the frozen text from the start prompt so this cell does not rely
# on the `frozen` variable from an earlier cell.
_, frozen_check = split_prompt(start_prompt)

print(
    f"Initial: {start_prompt}",
    f"Final: {final_prompt_distill}",
    f"Initial score: {tuner.init_metric:.4f}",
    f"Final score: {tuner.final_metric:.4f}",
    f"Frozen preserved: {frozen_check in final_prompt_distill}",
    sep="\n",
)


[2025-12-07 18:46:48,592] [INFO] [assistant.__init__] - Validating the target model
[2025-12-07 18:46:48,593] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-07 18:46:48,596] [INFO] [assistant.run] - Validating args for PromptTuner running
[nltk_data] Downloading package wordnet to /home/asd480/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/asd480/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/asd480/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[2025-12-07 18:46:49,836] [INFO] [evaluator.__init__] - Evaluator successfully initialized with meteor metric
[2025-12-07 18:46:53,544] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-07 18:46:53,544] [INFO] [assistant.run] - Method: distill, Task: generation
[2025-12-07 18:46:53,545] [INFO] [assistant.run] - Metric: meteor, Validation s

Initial: Summarize the text <freeze>in exactly 4 sentences.</freeze>
Final: Summarize the text by clearly identifying the main ideas and key points. Your summary should capture the essence of the content in a concise manner. in exactly 4 sentences.
Initial score: 0.3189
Final score: 0.3620
Frozen preserved: True


## Multiple freeze tags


In [20]:
# A prompt carrying two separate <freeze> spans; split_prompt is expected to
# hand back the optimizable remainder and the frozen text.
multi_freeze_prompt = (
    "Summarize the text <freeze>in exactly 4 sentences</freeze>. "
    "<freeze>Start your response with 'Summary:' prefix.</freeze>"
)
optimizable_multi, frozen_multi = split_prompt(multi_freeze_prompt)
print(
    f"Optimizable: {optimizable_multi}",
    f"Frozen: {frozen_multi}",
    sep="\n",
)


Optimizable: Summarize the text .
Frozen: in exactly 4 sentences Start your response with 'Summary:' prefix.


In [23]:
# HyPE run on the prompt that contains two frozen spans.
tuner = PromptTuner(
    target_model=model,
    system_model=model,
)

final_prompt_multi = tuner.run(
    start_prompt=multi_freeze_prompt,
    dataset=dataset,
    target=targets,
    task="generation",
    method="hype",
    metric="geval",
    geval_evaluation_steps=geval_steps,
    verbose=1,
)

# Show the tagged input next to the optimized output for visual comparison.
print(
    f"Initial: {multi_freeze_prompt}",
    f"Final: {final_prompt_multi}",
    sep="\n",
)

[2025-12-07 18:57:02,561] [INFO] [assistant.__init__] - Validating the target model
[2025-12-07 18:57:02,562] [INFO] [assistant.__init__] - PromptTuner successfully initialized
[2025-12-07 18:57:02,563] [INFO] [assistant.run] - Validating args for PromptTuner running
[2025-12-07 18:57:02,564] [INFO] [evaluator.__init__] - Evaluator successfully initialized with geval metric
[2025-12-07 18:57:04,664] [INFO] [assistant.run] - === Starting Prompt Optimization ===
[2025-12-07 18:57:04,664] [INFO] [assistant.run] - Method: hype, Task: generation
[2025-12-07 18:57:04,665] [INFO] [assistant.run] - Metric: geval, Validation size: 0.25
[2025-12-07 18:57:04,665] [INFO] [assistant.run] - Dataset: 5 samples
[2025-12-07 18:57:04,666] [INFO] [assistant.run] - Target: 5 samples
[2025-12-07 18:57:04,666] [INFO] [hype.hype_optimizer] - Running HyPE optimization...
[2025-12-07 18:57:04,666] [INFO] [hype.hype_optimizer] - Found frozen parts in prompt. Optimizing only optimizable part.
[2025-12-07 18:57:0

Initial: Summarize the text <freeze>in exactly 4 sentences</freeze>. <freeze>Start your response with 'Summary:' prefix.</freeze>
Final: Please provide a concise summary of the given text, ensuring that it captures the main ideas clearly and succinctly. Focus on distilling the essential points without unnecessary details, and limit your response to four sentences. Additionally, make sure to start your summary with the prefix 'Summary:' to clearly indicate the beginning of the summary.  in exactly 4 sentences Start your response with 'Summary:' prefix.


## Validation


In [24]:
from coolprompt.utils.prompt_freezer import validate_freeze_tags

# Well-formed prompts: every <freeze> has a matching </freeze>.
test1 = "Summarize the text <freeze>in exactly 3 sentences</freeze>"
test2 = "Summarize the text <freeze>in english</freeze> and make it <freeze>concise</freeze>"
# Malformed prompts: a missing closing tag, an extra closing tag, and a
# second opening tag that is never closed.
test3 = "Summarize the text <freeze>in 3 sentences"
test4 = "Summarize the text <freeze>in english</freeze></freeze>"
test5 = "Summarize the text <freeze>in english</freeze> <freeze>concise"

# Each entry pairs a prompt with whether validation is expected to succeed.
tests = list(
    zip(
        (test1, test2, test3, test4, test5),
        (True, True, False, False, False),
    )
)

# A case passes when validate_freeze_tags raises ValueError exactly for the
# prompts that are expected to be rejected.
for test, should_pass in tests:
    try:
        validate_freeze_tags(test)
    except ValueError:
        result = "FAIL" if should_pass else "PASS"
    else:
        result = "PASS" if should_pass else "FAIL"
    print(f"{result}: {test}")


PASS: Summarize the text <freeze>in exactly 3 sentences</freeze>
PASS: Summarize the text <freeze>in english</freeze> and make it <freeze>concise</freeze>
PASS: Summarize the text <freeze>in 3 sentences
PASS: Summarize the text <freeze>in english</freeze></freeze>
PASS: Summarize the text <freeze>in english</freeze> <freeze>concise


In [25]:
# split_prompt is expected to reject an unclosed <freeze> tag with ValueError;
# the error message is shown as the passing result.
bad_prompt = "Text <freeze>frozen"
try:
    split_prompt(bad_prompt)
except ValueError as err:
    print(f"PASS: {err}")
else:
    print("FAIL: split_prompt should raise ValueError")


PASS: Mismatched freeze tags: found 1 opening tags <freeze> but 0 closing tags </freeze>. Each opening tag must have closing tag.
