### Prompt Engineering

You can easily a/b test prompts in Sammo by swapping out sections of the metaprompt.

In [58]:
import sammo
from sammo.runners import OpenAIChat
from sammo.components import Output
from sammo.data import DataTable
from sammo.base import EvaluationScore
import os
import pandas as pd

_ = sammo.setup_logger("WARNING")  # we're only interested in warnings for now

runner = OpenAIChat(
    model_id="gpt-4o-mini",
    api_config={"api_key": os.environ["OPENAI_API_KEY"]},
    cache=os.getenv("CACHE_FILE", "cache.tsv"),
    timeout=30,
)

def load_data():
    df = pd.read_csv("transaction_data_with_classifications.csv")
    mydata = DataTable.from_pandas(df, input_fields="description", output_fields="classification", constants={"instructions": "Determine how to classify these transactions."})
    return mydata

def accuracy(y_true: DataTable, y_pred: DataTable) -> EvaluationScore:
    y_true = y_true.outputs.values
    y_pred = y_pred.outputs.values
    n_correct = sum([y_p == y_t for y_p, y_t in zip(y_pred, y_true)])
    return EvaluationScore(n_correct / len(y_true))

from sammo.instructions import MetaPrompt, Section, Paragraph, InputData, FewshotExamples
from sammo.dataformatters import (
    QuestionAnswerFormatter,
)

mydata = load_data()
sample = mydata.sample(20, seed=42)

labels = ["Rent", "Bills", "Other", "Food", "Entertainment", "Utilities", "Salary", "Taxes", "Insurance", "Unknown"]

mprompt = MetaPrompt(
    [
        Section("Instructions", mydata.constants["instructions"]),
        Section("Examples", FewshotExamples(mydata.sample(3, seed=43))),
        Paragraph(f"\nOutput labels: {', '.join(labels)}"),
        Paragraph(InputData()),
    ],
    render_as="markdown",
    data_formatter=QuestionAnswerFormatter(labels),
)
# automatically wraps it with the right parser component
mprompt_parsed = mprompt.with_extractor("empty_result")

result = Output(mprompt_parsed, minibatch_size=5, on_error="empty_result").run(
    runner, sample
)
result[:5]


minibatches[################################################################################]4/4[00:00<00:00, 1423.23it/s]


+-----------------------------------+----------+
| input                             | output   |
| cash withdrawal for holiday       | Other    |
+-----------------------------------+----------+
| mortgage payment                  | Rent     |
+-----------------------------------+----------+
| ATM withdrawal downtown           | Other    |
+-----------------------------------+----------+
| cash withdrawal for holiday       | Other    |
+-----------------------------------+----------+
| refund for cancelled subscription | Other    |
+-----------------------------------+----------+
Constants: {'instructions': 'Determine how to classify these transac...

In [59]:
accuracy(result, sample)

name    value
------  -------
score   0.85

In [20]:
from sammo.search import EnumerativeSearch
from sammo.search_op import one_of
from sammo.components import Output


def labeling_prompt_space():
    instructions = one_of(
        [
            "Determine how to classify these transactions.",
            "Determine how to classify these transactions. THIS IS VERY IMPORTANT TO MY CAREER.",
        ]
    )
    
    mprompt = MetaPrompt(
        [
            Section("Instructions", instructions),
            Section("Examples", FewshotExamples(mydata.sample(3, seed=43))),
            Paragraph(f"\nOutput labels: {', '.join(labels)}"),
            Paragraph(InputData()),
        ],
        render_as="markdown",
        data_formatter=QuestionAnswerFormatter(labels),
    )
    # automatically wraps it with the right parser component
    mprompt_parsed = mprompt.with_extractor("empty_result")

    return Output(mprompt_parsed, minibatch_size=5, on_error="empty_result")


In [21]:
sample = mydata.sample(25, seed=42)
searcher = EnumerativeSearch(runner, labeling_prompt_space, accuracy)
y_pred = searcher.fit_transform(sample)
searcher.show_report()

candidate[###################################]2/2[00:02<00:00] >> minibatches (total)[#######################]10/10[00:02<00:00]

Fitting log (2 entries):
iteration    action                                            objective    costs                          parse_errors
-----------  ------------------------------------------------  -----------  -----------------------------  --------------
0            {'decision_0': "'Determine how to classify these  0.8          {'input': 661, 'output': 150}  0.0
             transactions.'"}
1            {'decision_0': "'Determine how to classify these  0.84         {'input': 706, 'output': 149}  0.0
             transactions. THIS IS VERY IMPORTANT TO MY
             CAREER.'"}


### Prompt Optimization

Sammo also provides tools for optimizing prompts automatically.

In [63]:
from sammo.dataformatters import PlainFormatter

class InititialCandidates:
    def __init__(self, dtrain):
        self.dtrain = dtrain

    def __call__(self):
        example_formatter = PlainFormatter(
            all_labels=self.dtrain.outputs.unique(), orient="item"
        )

        labels = self.dtrain.outputs.unique()
        instructions = MetaPrompt(
            [
                Paragraph("Instructions: "),
                Paragraph(
                    one_of(
                        [
                            self.dtrain.constants["instructions"],
                            "",
                            "Find the best output label given the input.",
                            self.dtrain.constants["instructions"] * 2,
                        ]
                    )
                ),
                Paragraph("\n"),
                Paragraph(
                    f"Output labels: {', '.join(labels)}\n" if len(labels) <= 10 else ""
                ),
                Paragraph(InputData()),
                Paragraph("Output: "),
            ],
            render_as="raw",
            data_formatter=example_formatter,
        )

        return Output(
            instructions.with_extractor("raise"),
            minibatch_size=1,
            on_error="empty_result",
        )

In [64]:
from sammo.mutators import BagOfMutators, InduceInstructions, Paraphrase

mydata = load_data()
d_train = mydata.sample(10, seed=4)

mutation_operators = BagOfMutators(
    InititialCandidates(d_train),
    InduceInstructions("#instructions", d_train),
    Paraphrase("#instructions"),
    sample_for_init_candidates=False,
)

In [65]:
from sammo.search import BeamSearch

prompt_optimizer = BeamSearch(
            runner,
            mutation_operators,
            accuracy,
            maximize=True,
            depth=3,
            mutations_per_beam=2,
            n_initial_candidates=4,
            beam_width=4,
            add_previous=True,
    )
prompt_optimizer.fit(d_train)
prompt_optimizer.show_report()

search depth[                                ]0/3[00:00<00:01] >> mutate[                                                   ]0/4.51it/s]]Output(
  child = StripWhitespace(
    child = GenerateText(
      child = MetaPrompt(
        child = [
          0 : Paragraph(
            content = 'Instructions: ',
            reference_id = None,
            reference_classes = None
          ),
          1 : Paragraph(
            content = 'Determine how to classify these transactions.',
            reference_id = None,
            reference_classes = None
          ),
          2 : Paragraph(
            content = '\n',
            reference_id = None,
            reference_classes = None
          ),
          3 : Paragraph(
            content = 'Output labels: Other, Food, This transaction would be classified as Other., Utilities\n',
            reference_id = None,
            reference_classes = None
          ),
          4 : Paragraph(
            content = InputData(
              id

In [66]:
prompt_optimizer.show_report()


Fitting log (4 entries):
iteration    action                                             objective    costs                         parse_errors    prev_actions
-----------  -------------------------------------------------  -----------  ----------------------------  --------------  --------------------------------------------------
-1           {'decision_0': "'Determine how to classify these   0.9          {'input': 422, 'output': 10}  0.0             [{'decision_0': "'Determine how to classify these
             transactions.'"}                                                                                              transactions.'"}]
-1           {'decision_0': "''"}                               0.9          {'input': 362, 'output': 10}  0.0             [{'decision_0': "''"}]
-1           {'decision_0': "'Find the best output label given  0.9          {'input': 442, 'output': 10}  0.0             [{'decision_0': "'Find the best output label given
             the input.'"}    

In [52]:
print(prompt_optimizer.best_prompt)

Output(
  child = StripWhitespace(
    child = GenerateText(
      child = MetaPrompt(
        child = [
          0 : Paragraph(
            content = 'Instructions: ',
            reference_id = None,
            reference_classes = None
          ),
          1 : Paragraph(
            content = 'Determine how to classify these transactions.',
            reference_id = 'instructions',
            reference_classes = None
          ),
          2 : Paragraph(
            content = '\n',
            reference_id = None,
            reference_classes = None
          ),
          3 : Paragraph(
            content = 'Output labels: Other, Food, This transaction would be classified as Other., Utilities\n',
            reference_id = None,
            reference_classes = None
          ),
          4 : Paragraph(
            content = InputData(
              id_offset = 0,
              reference_id = None
            ),
            reference_id = None,
            reference_classes = 