# Content
## Define Classifier
## Define Datasets
## Define DSPy.Examples
## Define Optimizers
## Define Evaluation
## Run Evaluation
## Save Prompts

# Define classifier

In [1]:
import dspy 
import random
import pandas as pd
from datasets import load_dataset

class Classification(dspy.Signature):
    """Classify the customer message into one of the intent labels.
    The output should be only the predicted class as a single intent label."""

    customer_message = dspy.InputField(desc="Customer message during customer service interaction")
    intent_labels = dspy.InputField(desc="Labels that represent customer intent")
    answer = dspy.OutputField(desc="a label best matching customer's intent ")

lm_mini = dspy.OpenAI(model='gpt-4o-mini')
dspy.settings.configure(lm=lm_mini)
cot_predictor = dspy.ChainOfThought(Classification)


# Parse Atis Dataset

In [2]:

dataset = load_dataset("tuetschek/atis")
dataset.set_format(type="pandas")

df_train: pd.DataFrame = dataset["train"][:]
df_test: pd.DataFrame = dataset["test"][:]
small_test = df_test.head(100)

## x column: text, y column: intent

In [3]:
df_train.iloc[0]

id                                                        0
intent                                               flight
text      i want to fly from boston at 838 am and arrive...
slots     O O O O O B-fromloc.city_name O B-depart_time....
Name: 0, dtype: object

## prepare labels

In [4]:
labels = df_train["intent"].unique().tolist()
labels_str = "%".join(labels)
labels_str

'flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no'

In [5]:
## run prediction
first_row = df_train.iloc[0]
print(f"customer message: {first_row['text']},real class: {first_row['intent']}")
cot_predictor(customer_message=first_row["text"], intent_labels=labels_str)


 		You are using the client GPT3, which will be removed in DSPy 2.6.
 		Changing the client is straightforward and will let you use new features (Adapters) that improve the consistency of LM outputs, especially when using chat LMs. 

 		Learn more about the changes and how to migrate at
 		https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb


customer message: i want to fly from boston at 838 am and arrive in denver at 1110 in the morning,real class: flight


Prediction(
    rationale='produce the answer. We first identify that the customer is expressing a desire to fly from Boston to Denver, specifying both the departure time (8:38 AM) and the arrival time (11:10 AM). This indicates that the customer is looking for flight information, particularly related to the flight time and possibly the airfare. The intent is primarily focused on booking or inquiring about a flight, which aligns with the "flight_time" and "airfare" labels. However, since the customer is providing specific times, the most relevant label is "flight_time".',
    answer='flight_time'
)

# Define Examples

In [6]:
# nowhere else there was an example of this thing

# we want k examples per class 
def get_dspy_examples(df, k) -> dspy.example:
    dspy_examples = []
    for label in labels:
        try:
            label_df = df[df["intent"] == label].sample(n=k)
            for index, row in label_df.iterrows():
                dspy_examples.append(
                    dspy.Example(customer_message=row["text"], answer=row["intent"], 
                                 intent_labels=labels_str).with_inputs("customer_message", "intent_labels")
                )
        except:
            # there are classes that don't have any representatives
            continue

    return dspy_examples


train_examples = get_dspy_examples(df_train, k=2)
all_test_examples = get_dspy_examples(df_test, k=10)
print(len(all_test_examples), len(all_test_examples) // 2)
dev_examples = random.sample(all_test_examples, len(all_test_examples) // 2)
test_examples = [example for example in all_test_examples if example not in dev_examples]

90 45


# Define LabeledFewShot Optimizer
LabeledFewShot is the simplest optimizer. Its compile method injects samples intro the prompt.
There is not optimization going on.


In [7]:
from dspy.teleprompt import LabeledFewShot

few_shot_demos = random.sample(train_examples, k=10)
labeled_fewshot_optimizer = LabeledFewShot(k=len(few_shot_demos))
few_shot_model = labeled_fewshot_optimizer.compile(student=cot_predictor, trainset=few_shot_demos)


### What is happenning under the hood?
### LabeledFewShot randomly selects labels

### DSPy SOURCE CODE: https://github.com/stanfordnlp/dspy/blob/793530c65a0e1721997dac0d2636f0f70ad649b6/dspy/teleprompt/vanilla.py#L6

class LabeledFewShot(Teleprompter):
    def __init__(self, k=16):
        self.k = k

    def compile(self, student, *, trainset, sample=True):
        self.student = student.reset_copy()
        self.trainset = trainset

        if len(self.trainset) == 0:
            return self.student

        rng = random.Random(0)

        for predictor in self.student.predictors():
            if sample:
                predictor.demos = rng.sample(self.trainset, min(self.k, len(self.trainset)))
            else:
                predictor.demos = self.trainset[: min(self.k, len(self.trainset))]

        return self.student

### My own summary of the implementation
DSPy samples randomly a portion of the samples as examples for in-context learning. 
There's no actual optimization process.

### How does the prompt looks like?

In [20]:
example = test_examples[0]
# without inputs(), we won't inject the inputs of the example
pred = few_shot_model(**example.inputs())
# Produce a prediction from our `cot` module, using the `example` above as input.
lm_mini.inspect_history(n=1)




Classify the customer message into one of the intent labels.
The output should be only the predicted class as a single intent label.

---

Follow the following format.

Customer Message: Customer message during customer service interaction

Intent Labels: Labels that represent customer intent

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: a label best matching customer's intent

---

Customer Message: what are the air restrictions on flights from pittsburgh to atlanta for the airfare of 416 dollars
Intent Labels: flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no
Answer: restriction

---

Customer Message: what is the flight number of the earliest flight between boston and washington dc
Intent Labels: flight%flight_time%airfare%air

'\n\n\nClassify the customer message into one of the intent labels.\nThe output should be only the predicted class as a single intent label.\n\n---\n\nFollow the following format.\n\nCustomer Message: Customer message during customer service interaction\n\nIntent Labels: Labels that represent customer intent\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: a label best matching customer\'s intent\n\n---\n\nCustomer Message: what are the air restrictions on flights from pittsburgh to atlanta for the airfare of 416 dollars\nIntent Labels: flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no\nAnswer: restriction\n\n---\n\nCustomer Message: what is the flight number of the earliest flight between boston and washington dc\nIntent Labels: 

## Define BootstrapFewShot Optimizer
This family of optimizers is focused on optimizing the few shot examples. Let's take an example of a Sample pipeline and see how we can use this optimizer to optimize it. From: https://dspy.ai/deep-dive/optimizers/bootstrap-fewshot/

In [9]:
from dspy.evaluate import answer_exact_match as metric
from dspy.teleprompt import BootstrapFewShot

optimizer = BootstrapFewShot(
    metric=metric,
    max_bootstrapped_demos=10,
    max_labeled_demos=10,
    max_rounds=10,
)

### Optimize

In [10]:
# documentation is wrong - there is not valset: https://dspy.ai/deep-dive/optimizers/bootstrap-fewshot/
cot_few_shot_optimized = optimizer.compile(cot_predictor, trainset=train_examples)


 28%|████████████████████████████████████▍                                                                                              | 10/36 [00:14<00:38,  1.47s/it]

Bootstrapped 10 full traces after 10 examples for up to 10 rounds, amounting to 10 attempts.





## Peek under the hood of DSPy source code for BootStrapFewShot training

### DSPy source code for training
class BootstrapFewShot()
    def _train(self):
        rng = random.Random(0)
        raw_demos = self.validation

        for name, predictor in self.student.named_predictors():
            augmented_demos = self.name2traces[name][: self.max_bootstrapped_demos]

            sample_size = min(self.max_labeled_demos - len(augmented_demos), len(raw_demos))
            sample_size = max(0, sample_size)

            raw_demos = rng.sample(raw_demos, sample_size)

            if dspy.settings.release >= 20230928:
                predictor.demos = raw_demos + augmented_demos
            else:
                predictor.demos = augmented_demos + raw_demos

        return self.student

I consulted with ChatGPT about this method. Source code: https://github.com/stanfordnlp/dspy/blob/main/dspy/teleprompt/bootstrap.py

_train() Purpose
Once _bootstrap() has collected and validated a set of bootstrapped demos, _train() takes over to:

Compile Final Demos for Predictors: _train() assembles the demos (both bootstrapped and labeled) for each predictor within the student model. For each predictor, it selects a mix of bootstrapped demos (from _bootstrap()) and labeled examples (raw demos from the validation set) to create a final demo set.
Random Sampling: The method performs a random sample from the raw labeled demos, ensuring the demos meet the configuration limits, such as max_labeled_demos.
Set Demos for Each Predictor: Finally, _train() updates each predictor in the student model with this finalized set of demos, effectively preparing it for use.
In essence, _bootstrap() is responsible for creating and validating bootstrapped demos, while _train() assembles a balanced set of these demos and labeled examples to finalize the student model’s training.

### My Own Summary
BootstrapFewShot has two main properties:
1. Enable you to generate additional examples
2. DSPy tests which predictions pass the validation and keep only those

## Define BootstrapFewShotWithRandomSearch

In [11]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

optimizer = BootstrapFewShotWithRandomSearch(
    metric=metric, 
    max_bootstrapped_demos=10, 
    max_labeled_demos=10,
    num_threads=10,
    num_candidate_programs=5
)

Going to sample between 1 and 10 traces per predictor.
Will attempt to bootstrap 5 candidate sets.


In [12]:
cot_few_shot_rs_optimized = optimizer.compile(cot_predictor, trainset=train_examples)

Average Metric: 29 / 36  (80.6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:05<00:00,  6.66it/s]
2024/11/18 09:16:35 INFO dspy.evaluate.evaluate: Average Metric: 29 / 36 (80.6%)


New best score: 80.56 for seed -3
Scores so far: [80.56]
Best score so far: 80.56


Average Metric: 31 / 36  (86.1): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:04<00:00,  8.78it/s]
2024/11/18 09:16:39 INFO dspy.evaluate.evaluate: Average Metric: 31 / 36 (86.1%)


New best score: 86.11 for seed -2
Scores so far: [80.56, 86.11]
Best score so far: 86.11


 28%|███████████████████████████████████▊                                                                                             | 10/36 [00:00<00:00, 1520.39it/s]


Bootstrapped 10 full traces after 10 examples for up to 1 rounds, amounting to 10 attempts.


Average Metric: 30 / 36  (83.3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:07<00:00,  5.06it/s]
2024/11/18 09:16:47 INFO dspy.evaluate.evaluate: Average Metric: 30 / 36 (83.3%)


Scores so far: [80.56, 86.11, 83.33]
Best score so far: 86.11


 28%|████████████████████████████████████▍                                                                                              | 10/36 [00:25<01:05,  2.53s/it]


Bootstrapped 7 full traces after 10 examples for up to 1 rounds, amounting to 10 attempts.


Average Metric: 31 / 36  (86.1): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:06<00:00,  5.38it/s]
2024/11/18 09:17:19 INFO dspy.evaluate.evaluate: Average Metric: 31 / 36 (86.1%)


Scores so far: [80.56, 86.11, 83.33, 86.11]
Best score so far: 86.11


 11%|██████████████▋                                                                                                                     | 4/36 [00:06<00:55,  1.72s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.


Average Metric: 31 / 36  (86.1): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:07<00:00,  4.59it/s]
2024/11/18 09:17:33 INFO dspy.evaluate.evaluate: Average Metric: 31 / 36 (86.1%)


Scores so far: [80.56, 86.11, 83.33, 86.11, 86.11]
Best score so far: 86.11


  3%|███▋                                                                                                                                | 1/36 [00:01<00:40,  1.17s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.


Average Metric: 32 / 36  (88.9): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:07<00:00,  4.80it/s]
2024/11/18 09:17:42 INFO dspy.evaluate.evaluate: Average Metric: 32 / 36 (88.9%)


New best score: 88.89 for seed 2
Scores so far: [80.56, 86.11, 83.33, 86.11, 86.11, 88.89]
Best score so far: 88.89


 14%|██████████████████▎                                                                                                                 | 5/36 [00:08<00:53,  1.72s/it]


Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.


Average Metric: 29 / 36  (80.6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:07<00:00,  5.10it/s]
2024/11/18 09:17:58 INFO dspy.evaluate.evaluate: Average Metric: 29 / 36 (80.6%)


Scores so far: [80.56, 86.11, 83.33, 86.11, 86.11, 88.89, 80.56]
Best score so far: 88.89


 19%|█████████████████████████▋                                                                                                          | 7/36 [00:11<00:46,  1.60s/it]


Bootstrapped 4 full traces after 7 examples for up to 1 rounds, amounting to 7 attempts.


Average Metric: 31 / 36  (86.1): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:06<00:00,  5.33it/s]
2024/11/18 09:18:16 INFO dspy.evaluate.evaluate: Average Metric: 31 / 36 (86.1%)


Scores so far: [80.56, 86.11, 83.33, 86.11, 86.11, 88.89, 80.56, 86.11]
Best score so far: 88.89
8 candidate programs found.


## Peek under the hood of the source code Implementation

### Source code
From: https://github.com/stanfordnlp/dspy/blob/main/dspy/teleprompt/random_search.py

       assert seed >= 0, seed

        random.Random(seed).shuffle(trainset_copy)
        size = random.Random(seed).randint(self.min_num_samples, self.max_num_samples)

        optimizer = BootstrapFewShot(
            metric=self.metric,
            metric_threshold=self.metric_threshold,
            max_bootstrapped_demos=size,
            max_labeled_demos=self.max_labeled_demos,
            teacher_settings=self.teacher_settings,
            max_rounds=self.max_rounds,
            max_errors=self.max_errors,
        )

        program = optimizer.compile(student, teacher=teacher, trainset=trainset_copy)

    evaluate = Evaluate(
        devset=self.valset,
        metric=self.metric,
        num_threads=self.num_threads,
        max_errors=self.max_errors,
        display_table=False,
        display_progress=True,
    )

    score, subscores = evaluate(program, return_all_scores=True)

    all_subscores.append(subscores)

### My own summary
Given the number of programs we will generate each time a different seed and run BootStrapFewShot with that 

# Evaluation

## Single Evaluation

In [13]:
from dspy.evaluate import answer_exact_match

# Instantiate the metric.
metric = answer_exact_match

example = test_examples[0]
# Produce a prediction from our `cot` module, using the `example` above as input.
print(example)
pred = cot_predictor(**example.inputs())
print(pred)

# Compute the metric score for the prediction.
score = metric(example, pred)

print(f"Customer message: \t {example.customer_message}\n")
print(f"Gold Response: \t {example.answer}\n")
print(f"Predicted Response: \t {pred.answer}\n")
print(f"Exact match score: {score:.2f}")

Example({'customer_message': 'show flights from minneapolis to kansas city', 'answer': 'flight', 'intent_labels': 'flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no'}) (input_keys={'customer_message', 'intent_labels'})
Prediction(
    rationale='identify the customer\'s intent. The customer is asking to see flights from one city (Minneapolis) to another (Kansas City). This indicates that they are looking for information related to flights, specifically the availability of flights between these two locations. The intent clearly aligns with searching for flights, which is represented by the label "flight."',
    answer='flight'
)
Customer message: 	 show flights from minneapolis to kansas city

Gold Response: 	 flight

Predicted Response: 	 flight

Exact match score: 1.00


## Setup Evaluation

In [14]:
from dspy.evaluate.evaluate import Evaluate
# Set up the `evaluate_atis` function. We'll use this many times below.
print(len(train_examples))
evaluate_atis = Evaluate(devset=test_examples, num_threads=8, display_progress=True, display_table=5, provide_traceback=True)

36


## Evaluate zero shot CoT 

In [15]:
# Evaluate the program with the `answer_exact_match` metric.
# Launch evaluation.
evaluate_atis(cot_predictor, metric=metric)


Average Metric: 37 / 42  (88.1): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:08<00:00,  5.17it/s]
2024/11/18 09:19:01 INFO dspy.evaluate.evaluate: Average Metric: 37 / 42 (88.1%)


Unnamed: 0,customer_message,example_answer,intent_labels,rationale,pred_answer,answer_exact_match
0,show flights from minneapolis to kansas city,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,identify the customer's intent. The customer is asking to see flights from one city (Minneapolis) to another (Kansas City). This indicates that they are looking...,flight,✔️ [True]
1,which flights leave on wednesday april thirteenth from indianapolis and arrive in montreal in the morning,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"produce the answer. We need to identify the main focus of the customer's message. The customer is inquiring about flights, specifically looking for details about...",flight_time,
2,i would like flight information from phoenix to denver,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"identify the customer's intent. The customer is asking for specific information regarding a flight from Phoenix to Denver. This request involves details about the flight,...",flight,✔️ [True]
3,how much is coach flight from pittsburgh to atlanta,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"determine the customer's intent. The customer is inquiring about the cost of a coach flight from Pittsburgh to Atlanta, which directly relates to airfare. The...",airfare,✔️ [True]
4,list airfares for first class round trip from detroit to st. petersburg,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,identify the customer's intent. The customer is asking for information about airfares specifically for a first-class round trip flight from Detroit to St. Petersburg. This...,airfare,✔️ [True]


88.1

## Evaluate few shot CoT

In [16]:
evaluate_atis(few_shot_model, metric=metric)

Average Metric: 33 / 42  (78.6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:14<00:00,  2.98it/s]
2024/11/18 09:19:22 INFO dspy.evaluate.evaluate: Average Metric: 33 / 42 (78.6%)


Unnamed: 0,customer_message,example_answer,intent_labels,rationale,pred_answer,answer_exact_match
0,show flights from minneapolis to kansas city,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,produce the answer. We need to identify the main intent of the customer message. The customer is asking to see flights from one city to...,flight,✔️ [True]
1,which flights leave on wednesday april thirteenth from indianapolis and arrive in montreal in the morning,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,determine the intent of the customer's message. The customer is asking for information about flights that leave from Indianapolis and arrive in Montreal on a...,flight_time,
2,i would like flight information from phoenix to denver,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"produce the answer. We need to identify the main focus of the customer's request. The customer is asking for flight information, which typically includes details...",flight,✔️ [True]
3,how much is coach flight from pittsburgh to atlanta,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,produce the answer. We need to identify the main focus of the customer's inquiry. The customer is asking about the cost of a coach flight...,airfare,✔️ [True]
4,list airfares for first class round trip from detroit to st. petersburg,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"produce the answer. We need to identify the main focus of the customer's request. The customer is asking for airfares specifically for first-class round trips,...",airfare,✔️ [True]


78.57

## Evaluate BootstrapedFewShot

In [17]:
evaluate_atis(cot_few_shot_optimized, metric=metric)

Average Metric: 37 / 42  (88.1): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:10<00:00,  4.06it/s]
2024/11/18 09:19:43 INFO dspy.evaluate.evaluate: Average Metric: 37 / 42 (88.1%)


Unnamed: 0,customer_message,example_answer,intent_labels,rationale,pred_answer,answer_exact_match
0,show flights from minneapolis to kansas city,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"identify the main focus of the customer's request. The customer is asking to see flights from Minneapolis to Kansas City, which indicates a clear interest...",flight,✔️ [True]
1,which flights leave on wednesday april thirteenth from indianapolis and arrive in montreal in the morning,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"identify the intent of the customer's message. The customer is inquiring about flights that leave on a specific day (Wednesday, April 13th) from Indianapolis to...",flight,✔️ [True]
2,i would like flight information from phoenix to denver,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,identify the main focus of the customer's request. The customer is explicitly asking for flight information regarding a route from Phoenix to Denver. This indicates...,flight,✔️ [True]
3,how much is coach flight from pittsburgh to atlanta,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"identify the main focus of the customer's inquiry. The customer is asking about the cost of a coach flight from Pittsburgh to Atlanta, which indicates...",airfare,✔️ [True]
4,list airfares for first class round trip from detroit to st. petersburg,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,identify the main focus of the customer's request. The customer is asking for airfares specifically for first-class round trip flights from Detroit to St. Petersburg....,airfare,✔️ [True]


88.1

## Evaluate Boostraped Random Search

In [18]:
evaluate_atis(cot_few_shot_rs_optimized, metric=metric)

Average Metric: 40 / 42  (95.2): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:09<00:00,  4.22it/s]
2024/11/18 09:19:58 INFO dspy.evaluate.evaluate: Average Metric: 40 / 42 (95.2%)


Unnamed: 0,customer_message,example_answer,intent_labels,rationale,pred_answer,answer_exact_match
0,show flights from minneapolis to kansas city,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"determine the intent of the customer's message. The customer is asking to see flights between two specific cities, which indicates they are looking for information...",flight,✔️ [True]
1,which flights leave on wednesday april thirteenth from indianapolis and arrive in montreal in the morning,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,determine the intent of the customer's message. The customer is asking for specific flight information regarding flights that leave from a particular city on a...,flight,✔️ [True]
2,i would like flight information from phoenix to denver,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,"determine the intent of the customer's message. The customer is requesting flight information between two specific cities, which indicates they are looking for details related...",flight,✔️ [True]
3,how much is coach flight from pittsburgh to atlanta,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,determine the intent of the customer's message. The customer is inquiring about the cost of a specific type of flight (coach) from one city to...,airfare,✔️ [True]
4,list airfares for first class round trip from detroit to st. petersburg,airfare,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,determine the intent of the customer's message. The customer is asking for information about airfares specifically for first class round trip flights between two cities....,airfare,✔️ [True]


95.24

# Save / Load models

In [21]:
cot_predictor.save("cot_zero_shot.json")
few_shot_model.save("cot_few_shot.json")
cot_few_shot_optimized.save("cot_boostraped_few_shot.json")
cot_few_shot_rs_optimized.save("cot_bootstraped_rs_few_shot.json")

In [22]:
cot_predictor.load("cot_zero_shot.json")
few_shot_model.load("cot_few_shot.json")
cot_few_shot_optimized.load("cot_boostraped_few_shot.json")
cot_few_shot_rs_optimized.load("cot_bootstraped_rs_few_shot.json")

In [27]:
cot_predictor.parameters()

[Predict(StringSignature(customer_message, intent_labels -> rationale, answer
     instructions='Classify the customer message into one of the intent labels.\nThe output should be only the predicted class as a single intent label.'
     customer_message = Field(annotation=str required=True json_schema_extra={'desc': 'Customer message during customer service interaction', '__dspy_field_type': 'input', 'prefix': 'Customer Message:'})
     intent_labels = Field(annotation=str required=True json_schema_extra={'desc': 'Labels that represent customer intent', '__dspy_field_type': 'input', 'prefix': 'Intent Labels:'})
     rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the answer}. We ...', '__dspy_field_type': 'output'})
     answer = Field(annotation=str required=True json_schema_extra={'desc': "a label best matching customer's intent ", '__dspy_field_type': 'output', 'prefix': 'Answer:'})
 ))

In [30]:
few_shot_model.demos

[Example({'customer_message': 'what kind of plane flies from boston to pittsburgh after noon', 'answer': 'aircraft', 'intent_labels': 'flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no'}) (input_keys={'customer_message', 'intent_labels'}),
 Example({'customer_message': 'what are the restrictions on the cheapest one way fare between boston and oakland', 'answer': 'restriction', 'intent_labels': 'flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no'}) (input_keys={'customer_message', 'intent_labels'}),
 Example({'customer_message': 'fine can you give me information o

In [31]:
cot_few_shot_optimized.demos

[Example({'augmented': True, 'customer_message': 'show me the flights from los angeles to pittsburgh which arrive at pittsburgh on monday', 'intent_labels': 'flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no', 'rationale': 'produce the answer. We need to identify the main focus of the customer\'s request. The customer is asking for flights from Los Angeles to Pittsburgh, specifically mentioning the arrival day (Monday). This indicates that the intent is related to finding flights. The most relevant label that matches this request is "flight".', 'answer': 'flight'}) (input_keys=None),
 Example({'augmented': True, 'customer_message': 'what time does the flight leave denver going to san francisco on continental airlines', 'intent_labels': 'flight%flight_time%airfare%aircraft%g

In [78]:
cot_few_shot_rs_optimized.demos

[Example({'augmented': True, 'customer_message': 'show me the flights from boston to oakland', 'intent_labels': 'flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no', 'rationale': 'produce the answer. We need to identify the main request in the customer message. The customer is asking for information about flights from Boston to Oakland, which indicates they are looking for flight details. The most relevant intent label that matches this request is "flight".', 'answer': 'flight'}) (input_keys=None),
 Example({'augmented': True, 'customer_message': 'does delta have an early afternoon flight from boston to san francisco', 'intent_labels': 'flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight

# Remove bootstrapping

In [41]:
from dspy.evaluate import answer_exact_match as metric
from dspy.teleprompt import BootstrapFewShot

optimizer = BootstrapFewShot(
    metric=metric,
    max_bootstrapped_demos=4, # no need for boostrapped
    max_labeled_demos=40, # increase examples
    max_rounds=20,
)

# documentation is wrong - there is not valset: https://dspy.ai/deep-dive/optimizers/bootstrap-fewshot/
cot_few_shot_to_bootstrap = optimizer.compile(cot_predictor, trainset=train_examples)


evaluate_atis(cot_few_shot_to_bootstrap, metric=metric)
# bootrsapped_demos: generated demos?
# max_labeled_demos: input demos

 11%|██████████████▋                                                                                                                     | 4/36 [00:01<00:11,  2.85it/s]


Bootstrapped 4 full traces after 4 examples for up to 20 rounds, amounting to 8 attempts.


Average Metric: 32 / 40  (80.0): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:11<00:00,  3.63it/s]
2024/11/17 06:42:28 INFO dspy.evaluate.evaluate: Average Metric: 32 / 40 (80.0%)


Unnamed: 0,customer_message,example_answer,intent_labels,rationale,pred_answer,answer_exact_match
0,list airports in new york,airport,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,determine the intent of the customer's message. The customer is asking for a list of airports located in New York. This request is focused on...,airport,✔️ [True]
1,show me flights from montreal to orlando,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,produce the answer. The customer is requesting information about flights from Montreal to Orlando. This indicates that the intent is related to finding flights between...,flight,✔️ [True]
2,which flights leave chicago next tuesday and arrive in detroit around 6 pm,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,determine the intent of the customer's message. The customer is asking for information about flights that leave from Chicago next Tuesday and arrive in Detroit...,flight,✔️ [True]
3,list the flights from dallas to baltimore arriving july first,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,determine the intent of the customer's message. The customer is asking for a list of flights from Dallas to Baltimore that are arriving on a...,flight,✔️ [True]
4,show flights tomorrow evening from milwaukee to st. louis,flight,flight%flight_time%airfare%aircraft%ground_service%airport%airline%distance%abbreviation%ground_fare%quantity%city%flight_no%capacity%flight+airfare%meal%restriction%airline+flight_no%ground_service+ground_fare%airfare+flight_time%cheapest%aircraft+flight+flight_no,produce the answer. The customer is requesting information about flights that are scheduled for tomorrow evening from Milwaukee to St. Louis. This indicates that the...,flight,✔️ [True]


80.0