In [17]:
import dspy
import random
from dspy.datasets import DataLoader
from datasets import load_dataset

# Load the Banking77 dataset.
# The `label` feature's `names` list maps each integer label id to its class-name string.
CLASSES = load_dataset("PolyAI/banking77", split="train", trust_remote_code=True).features['label'].names
kwargs = dict(fields=("text", "label"), input_keys=("text",), split="train", trust_remote_code=True)

# How many examples (taken from the start of the train split) to keep.
NUM_RAW_EXAMPLES = 1000

# Keep the first NUM_RAW_EXAMPLES examples and replace each integer label with
# its class-name string. `text` is the only input field; no hint field is attached.
raw_data = [
    dspy.Example(x, label=CLASSES[x.label]).with_inputs("text")
    for x in DataLoader().from_huggingface(dataset_name="PolyAI/banking77", **kwargs)[:NUM_RAW_EXAMPLES]
]

# Shuffle in place with a fixed seed so the example order is the same on every run.
random.Random(0).shuffle(raw_data)

  _CITATION = """\
Downloading data: 100%|██████████| 839k/839k [00:00<00:00, 7.19MB/s]
Downloading data: 100%|██████████| 240k/240k [00:00<00:00, 9.07MB/s]
Generating train split: 100%|██████████| 10003/10003 [00:00<00:00, 117937.35 examples/s]
Generating test split: 100%|██████████| 3080/3080 [00:00<00:00, 161172.46 examples/s]


Average Metric: 3.00 / 17 (17.6%):  10%|█         | 21/200 [1:03:49<9:04:03, 182.36s/it]


In [18]:
# Inspect the shuffled examples: each one pairs a `text` query with its class-name `label`.
# NOTE(review): this displays the entire list; consider a slice such as raw_data[:5]
# to keep the notebook output short.
raw_data

[Example({'text': 'What if there is an error on the exchange rate?', 'label': 'card_payment_wrong_exchange_rate'}) (input_keys={'text'}),
 Example({'text': 'There must have been a mistake, why was I charged an extra pound?', 'label': 'extra_charge_on_statement'}) (input_keys={'text'}),
 Example({'text': 'If I find a card I lost do I need to dispose of it? Or can I re-active the card and continue to use it?', 'label': 'card_linking'}) (input_keys={'text'}),
 Example({'text': 'The exchange rate for my electronic payment is incorrect.', 'label': 'card_payment_wrong_exchange_rate'}) (input_keys={'text'}),
 Example({'text': "I'd lke my card reactivated, I was thinking it was misplaced but I discovered when I woke up that I had left it in my coat.", 'label': 'card_linking'}) (input_keys={'text'}),
 Example({'text': 'How is the current exchange rate?', 'label': 'exchange_rate'}) (input_keys={'text'}),
 Example({'text': 'A pending charge of 1L appears on my statement.  Please explain as I have

In [19]:
# Sanity check: total number of label classes and the first ten class names.
len(CLASSES), CLASSES[:10]

(77,
 ['activate_my_card',
  'age_limit',
  'apple_pay_or_google_pay',
  'atm_support',
  'automatic_top_up',
  'balance_not_updated_after_bank_transfer',
  'balance_not_updated_after_cheque_or_cash_deposit',
  'beneficiary_not_allowed',
  'cancel_transfer',
  'card_about_to_expire'])

In [20]:
# Build an unlabeled training set from the first 500 shuffled examples:
# only the `text` field is copied over, so the gold label is dropped.
unlabeled_trainset = [
    dspy.Example(text=example.text).with_inputs("text")
    for example in raw_data[:500]
]

# Show one example to confirm it carries no label.
unlabeled_trainset[0]

Example({'text': 'What if there is an error on the exchange rate?'}) (input_keys={'text'})

In [21]:
from typing import Literal

# Chain-of-thought classifier over a string signature. CLASSES is a list, so its
# string form is "['activate_my_card', ...]"; appended to "Literal" it spells out
# a Literal[...] annotation that names every class as the type of `label`.
classify = dspy.ChainOfThought(
    "text -> label: Literal" + str(CLASSES)
)