# Import dspy

In [1]:
import dspy

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Language model: GPT-3.5-turbo. Retrieval model: ColBERTv2 over the Wiki17 abstracts dataset.
LM_MODEL = 'gpt-3.5-turbo'
COLBERT_URL = 'http://20.102.90.50:2017/wiki17_abstracts'

turbo = dspy.OpenAI(model=LM_MODEL)
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url=COLBERT_URL)

# Hand both models to DSPy's global settings.
dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)

# Task Examples

In [4]:
from dspy.datasets import HotPotQA

# Build a small HotPotQA sample: 20 training examples, 50 dev examples, no test examples.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field; every other field on an example
# is then treated as a label and/or metadata.
trainset = [example.with_inputs('question') for example in dataset.train]
devset = [example.with_inputs('question') for example in dataset.dev]

# Last expression of the cell: displayed as the (train, dev) sizes.
len(trainset), len(devset)

Downloading builder script: 100%|██████████| 6.42k/6.42k [00:00<00:00, 11.7MB/s]
Downloading readme: 100%|██████████| 9.19k/9.19k [00:00<00:00, 23.9MB/s]
Downloading data: 100%|██████████| 566M/566M [01:29<00:00, 6.33MB/s]
Downloading data: 100%|██████████| 47.5M/47.5M [00:06<00:00, 7.16MB/s]
Downloading data: 100%|██████████| 46.2M/46.2M [00:13<00:00, 3.52MB/s]
Downloading data files: 100%|██████████| 3/3 [01:49<00:00, 36.66s/it]
Generating train split: 100%|██████████| 90447/90447 [00:08<00:00, 10216.50 examples/s]
Generating validation split: 100%|██████████| 7405/7405 [00:00<00:00, 12876.56 examples/s]
Generating test split: 100%|██████████| 7405/7405 [00:00<00:00, 14142.52 examples/s]
  table = cls._concat_blocks(blocks, axis=0)


(20, 50)

In [5]:
# Peek at the first training example: its question and its answer.
train_example = trainset[0]
print(
    f"Question: {train_example.question}",
    f"Answer: {train_example.answer}",
    sep="\n",
)

Question: At My Window was released by which American singer-songwriter?
Answer: John Townes Van Zandt


In [6]:
# Pick one dev example; besides question and answer it also exposes gold_titles.
dev_example = devset[18]
print(
    f"Question: {dev_example.question}",
    f"Answer: {dev_example.answer}",
    f"Relevant Wikipedia Titles: {dev_example.gold_titles}",
    sep="\n",
)

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Answer: English
Relevant Wikipedia Titles: {'Restaurant: Impossible', 'Robert Irvine'}


In [7]:
# Compare which fields are inputs and which are labels for a training vs. a dev example.
print(
    f"For this dataset, training examples have input keys {train_example.inputs().keys()} and label keys {train_example.labels().keys()}",
    f"For this dataset, dev examples have input keys {dev_example.inputs().keys()} and label keys {dev_example.labels().keys()}",
    sep="\n",
)

For this dataset, training examples have input keys ['question'] and label keys ['answer']
For this dataset, dev examples have input keys ['question'] and label keys ['answer', 'gold_titles']


# Building Blocks

In [8]:
# Signature for plain question answering: one input field (question), one output field (answer).
class BasicQA(dspy.Signature):
    # NOTE: the docstring below is not only documentation. Its text appears verbatim at the top
    # of the prompt sent to the LM (see the inspect_history output in this notebook), so
    # rewording it changes the prompt.
    """Answer questions with short factoid answers."""

    # Input field; shown as "Question:" in the generated prompt.
    question = dspy.InputField()
    # Output field; the desc text is rendered into the prompt's format section after "Answer:".
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [9]:
# Wrap the BasicQA signature in a predictor.
generate_answer = dspy.Predict(BasicQA)

# Ask it the dev example's question.
pred = generate_answer(question=dev_example.question)

# Show the question alongside the predicted answer.
print(
    f"Question: {dev_example.question}",
    f"Predicted Answer: {pred.answer}",
    sep="\n",
)

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Predicted Answer: American


In [10]:
# Print the most recent prompt sent to the LM together with its completion (n=1: a single entry).
turbo.inspect_history(n=1)





Answer questions with short factoid answers.

---

Follow the following format.

Question: ${question}
Answer: often between 1 and 5 words

---

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Answer:[32m American[0m



