In [None]:
%%capture
%pip install dspy-ai

In [34]:
import dspy
turbo = dspy.OpenAI(model='gpt-3.5-turbo')
colbertv2_scenic_docs = dspy.ColBERTv2(url='https://scenic-lang.readthedocs.io/en/latest/')

dspy.settings.configure(lm=turbo, rm=colbertv2_scenic_docs)

In [35]:
# We will make our own question and answer dataset pairs
# we read in the existing programs from the outputs/predict_few_shot folder
# Anything before "param map" is the question, anything after including "param map" is the answer

import os
from typing import List

num_samples = 200

class Example:
    def __init__(self, question : str, answer : str):
        self.question = question
        self.answer = answer

    def __str__(self):
        return f'Question: \n{self.question}\nAnswer: \n{self.answer}'

    def __repr__(self):
        return self.__str__()

def build_dataset(path : str, num_samples : int) -> List[Example]:
    dataset = []
    for file in os.listdir(path)[:num_samples]:
        with open(os.path.join(path, file), 'r') as f:
            program = f.read()
            description = program.split('param map')[0]
            description = description.replace('#', '')

            scenic = program.split('param map')[1]
            scenic = 'param map' + scenic

            ex = Example(description, scenic)
            dspy_example = dspy.Example(question=ex.question, answer=ex.answer)
            dspy_example.with_inputs('question')
            
            dataset.append(dspy_example)
    return dataset

dataset = build_dataset('../outputs/predict_few_shot', num_samples)
print("An example from the dataset:")
print(dataset[0])

# We can split the dataset into train and test sets 80/20
import random
random.shuffle(dataset)
split_percentage = 0.8
split_index = int(len(dataset) * split_percentage)
train_data = dataset[:split_index]
test_data = dataset[split_index:]

An example from the dataset:
Example({'question': '  On October 20th, a test vehicle operating in manual driving mode was stopped at a red light after exiting Highway 101 to turn right onto\n Lawrence Expressway in Sunnyvale when a Ford pickup made contact from the rear. No injuries were reported and law enforcement was\n not called to the scene. Both vehicles sustained moderate damage.\n\n', 'answer': "param map = localPath('../../../assets/maps/CARLA/Town01.xodr')\nparam carla_map = 'Town01'\nmodel scenic.simulators.carla.model\n\nbehavior EgoBehavior():\n    do WaitForTrafficLightBehavior()\n\nbehavior FollowingVehicleBehavior():\n    do FollowLaneBehavior()\n\nintersection = network.intersections[0] \nrightTurn = intersection.outgoingManeuvers[0]\nstopLine = rightTurn.stopLine\n\nego = new Car at stopLine, \n    with behavior EgoBehavior()\n\nfollowingVehicle = new Pickup following roadDirection from ego for 5,\n    with behavior FollowingVehicleBehavior()\n\nrequire followingVehic

In [36]:
class GenScenic(dspy.Signature):
    """
    Write Scenic probablistic programs given a natural language description 
    of an autonomous vehicle crash report.
    """
    question = dspy.InputField()
    param_definitions = dspy.OutputField(desc='Necessary imports and parameter definitions')
    scene_setup = dspy.OutputField(desc='Objects that should be placed in the scene')
    behaviors = dspy.OutputField(desc='Dynamics of the objects in the scene')
    placement = dspy.OutputField(desc='Placement of the dynamic objects in the scene')
    post_conditions = dspy.OutputField(desc='Requires and Terminates clauses')

In [37]:
generate_ans = dspy.Predict(GenScenic)

pred = generate_ans(question=test_data[0].question)

print(f"Question:\n {test_data[0].question}")
print(f"Predicted Answer:\n {pred}")

turbo.inspect_history(n=1)

Question:
   SCENARIO DESCRIPTION
 At approximately 3:50 pm, the Lyft autonomous vehicle ("Lyft") was traveling northbound on San Antonio Road in Palo Alto while in manual mode when a third-party vehicle made contact with rear bumper at a very low speed. At the time of contact, was stopped for traffic for approximately 2 seconds during stop and go traffic. No visible damage was noted by either party at the scene. No injuries were reported and local law enforcement was not called.


Predicted Answer:
 Prediction(
    param_definitions='Necessary imports and parameter definitions:\n- time: float (time in seconds)\n- speed: float (speed in meters per second)\n- position: tuple (x, y) (position in meters)\n- vehicle: string (name of the vehicle)\n- contact_speed: float (speed of the third-party vehicle at the time of contact)',
    scene_setup='- Lyft autonomous vehicle ("Lyft")\n- Third-party vehicle',
    behaviors='- Lyft is traveling northbound on San Antonio Road in manual mode\n- Lyf

In [38]:
class GenScenic(dspy.Signature):
    """
    Write Scenic probablistic programs given a natural language description 
    of an autonomous vehicle crash report.
    """
    context = dspy.InputField(desc="may contain relevant information on how to construct a scenic program")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="Scenic program")

class RAG(dspy.Module):
    def __init__(self, num_passages : int = 3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenScenic)

    def forward(self, question : str) -> str:
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

In [39]:
from dspy.teleprompt import BootstrapFewShot, BootstrapFinetune
from dspy.evaluate import answer_exact_match, auto_evaluation, answer_passage_match

# Validation logic: check that the generated program is valid
def validate_context_and_answer(example, prediction, trace=None):
    return auto_evaluation(example, prediction)

few_show_teleprompt = BootstrapFewShot(metric=validate_context_and_answer)

compiled_few_shot = few_show_teleprompt.compile(RAG(), trainset=train_data)

pred = compiled_few_shot(test_data[0].question)
print(f"Question:\n {test_data[0].question}")
print(f"Predicted Answer using Few Shot and RAG:\n {pred}")
print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}")

  3%|▎         | 4/158 [00:00<00:00, 13421.77it/s]

Failed to run or to evaluate example Example({'question': '  SCENARIO DESCRIPTION\n A Zoox vehicle was in autonomy proceeding straight on Pine Street in San Francisco, CA when it encountered a double parked United States Postal Service van in lane (vehicle 2: model, year, VIN and insurance are unknown). The Zoox vehicle came to a complete stop while waiting for an opportunity to merge around vehicle 2. Vehicle 2 then rolled backwards into the Zoox vehicle, causing minor damage to front sensors and Vehicle 2 rear bumper. There were no injuries and first responders were not notified. Post incident review did not observe any brake lights or reverse lights on vehicle 2 while it was rolling backwards.\n\n', 'answer': 'param map = localPath(\'../../../assets/maps/CARLA/Town01.xodr\')\nparam carla_map = \'Town01\'\nmodel scenic.simulators.carla.model\n\nEGO_MODEL = "vehicle.lincoln.mkz_2017"\nVAN_MODEL = "vehicle.postalvan"\nEGO_SPEED = 10\nMERGE_THRESHOLD = 5\n\nbehavior EgoBehavior():\n    




ValueError: Inputs have not been set for this example. Use `example.with_inputs()` to set them.

In [None]:
finetune_teleprompt = BootstrapFinetune(metric=validate_context_and_answer)

compiled_finetune = finetune_teleprompt.compile(RAG(), trainset=train_data)

pred = compiled_finetune(test_data[0].question)
print(f"Question:\n {test_data[0].question}")
print(f"Predicted Answer using Finetune and RAG:\n {pred}")
print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}")

In [None]:
generate_ans_with_chain_of_thought = dspy.ChainOfThought(GenScenic)

pred = generate_ans_with_chain_of_thought(question=test_data[0].question)

print(f"Question:\n {test_data[0].question}")
print(f"Thought: {pred.rationale.split('.', 1)[1].strip()}")
print(f"Predicted Answer:\n {pred}")