In [1]:
!pip install "arize-phoenix[evals]" "dspy-ai==2.4.3" openinference-instrumentation-dspy opentelemetry-exporter-otlp openai "cohere==5.3.3" datasets 'scikit-learn'



In [2]:
from getpass import getpass
import os

import openai

if not (openai_api_key := os.getenv("OPENAI_API_KEY")):
    openai_api_key = getpass("🔑 Enter your OpenAI API key: ")
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

In [1]:
from  getpass import getpass
import os

import cohere

if not (cohere_api_key := os.getenv("CO_API_KEY")):
    cohere_api_key = getpass("🔑 Enter your Cohere API key: ")
cohere.api_key = cohere_api_key
os.environ["CO_API_KEY"] = cohere_api_key

In [4]:
import phoenix as px

session = px.launch_app()

  from .autonotebook import tqdm as notebook_tqdm


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📺 To view the Phoenix app in a notebook, run `px.active_session().view()`
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [5]:
# Initialize Instrumentation
from openinference.instrumentation.dspy import DSPyInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

endpoint = "http://127.0.0.1:6006/v1/traces"
resource = Resource(attributes={})
tracer_provider = trace_sdk.TracerProvider(resource=resource)
span_otlp_exporter = OTLPSpanExporter(endpoint=endpoint)
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter=span_otlp_exporter))

trace_api.set_tracer_provider(tracer_provider=tracer_provider)
DSPyInstrumentor().instrument()

ModuleNotFoundError: No module named 'openinference.instrumentation.dspy'

In [None]:
from datasets import load_dataset

dataset = load_dataset("wiki_qa")

In [None]:
dataset["train"]

Dataset({
    features: ['question_id', 'question', 'document_title', 'answer', 'label'],
    num_rows: 20360
})

In [None]:
import dspy

trainset = [dspy.Example({ "question": row["question"], "context": row["answer"], "label": "relevant" if row["label"] == 1 else "unrelated" }).with_inputs("question", "context") for row in dataset["train"].select(range(30))]
valset = [dspy.Example({ "question": row["question"], "context": row["answer"], "label": "relevant" if row["label"] == 1 else "unrelated" }).with_inputs("question", "context") for row in dataset["validation"].select(range(6))]

In [None]:
trainset

[Example({'question': 'how are glacier caves formed?', 'context': 'A partly submerged glacier cave on Perito Moreno Glacier .', 'label': 'unrelated'}) (input_keys={'context', 'question'}),
 Example({'question': 'how are glacier caves formed?', 'context': 'The ice facade is approximately 60 m high', 'label': 'unrelated'}) (input_keys={'context', 'question'}),
 Example({'question': 'how are glacier caves formed?', 'context': 'Ice formations in the Titlis glacier cave', 'label': 'unrelated'}) (input_keys={'context', 'question'}),
 Example({'question': 'how are glacier caves formed?', 'context': 'A glacier cave is a cave formed within the ice of a glacier .', 'label': 'relevant'}) (input_keys={'context', 'question'}),
 Example({'question': 'how are glacier caves formed?', 'context': 'Glacier caves are often called ice caves , but this term is properly used to describe bedrock caves that contain year-round ice.', 'label': 'unrelated'}) (input_keys={'context', 'question'}),
 Example({'questi

In [2]:
import dspy

command_r = dspy.Cohere(model="command-r", max_tokens=1000, api_key=os.environ["CO_API_KEY"])
dspy.settings.configure(lm=command_r)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
print(command_r("say_hello"))

["Hello! How's it going? 😊"]


In [None]:
import dspy

class RelevanceEvalSignature(dspy.Signature):
    question = dspy.InputField()
    context = dspy.InputField()
    label = dspy.OutputField(desc="relevant or unrelated")

class Classify(dspy.Module):
    def __init__(self):
        super().__init__()
        self.prog = dspy.ChainOfThought(RelevanceEvalSignature)

    def forward(self, question: str, context: str):
        resp = self.prog(question=question, context=context)
        # dspy.Suggest(resp.label in ["relevant", "unrelated"], "the label must be either 'relevant' or 'unrelated'")
        return resp

In [None]:
trainset

In [None]:
relevance_classify = Classify()


In [None]:
from dspy.evaluate import Evaluate

# Optimize! Use our own custom metric
def validate_label(example, pred, trace=None):
    match = example["label"] == pred.label.lower()
    return 1 if match else 0

evaluator = Evaluate(devset=trainset)

In [None]:
from phoenix.trace import using_project

with using_project("eval-pre-optimized"):
    evaluator(relevance_classify, metric=validate_label)

In [None]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from phoenix.trace import using_project

# Set up the optimizer: we want to "bootstrap" (i.e., self-generate) 3-shot examples of our CoT program.
config = dict(max_bootstrapped_demos=3, max_labeled_demos=3)

teleprompter = BootstrapFewShotWithRandomSearch(metric=validate_label, **config)
with using_project("relevance_training"):
    optimized_classify = teleprompter.compile(relevance_classify, trainset=trainset, valset=valset)

In [None]:
optimized_classify.save(path="optimized_relevance_classify")

In [None]:
from phoenix.trace import using_project

with using_project("eval-post-optimized-rd2"):
    evaluator(optimized_classify, metric=validate_label)