In [1]:
!pip install "arize-phoenix[evals]" dspy-ai openinference-instrumentation-dspy opentelemetry-exporter-otlp openai "cohere<5.0.0" datasets 'scikit-learn'



In [1]:
from getpass import getpass
import os

import openai

if not (openai_api_key := os.getenv("OPENAI_API_KEY")):
    openai_api_key = getpass("🔑 Enter your OpenAI API key: ")
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

In [2]:
from  getpass import getpass
import os

import cohere

if not (cohere_api_key := os.getenv("CO_API_KEY")):
    cohere_api_key = getpass("🔑 Enter your Cohere API key: ")
cohere.api_key = cohere_api_key
os.environ["CO_API_KEY"] = cohere_api_key

In [3]:
import phoenix as px

session = px.launch_app()

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📺 To view the Phoenix app in a notebook, run `px.active_session().view()`
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [4]:
# Initialize Instrumentation
from openinference.instrumentation.dspy import DSPyInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

endpoint = "http://127.0.0.1:6006/v1/traces"
resource = Resource(attributes={})
tracer_provider = trace_sdk.TracerProvider(resource=resource)
span_otlp_exporter = OTLPSpanExporter(endpoint=endpoint)
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter=span_otlp_exporter))

trace_api.set_tracer_provider(tracer_provider=tracer_provider)
DSPyInstrumentor().instrument()

Unknown project: UHJvamVjdDox

GraphQL request:4:3
3 | ) {
4 |   node(id: $id) {
  |   ^
5 |     __typename
Traceback (most recent call last):
  File "/Users/mikeldking/.virtualenvs/phoenix/lib/python3.10/site-packages/graphql/execution/execute.py", line 521, in execute_field
    result = resolve_fn(source, info, **args)
  File "/Users/mikeldking/.virtualenvs/phoenix/lib/python3.10/site-packages/strawberry/schema/schema_converter.py", line 692, in _resolver
    return _get_result_with_extensions(
  File "/Users/mikeldking/.virtualenvs/phoenix/lib/python3.10/site-packages/strawberry/schema/schema_converter.py", line 678, in extension_resolver
    return reduce(
  File "/Users/mikeldking/.virtualenvs/phoenix/lib/python3.10/site-packages/strawberry/schema/schema_converter.py", line 673, in wrapped_get_result
    return _get_result(
  File "/Users/mikeldking/.virtualenvs/phoenix/lib/python3.10/site-packages/strawberry/schema/schema_converter.py", line 634, in _get_result
    return field.g

In [5]:
from datasets import load_dataset

dataset = load_dataset("wiki_qa")

In [6]:
dataset["test"].to_pandas().head()

Unnamed: 0,question_id,question,document_title,answer,label
0,Q0,HOW AFRICAN AMERICANS WERE IMMIGRATED TO THE US,African immigration to the United States,African immigration to the United States refer...,0
1,Q0,HOW AFRICAN AMERICANS WERE IMMIGRATED TO THE US,African immigration to the United States,The term African in the scope of this article ...,0
2,Q0,HOW AFRICAN AMERICANS WERE IMMIGRATED TO THE US,African immigration to the United States,From the Immigration and Nationality Act of 19...,0
3,Q0,HOW AFRICAN AMERICANS WERE IMMIGRATED TO THE US,African immigration to the United States,African immigrants in the United States come f...,0
4,Q0,HOW AFRICAN AMERICANS WERE IMMIGRATED TO THE US,African immigration to the United States,"They include people from different national, l...",0


In [7]:
dataset["train"].to_pandas().head()

Unnamed: 0,question_id,question,document_title,answer,label
0,Q1,how are glacier caves formed?,Glacier cave,A partly submerged glacier cave on Perito More...,0
1,Q1,how are glacier caves formed?,Glacier cave,The ice facade is approximately 60 m high,0
2,Q1,how are glacier caves formed?,Glacier cave,Ice formations in the Titlis glacier cave,0
3,Q1,how are glacier caves formed?,Glacier cave,A glacier cave is a cave formed within the ice...,1
4,Q1,how are glacier caves formed?,Glacier cave,"Glacier caves are often called ice caves , but...",0


In [8]:
import dspy

trainset = [dspy.Example(row).with_inputs("question", "answer") for row in dataset["train"]]
valset = [dspy.Example(row).with_inputs("question", "answer") for row in dataset["validation"]]

In [9]:
import dspy

turbo = dspy.OpenAI(model="gpt-3.5-turbo")
dspy.settings.configure(lm=turbo)

In [10]:
import dspy

class RelevanceEvalSignature(dspy.Signature):
    question = dspy.InputField()
    answer = dspy.InputField()
    label = dspy.OutputField(desc="relevant or unrelated")

class Classify(dspy.Module):
    def __init__(self):
        super().__init__()
        self.prog = dspy.ChainOfThought(RelevanceEvalSignature)

    def forward(self, question):
        return self.prog(question=question)

In [11]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from phoenix.trace import using_project

# Set up the optimizer: we want to "bootstrap" (i.e., self-generate) 3-shot examples of our CoT program.
config = dict(max_bootstrapped_demos=3, max_labeled_demos=3)

# Optimize! Use our own custom metric
def validate_label(example, pred, trace=None):
    if example["label"] == 1:
        match = pred.label.lower() == "relevant"
    else:
        match = pred.label.lower() == "unrelated"
    return 1 if match else 0

teleprompter = BootstrapFewShotWithRandomSearch(metric=validate_label, **config)
with using_project("relevance_training"):
    optimized_cot = teleprompter.compile(Classify(), trainset=trainset, valset=valset)

Going to sample between 1 and 3 traces per predictor.
Will attempt to train 16 candidate sets.


  0%|          | 0/2733 [00:00<?, ?it/s]




Average Metric: 0 / 4  (0.0):   0%|          | 3/2733 [00:02<1:34:24,  2.07s/it]



Average Metric: 0 / 6  (0.0):   0%|          | 5/2733 [00:02<16:20,  2.78it/s]  



Average Metric: 0 / 7  (0.0):   0%|          | 7/2733 [00:02<12:26,  3.65it/s]



Average Metric: 0 / 8  (0.0):   0%|          | 8/2733 [00:02<12:12,  3.72it/s]



Average Metric: 0 / 9  (0.0):   0%|          | 9/2733 [00:03<17:04,  2.66it/s]



Average Metric: 0 / 11  (0.0):   0%|          | 11/2733 [00:03<13:30,  3.36it/s]



Average Metric: 1 / 13  (7.7):   0%|          | 12/2733 [00:04<14:54,  3.04it/s]



Average Metric: 2 / 14  (14.3):   0%|          | 13/2733 [00:04<13:13,  3.43it/s]



Average Metric: 3 / 19  (15.8):   1%|          | 18/2733 [00:05<14:44,  3.07it/s]



Average Metric: 3 / 20  (15.0):   1%|          | 20/2733 [00:05<08:23,  5.39it/s]



Average Metric: 3 / 22  (13.6):   1%|          | 21/2733 [00:06<08:57,  5.05it/s]



Average Metric: 3 / 23  (13.0):   1%|          | 23/2733 [00:06<08:18,  5.44it/s]



Average Metric: 4 / 38  (10.5):   1%|▏         | 37/2733 [00:07<04:13, 10.64it/s]



Average Metric: 4 / 40  (10.0):   1%|▏         | 39/2733 [00:07<03:45, 11.93it/s]



Average Metric: 4 / 41  (9.8):   1%|▏         | 40/2733 [00:08<03:45, 11.93it/s] 



Average Metric: 4 / 42  (9.5):   2%|▏         | 42/2733 [00:08<06:11,  7.25it/s]



Average Metric: 4 / 44  (9.1):   2%|▏         | 44/2733 [00:08<06:19,  7.08it/s]



Average Metric: 4 / 55  (7.3):   2%|▏         | 54/2733 [00:09<05:35,  7.98it/s]



Average Metric: 4 / 55  (7.3):   2%|▏         | 55/2733 [00:20<05:35,  7.98it/s]


