In [1]:
# %pip install -q arize-phoenix langchain langchain-core langchain-community langchain-openai langchain-benchmarks openinference-instrumentation-langchain openinference-instrumentation-openai nest_asyncio jarowinkler

# Set Up OpenAI API Key

In [2]:
import os
from getpass import getpass

# Ask for the key interactively only when the environment has no non-empty value for it.
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass("🔑 Enter your OpenAI API key: ")

# Import Modules

In [3]:
import json
import tempfile

import jarowinkler
import nest_asyncio
import pandas as pd
import phoenix as px
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_benchmarks import download_public_dataset, registry
from langchain_openai.chat_models import ChatOpenAI
from openinference.instrumentation.langchain import LangChainInstrumentor
from openinference.instrumentation.openai import OpenAIInstrumentor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from phoenix.datasets.experiments import evaluate_experiment, run_experiment
from phoenix.datasets.types import Example

# nest_asyncio patches asyncio so that an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel already owns a running loop
# and the experiment helpers used below drive async work — confirm.
nest_asyncio.apply()

# Launch Phoenix

In [4]:
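# Uncomment to start Phoenix from inside this notebook. It is left disabled here on the
# assumption that a Phoenix server is already running locally (the cells below talk to 127.0.0.1).
# px.launch_app()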

# Instrument LangChain and OpenAI

In [5]:
# Local address the OTLP/gRPC span exporter sends traces to.
endpoint = "http://127.0.0.1:4317"

# Build the provider first, then attach a processor that forwards spans to the exporter.
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))

# Point both the LangChain and the OpenAI instrumentation at the same provider.
LangChainInstrumentor().instrument(tracer_provider=tracer_provider)
OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)

# Download JSON Data

In [6]:
dataset_name = "Email Extraction"

# Download into a scratch directory instead of an open NamedTemporaryFile: a temporary file
# that is still held open cannot be reopened by name on Windows, which would break both the
# download (a second writer) and the read below. The directory is removed on exit.
with tempfile.TemporaryDirectory() as tmp_dir:
    json_path = os.path.join(tmp_dir, "dataset.json")
    download_public_dataset(registry[dataset_name].dataset_id, path=json_path)
    # Only the "inputs" and "outputs" columns are used by the rest of the notebook.
    df = pd.read_json(json_path)[["inputs", "outputs"]]
# Keep a fixed-seed sample of 10 rows so re-runs select the same examples.
df = df.sample(10, random_state=42)
df

Fetching examples...


  0%|          | 0/42 [00:00<?, ?it/s]

Done fetching examples.


Unnamed: 0,inputs,outputs
25,{'input': '**iCloud** �� # Failed to atte...,"{'output': {'tone': 'negative', 'topic': 'iClo..."
13,{'input': '--- | We Passed the Stop Dang...,"{'output': {'tone': 'positive', 'topic': 'Stop..."
8,{'input': '#### Where sustainability meets st...,"{'output': {'tone': 'positive', 'topic': 'Prom..."
26,"{'input': '| | | | | | Hello Jacob, 👋  ...","{'output': {'tone': 'positive', 'topic': 'Busi..."
4,{'input': 'Some travelers plan ahead; others p...,"{'output': {'tone': 'positive', 'topic': 'Trav..."
39,{'input': '--- | Costco --- ANSWE...,"{'output': {'tone': 'positive', 'topic': 'Invi..."
19,"{'input': 'Dear Jacob, Your opinion matte...","{'output': {'tone': 'positive', 'topic': 'Invi..."
29,{'input': '_`I Am looking for a possible partn...,"{'output': {'tone': 'positive', 'topic': 'Inve..."
30,{'input': 'It's always been a hassle to get mo...,"{'output': {'tone': 'positive', 'topic': 'Busi..."
6,{'input': 'Your exclusive retreat at The Venet...,"{'output': {'tone': 'positive', 'topic': 'Excl..."


# Upload Dataset to Phoenix

In [8]:
# Upload the sampled frame under the benchmark's name; the "inputs" column is passed as the
# input key and the "outputs" column as the output key.
phoenix_client = px.Client()
dataset = phoenix_client.upload_dataset(
    df,
    name=dataset_name,
    input_keys=("inputs",),
    output_keys=("outputs",),
)
dataset.dataframe.head()

Unnamed: 0_level_0,input,output,metadata
example_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RGF0YXNldEV4YW1wbGU6MTcz,{'inputs': {'input': '**iCloud** �� # Fai...,{'outputs': {'output': {'action_items': ['Upda...,{}
RGF0YXNldEV4YW1wbGU6MTc0,{'inputs': {'input': '--- | We Passed th...,{'outputs': {'output': {'action_items': ['visi...,{}
RGF0YXNldEV4YW1wbGU6MTc1,{'inputs': {'input': '#### Where sustainabili...,{'outputs': {'output': {'action_items': ['Choo...,{}
RGF0YXNldEV4YW1wbGU6MTc2,{'inputs': {'input': '| | | | | | Hello ...,{'outputs': {'output': {'action_items': ['Tell...,{}
RGF0YXNldEV4YW1wbGU6MTc3,{'inputs': {'input': 'Some travelers plan ahea...,{'outputs': {'output': {'action_items': ['Cons...,{}


# Set Up LangChain

In [9]:
# Look the benchmark entry up once; it provides both the function schema and the instructions.
extraction_task = registry[dataset_name]
function_schema = extraction_task.schema

# Bind the schema as the only function and set function_call to the schema's title.
llm = ChatOpenAI(model="gpt-4o").bind_functions(
    functions=[function_schema],
    function_call=function_schema.schema()["title"],
)
output_parser = JsonOutputFunctionsParser()

# instructions -> model with bound function -> JSON parser for the function-call arguments.
extraction_chain = extraction_task.instructions | llm | output_parser

# Define Task Function

In [10]:
def task(ex: Example) -> dict:
    """Run the extraction chain on a single dataset example.

    Args:
        ex: Dataset example; its ``input["inputs"]`` entry is what gets passed to the chain.

    Returns:
        The chain's parsed output: a dict of extracted fields (for example ``sender``,
        ``topic`` and ``tone``), not a string.
    """
    return extraction_chain.invoke(ex.input["inputs"])

# Check that the task is working by running it on at least one Example

In [11]:
# Smoke test: run the task on the first dataset example and display the parsed result.
task(dataset.examples[0])

{'sender': 'The iCloud Team',
 'sender_address': '6101 Long Prairie Rd, Ste 744 #511, Flower Mound, TX, 75028',
 'action_items': ['Update your payment information'],
 'topic': 'Failed to renew iCloud storage subscription',
 'tone': 'negative'}

# Run Experiment

In [12]:
# Run `task` over the examples in `dataset`; the returned experiment is what gets evaluated later.
experiment = run_experiment(dataset, task)

🧪 Experiment started.
📺 View dataset experiments: http://127.0.0.1:6006/datasets/RGF0YXNldDo4/experiments
🔗 View this experiment: http://127.0.0.1:6006/datasets/RGF0YXNldDo4/compare?experimentId=RXhwZXJpbWVudDoxNA==


running tasks |          | 0/10 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


# Define Evaluator

In [15]:
def jarowinkler_similarity(output, expected) -> float:
    """Score a task output against its reference using Jaro-Winkler string similarity.

    Both sides are serialized to JSON with sorted keys, so the comparison does not depend
    on the order in which keys appear in either dict.

    Args:
        output: The task's output for one example.
        expected: The example's expected record; the reference value is read from
            ``expected["outputs"]["output"]``.

    Returns:
        The score returned by ``jarowinkler.jarowinkler_similarity`` — a float (between 0
        and 1 according to the library's documentation), not an int.
    """
    actual_json = json.dumps(output, sort_keys=True)
    reference_json = json.dumps(expected["outputs"]["output"], sort_keys=True)
    return jarowinkler.jarowinkler_similarity(actual_json, reference_json)

# Evaluate Experiment

In [16]:
# Score the experiment's task outputs with the jarowinkler_similarity evaluator defined above.
evaluate_experiment(experiment, jarowinkler_similarity)

running experiment evaluations |          | 0/10 (0.0%) | ⏳ 00:00<? | ?it/s