In [1]:
from openai import OpenAI
import pandas as pd
import json
import duckdb #used for some of the SQL code
from pydantic import BaseModel, Field 
from IPython.display import Markdown

from helper import get_openai_api_key, get_phoenix_endpoint, setup_phoenix

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import phoenix as px
import os
from phoenix.otel import register 
from openinference.semconv.trace import SpanAttributes 
from openinference.instrumentation.openai import OpenAIInstrumentor
from opentelemetry.trace import Status , StatusCode
from openinference.instrumentation import TracerProvider

In [3]:
# initialize the OpenAI client
openai_api_key = get_openai_api_key()
client = OpenAI(api_key=openai_api_key)

MODEL = "gpt-4o-mini"

Phoenix

In [None]:
session = setup_phoenix()

  next(self.gen)
  next(self.gen)


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix
Phoenix is running at: http://localhost:6006/


Unknown dataset: RGF0YXNldDox

GraphQL request:4:3
3 | ) {
4 |   dataset: node(id: $id) {
  |   ^
5 |     __typename
Traceback (most recent call last):
  File "/Users/priyanka./Documents/ai-agents/venv/lib/python3.11/site-packages/graphql/execution/execute.py", line 530, in await_result
    return_type, field_nodes, info, path, await result
                                          ^^^^^^^^^^^^
  File "/Users/priyanka./Documents/ai-agents/venv/lib/python3.11/site-packages/strawberry/schema/schema_converter.py", line 788, in _async_resolver
    return await await_maybe(
           ^^^^^^^^^^^^^^^^^^
  File "/Users/priyanka./Documents/ai-agents/venv/lib/python3.11/site-packages/strawberry/utils/await_maybe.py", line 13, in await_maybe
    return await value
           ^^^^^^^^^^^
  File "/Users/priyanka./Documents/ai-agents/venv/lib/python3.11/site-packages/phoenix/server/api/queries.py", line 508, in node
    raise NotFound(f"Unknown dataset: {id}")
phoenix.server.api.exceptions.NotFoun

In [5]:
PROJECT_NAME ="tracing_agent"

In [6]:
tracer_provider = register(
    project_name=PROJECT_NAME,
    endpoint= get_phoenix_endpoint() + "v1/traces"
)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: tracing_agent
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: http://localhost:6006/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [7]:
OpenAIInstrumentor().instrument(tracer_provider = tracer_provider)

In [8]:
tracer = tracer_provider.get_tracer(__name__)

#Tool 1: Dataset Lookup

In [9]:
# define the path to the transactional data
TRANSACTION_DATA_FILE_PATH = 'Store_Sales_Price_Elasticity_Promotions_Data.parquet'

In [10]:
# prompt templete for sql generation

SQL_GENERATION_PROMPT=""""
Generate an SQL query based on a prompt. Do not reply with anything besides the SQL query.
The prompt is : {prompt}

The availabe columns are: {columns}
The table name is: {table_name}
"""

In [11]:
def generate_sql_query(prompt:str,columns:list,table_name:str)->str:
    """Generate an SQL query based on a prompt"""
    formatted_prompt = SQL_GENERATION_PROMPT.format(prompt=prompt,columns=columns,table_name=table_name)

    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role":"user","content":formatted_prompt}],
    )

    return response.choices[0].message.content

In [12]:
@tracer.tool()
def lookup_sales_data(prompt:str)->str:
    """Implemantaion of sales data lookup from parquet file using SQL"""
    try:
        table_name="sales" #table name

        #read the parquet file into a duckdb table
        df=pd.read_parquet(TRANSACTION_DATA_FILE_PATH)
        duckdb.sql(f"CREATE TABLE IF NOT EXISTS {table_name} AS SELECT * FROM df")

        #generate sql code
        sql_query=generate_sql_query(prompt ,df.columns,table_name)

        #clean the response to make sure it only includes the SQL code
        sql_query = sql_query.strip()
        sql_query=sql_query.replace("```sql" , "").replace("```" , "")
        
        with tracer.start_as_current_span(
            "execute_sql_query",
            openinference_span_kind="chain"
        ) as span:
            span.set_input(sql_query)
            # execute the query
            result = duckdb.sql(sql_query).df()
            span.set_output(value=str(result))
            span.set_status(StatusCode.OK)

        return result.to_string()
    except Exception as e:
        return f"Error accessing data: {str(e)}"

In [13]:
# #testing first tool
# example_data = lookup_sales_data("Show me all the sales for store 1320 on November 1st, 2021")

# print(example_data)

#Tool 2 : Data analysis 

In [14]:
DATA_ANALYSIS_PROMPT = """
Analyse the following data:{data}
Your job is to answer the following question:{prompt}"""

In [15]:
@tracer.tool()
def analyze_sales_data(prompt:str,data:str)->str:
    """Implementation of AI-powered sales data analysis"""
    formatted_prompt = DATA_ANALYSIS_PROMPT.format(data=data,prompt=prompt)

    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role":"user", "content":formatted_prompt}]
    )

    analysis = response.choices[0].message.content
    return analysis if analysis else "No analysis could be generated"

In [16]:
# print(analyze_sales_data(prompt = "What trends do you see in this data",
#                          data=example_data))

#Tool 3 : Data visualization

In [17]:
CHART_CONFIGURATION_PROMPT = """
Generate a chart configuration based on this data:{data}
The goal is to show:{visualization_goal}
"""

In [18]:
class VisualizationConfig(BaseModel):
    chart_type : str = Field (...,description="Type of chart to generate")
    x_axis: str = Field(..., description="Name of the x-axis column")
    y_axis: str = Field(..., description="Name of the y-axis column")
    title: str = Field(..., description="Title of the chart")

In [19]:
@tracer.chain()
def extract_chart_config(data:str, visualization_goal:str)-> str:
    """Generate chart visualization configuration
    
    Args:
        data:String containing the data to visualize
        visualization_goal:Description of what the visualization should show
    Returns:
        Dictionary containing line chart configuaration
    """
    formatted_prompt=CHART_CONFIGURATION_PROMPT.format(data=data,visualization_goal=visualization_goal)

    response = client.beta.chat.completions.parse(
        model=MODEL,
        messages=[{"role":"user" , "content":formatted_prompt}],
        response_format=VisualizationConfig
    )       
    try:
        #Extract axis and title info from response
        content = response.choices[0].message.content

        return{
            "chart_type":content.chart_type,
            "x_axis": content.x_axis,
            "y_axis": content.y_axis,
            "title": content.title,
            "data": data
        }
    except Exception:
        return{
            "chart_type": "line", 
            "x_axis": "date",
            "y_axis": "value",
            "title": visualization_goal,
            "data": data
        }


In [20]:
CREATE_CHART_PROMPT="""
Write python code to create a chart based on the following configuration.
Only return the code, no other text.
config:{config}
"""

In [21]:
@tracer.chain()
def create_chart(config: dict)-> str:
    """Create a chart based on the configuration"""
    formatted_prompt = CREATE_CHART_PROMPT.format(config=config)

    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role":"user" , "content":formatted_prompt}],
    )

    code = response.choices[0].message.content
    code = code.replace("```python","").replace("```","")
    code = code.strip()

    return code


In [22]:
@tracer.tool()
def generate_visualization(data:str,visualization_goal:str)->str:
    """Generate a visualization based on the data and goal"""
    config = extract_chart_config(data,visualization_goal)
    code = create_chart(config)
    return code


In [23]:
# code = generate_visualization(example_data,"A bar chart of sales by product SKU. Put the product SKU on the x-axis and the sales on the y-axis.")
# print(code)

In [24]:
# exec(code)
# for safety we should wrap the code generated by llm within an enclosed environment 

# Defining the Router

In [25]:
#Tool Schema
tools =[{
    "type":"function",
    "function":{
        "name":"lookup_sales_data",
        "description":"Look up data from Store Scales Price Elasticity Promotions dataset",
        "parameters":{
            "type":"object",
            "properties":{
                "prompt":{"type":"string" , "description":"The unchanged prompt that the user provided."}  
            },
            "required":["prompt"]
        }
    }
},
{
    "type": "function",
    "function": {
        "name": "analyze_sales_data", 
        "description": "Analyze sales data to extract insights",
        "parameters": {
            "type": "object",
            "properties": {
                "data": {"type": "string", "description": "The lookup_sales_data tool's output."},
                "prompt": {"type": "string", "description": "The unchanged prompt that the user provided."}
            },
            "required": ["data", "prompt"]
        }
    }

},
{
    "type": "function",
    "function": {
        "name": "generate_visualization",
        "description": "Generate Python code to create data visualizations",
        "parameters": {
            "type": "object", 
            "properties": {
                "data": {"type": "string", "description": "The lookup_sales_data tool's output."},
                "visualization_goal": {"type": "string", "description": "The goal of the visualization."}
            },
            "required": ["data", "visualization_goal"]
        }
    }
}]

# Dictionary mapping function names to their implementations
tool_implementations = {
    "lookup_sales_data": lookup_sales_data,
    "analyze_sales_data": analyze_sales_data, 
    "generate_visualization": generate_visualization
}

#Router Logic

In [26]:
@tracer.chain()
def handle_tool_calls(tool_calls,messages):
    
    for tool_call in tool_calls:
        function = tool_implementations[tool_call.function.name]
        function_args = json.loads(tool_call.function.arguments)
        result = function(**function_args)
        messages.append({"role":"tool" , "content": result,"tool_call_id":tool_call.id})

    return messages


In [27]:
SYSTEM_PROMPT = """
You are a helpful assistant that can answer questions about the Store Sales Price Elasticity Promotions dataset.
"""

In [28]:
def run_agent(messages):
    print("Running agent with messages:",messages)

    if isinstance(messages,str):
        messages=[{"role":"user" , "content":messages}]

    if not any(
            isinstance(message , dict) and message.get("role") =="system" for message in messages
        ):
            system_prompt = {"role":"system","content": SYSTEM_PROMPT}
            messages.append(system_prompt)

    while True:
        #Router Span 
        print("Starting router call span")
        with tracer.start_as_current_span(
             "router_call" , openinference_span_kind="chain"
        ) as span:
            span.set_input(value=messages)
            response=client.chat.completions.create(
                model=MODEL,
                messages=messages,
                tools=tools,
            )
            messages.append(response.choices[0].message)
            tool_calls= response.choices[0].message.tool_calls
            print("Received response with tool calls:", bool(tool_calls))
            span.set_status(StatusCode.OK)

        # if the model decides to call function(s), call handle_tool_calls
        if tool_calls:
            print("Processing tool calls")
            messages = handle_tool_calls(tool_calls, messages)
            span.set_output(value=tool_calls)
        else:
            print("No tool calls, returning final response")
            span.set_output(value=response.choices[0].message.content)
            return response.choices[0].message.content
        
                

In [29]:
# result = run_agent('Show me the code for graph of sales by store in Nov 2021, and tell me what trends you see.')

In [30]:
# print(result)

In [31]:
def start_main_span(messages):
    print("Starting main span with messages:", messages)
    
    with tracer.start_as_current_span(
        "AgentRun", openinference_span_kind="agent"
    ) as span:
        span.set_input(value=messages)
        ret = run_agent(messages)
        print("Main span completed with return value:", ret)
        span.set_output(value=ret)
        span.set_status(StatusCode.OK)
        return ret

In [32]:
result = start_main_span([{"role": "user", 
                           "content": "Which stores did the best in 2021?"}])

Starting main span with messages: [{'role': 'user', 'content': 'Which stores did the best in 2021?'}]
Running agent with messages: [{'role': 'user', 'content': 'Which stores did the best in 2021?'}]
Starting router call span
Received response with tool calls: True
Processing tool calls


Setting attribute on ended span.
Setting attribute on ended span.


Starting router call span
Received response with tool calls: True
Processing tool calls


Setting attribute on ended span.
Setting attribute on ended span.


Starting router call span


Setting attribute on ended span.
Setting attribute on ended span.


Received response with tool calls: False
No tool calls, returning final response
Main span completed with return value: In 2021, the best-performing stores based on total sales were:

1. **Store Number 2970**: $84,454.33
2. **Store Number 3300**: $63,205.33
3. **Store Number 1650**: $62,152.43
4. **Store Number 1540**: $58,777.02
5. **Store Number 1210**: $55,435.62
6. **Store Number 1100**: $52,494.89
7. **Store Number 1320**: $52,407.36
8. **Store Number 2750**: $51,959.20
9. **Store Number 2200**: $51,634.51
10. **Store Number 3080**: $50,704.06

The store with the highest total sales was **Store Number 2970**, with total sales of **$84,454.33**.
