In [None]:
!pip3 install flotorch-eval crewai plotly opentelemetry-api opentelemetry-sdk pandas ragas duckduckgo-search openlit langchain_aws


In [1]:
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter


# Create an in-memory span exporter
memory_exporter = InMemorySpanExporter()
span_processor = SimpleSpanProcessor(memory_exporter)

# Set up the tracer provider and add the span processor
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(span_processor)
trace.set_tracer_provider(tracer_provider)

In [2]:
import openlit

# Initialize OpenLit - this will automatically instrument CrewAI when it's imported
openlit.init()

Overriding of current TracerProvider is not allowed
Overriding of current TracerProvider is not allowed


{
    "resource_metrics": [
        {
            "resource": {
                "attributes": {
                    "telemetry.sdk.language": "python",
                    "telemetry.sdk.name": "openlit",
                    "telemetry.sdk.version": "1.34.0",
                    "service.name": "default",
                    "deployment.environment": "default"
                },
                "schema_url": ""
            },
            "scope_metrics": [
                {
                    "scope": {
                        "name": "openlit.otel.metrics",
                        "version": "0.1.0",
                        "schema_url": "",
                        "attributes": null
                    },
                    "metrics": [
                        {
                            "name": "gen_ai.client.token.usage",
                            "description": "Measures number of input and output tokens used",
                            "unit": "{token}",
                 

In [3]:
from crewai.tools import tool
from duckduckgo_search import DDGS

@tool('DuckDuckGoSearch')
def search_tool(search_query: str):
    """Search the web for information on a given topic"""
    return DDGS().text(search_query, max_results=5)

@tool('SalesforceIntegration')
def salesforce_tool(soql_query: str):
    """Call Salesforce API to get data"""
    return "Salesforce Integration"

from crewai import LLM

model = LLM(
    # model="sagemaker/INSERT ENDPOINT NAME",
    model="bedrock/us.amazon.nova-pro-v1:0",
    temperature=0.7, max_tokens=4*1024,
)

from crewai import Agent, Task, Crew


writer = Agent(
        role="Writer",
        goal="You make math engaging and understandable for young children through poetry",
        backstory="You're an expert in writing haikus but you know nothing of math.",
        tools=[search_tool],
        llm=model
    )

task = Task(description=("What is {topic}?"),
            expected_output=("Compose a short poem that includes the answer."),
            agent=writer)

crew = Crew(
  agents=[writer],
  tasks=[task],
  share_crew=False
)

result = crew.kickoff({"topic": "Trignometry"})
print(result)



{
    "name": "Crew Created",
    "context": {
        "trace_id": "0x2cbeec9e119a8f4a0f57acc6aca3058d",
        "span_id": "0x267a46ca5d3ceaa1",
        "trace_state": "[]"
    },
    "kind": "SpanKind.INTERNAL",
    "parent_id": null,
    "start_time": "2025-06-08T23:05:24.874688Z",
    "end_time": "2025-06-08T23:05:24.875540Z",
    "status": {
        "status_code": "OK"
    },
    "attributes": {
        "crewai_version": "0.126.0",
        "python_version": "3.13.0",
        "crew_key": "d56be7d496021dd9ad8d140b7267c207",
        "crew_id": "bbfd6ccf-b49f-4891-be83-978b4dca6f3c",
        "crew_process": "sequential",
        "crew_memory": false,
        "crew_number_of_tasks": 1,
        "crew_number_of_agents": 1,
        "crew_fingerprint": "920abf8a-1839-465d-8ecf-3698c75811fc",
        "crew_fingerprint_created_at": "2025-06-08T16:05:24.873530",
        "crew_agents": "[{\"key\": \"de2feaf934a966ef93a186c418b6f467\", \"id\": \"f24de42d-e906-49bc-834c-d1ebe4ef1896\", \"role\":

In [4]:
spans = memory_exporter.get_finished_spans()

spans

(<opentelemetry.sdk.trace.ReadableSpan at 0x13104ea50>,
 <opentelemetry.sdk.trace.ReadableSpan at 0x13176ce10>,
 <opentelemetry.sdk.trace.ReadableSpan at 0x132058190>,
 <opentelemetry.sdk.trace.ReadableSpan at 0x13201ab10>,
 <opentelemetry.sdk.trace.ReadableSpan at 0x13201a8b0>,
 <opentelemetry.sdk.trace.ReadableSpan at 0x13205ca70>,
 <opentelemetry.sdk.trace.ReadableSpan at 0x132021590>)

In [5]:
from langchain_aws import ChatBedrockConverse
from ragas.llms import LangchainLLMWrapper

bedrock_model = ChatBedrockConverse(
    region_name="us-east-1",
    endpoint_url=f"https://bedrock-runtime.us-east-1.amazonaws.com",
    model_id="us.amazon.nova-micro-v1:0"
)

llm = LangchainLLMWrapper(bedrock_model)



  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from flotorch_eval.agent_eval.core.converter import TraceConverter

def create_trajectory(spans):
    converter = TraceConverter()
    trajectory = converter.from_spans(spans)
    return trajectory


In [8]:
trajectory = create_trajectory(spans)
trajectory

Trajectory(trace_id='2cbeec9e119a8f4a0f57acc6aca3058d', messages=[Message(role='user', content='What is Trignometry?', tool_calls=[], timestamp=datetime.datetime(2025, 6, 8, 16, 5, 24, 881371)), Message(role='assistant', content='I need to find out what trigonometry is to compose a short poem about it. I should start by searching for information on trigonometry.', tool_calls=[ToolCall(name='DuckDuckGoSearch', arguments={'search_query': 'what is trigonometry'}, output=None)], timestamp=datetime.datetime(2025, 6, 8, 16, 5, 24, 881371)), Message(role='tool', content='[{\'title\': \'Trigonometry - Wikipedia\', \'href\': \'https://en.wikipedia.org/wiki/Trigonometry\', \'body\': \'Trigonometry is a branch of mathematics that studies the relationships between angles and side lengths of triangles. Learn about its origins, development, applications, and basic trigonometric functions such as sine, cosine, and tangent.\'}, {\'title\': \'Introduction to Trigonometry - Math is Fun\', \'href\': \'ht

In [11]:
from flotorch_eval.agent_eval.core.evaluator import Evaluator
from flotorch_eval.agent_eval.metrics.langchain_metrics import (
    TrajectoryEvalWithLLMMetric,
    TrajectoryEvalWithoutLLMMetric,
)
from flotorch_eval.agent_eval.metrics.ragas_metrics import (
    AgentGoalAccuracyMetric,
    ToolCallAccuracyMetric,
)
from flotorch_eval.agent_eval.metrics.base import MetricConfig

evaluator = Evaluator([
    ToolCallAccuracyMetric(),
    TrajectoryEvalWithLLMMetric(llm = bedrock_model,config=MetricConfig(
        metric_params={
            "reference_outputs": [
    {"role": "user", "content": "What is Amazon Bedrock? Compose a short poem that includes the answer."},
    {
        "role": "assistant",
        "content": "To compose a poem about Amazon Bedrock, I first need to gather information about what Amazon Bedrock is. I will use the available tool to search for this information.",
        "tool_calls": [
            {
                "function": {
                    "name": "Search the internet with Serper",
                    "arguments": "{\"search_query\": \"Amazon Bedrock\"}"
                }
            }
        ]
    },
    {"role": "tool", "content": "{\"searchParameters\": {\"q\": \"Amazon Bedrock\", \"type\": \"search\", \"num\": 5, \"engine\": \"google\"}, \"organic\": [{\"title\": \"Amazon Bedrock - Generative AI - AWS\", \"link\": \"https://aws.amazon.com/bedrock/\", \"snippet\": \"Amazon Bedrock Data Automation streamlines the generation of valuable insights from unstructured multimodal content such as documents, images, audio, and videos ...\", \"position\": 1, \"sitelinks\": [{\"title\": \"Amazon Bedrock\", \"link\": \"https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html\"}, {\"title\": \"Amazon Bedrock Pricing\", \"link\": \"https://aws.amazon.com/bedrock/pricing/\"}, {\"title\": \"Amazon Bedrock Documentation\", \"link\": \"https://docs.aws.amazon.com/bedrock/\"}, {\"title\": \"Amazon Bedrock FAQs\", \"link\": \"https://aws.amazon.com/bedrock/faqs/\"}, {\"title\": \"Amazon Bedrock Agents\", \"link\": \"https://aws.amazon.com/bedrock/agents/\"}]}, {\"title\": \"Getting Started with Amazon Bedrock - AWS\", \"link\": \"https://aws.amazon.com/awstv/watch/6ff4cd6fa97/\", \"snippet\": \"So check the region that you're currently in, make sure it's a region that's supported by Bedrock. Then I'm gonna scroll to the bottom of this ...\", \"position\": 2}], \"relatedSearches\": [{\"query\": \"Amazon Bedrock pricing\"}, {\"query\": \"Amazon Bedrock documentation\"}, {\"query\": \"Amazon Bedrock Claude\"}, {\"query\": \"Amazon Bedrock logo\"}, {\"query\": \"Amazon Bedrock DeepSeek\"}], \"credits\": 1}"},
    {"role": "assistant", "content": "Based on the observation, I have learned that **Amazon Bedrock** is a service by AWS that streamlines the generation of valuable insights from unstructured multimodal content. Here's a haiku about it:\n\nIn cloud's embrace,\nBedrock whispers secrets,\nData's gentle art."}
]
        }
    ))])

# Evaluate trajectory
results = await evaluator.evaluate(trajectory)



Invalid type NoneType for attribute 'gen_ai.completion' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types


{'user': {'role': 'user', 'content': '{\'text\': \'You are an expert data labeler.\\nYour task is to grade the accuracy of an AI agent\\\'s internal trajectory.\\n\\n<Rubric>\\n  An accurate trajectory:\\n  - Makes logical sense between steps\\n  - Shows clear progression\\n  - Is relatively efficient, though it does not need to be perfectly efficient\\n  - Is semantically equivalent to the provided reference trajectory\\n</Rubric>\\n\\nBased on the following reference trajectory:\\n\\n<reference_trajectory>\\n<user>\\nWhat is Amazon Bedrock? Compose a short poem that includes the answer.\\n</user>\\n\\n<assistant>\\nTo compose a poem about Amazon Bedrock, I first need to gather information about what Amazon Bedrock is. I will use the available tool to search for this information.\\n<tool_call>\\n<name>Search the internet with Serper</name>\\n<arguments>{"search_query": "Amazon Bedrock"}</arguments>\\n</tool_call>\\n</assistant>\\n\\n<tool>\\n{"searchParameters": {"q": "Amazon Bedrock"

Invalid type dict in attribute 'gen_ai.completion' value sequence. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or None


{
    "name": "chat us.amazon.nova-micro-v1:0",
    "context": {
        "trace_id": "0x530f2494de6b539dbe781f1aa8e701fa",
        "span_id": "0x58505ed723ad6576",
        "trace_state": "[]"
    },
    "kind": "SpanKind.CLIENT",
    "parent_id": null,
    "start_time": "2025-06-08T23:07:10.503499Z",
    "end_time": "2025-06-08T23:07:12.185722Z",
    "status": {
        "status_code": "OK"
    },
    "attributes": {
        "telemetry.sdk.name": "openlit",
        "gen_ai.operation.name": "chat",
        "gen_ai.system": "langchain",
        "gen_ai.request.model": "us.amazon.nova-micro-v1:0",
        "gen_ai.response.model": "us.amazon.nova-micro-v1:0",
        "gen_ai.request.temperature": "None",
        "gen_ai.request.top_k": "1",
        "gen_ai.request.top_p": "None",
        "gen_ai.usage.input_tokens": 2681,
        "gen_ai.usage.output_tokens": 2235,
        "server.address": "NOT_FOUND",
        "server.port": "NOT_FOUND",
        "deployment.environment": "default",
       

In [12]:
results.scores

[MetricResult(name='tool_call_accuracy', score=1.0, details={'evaluation_type': 'tool_call_accuracy'}),
 MetricResult(name='trajectory_eval_with_llm', score=1.0, details={'comment': 'The trajectory makes logical sense as it begins with a user query, proceeds to a search for information, and ends with the creation of a poem based on the retrieved information. The progression from search to poem composition is clear. Though not perfectly efficient, the trajectory uses a search tool to gather necessary information before composing the poem. The trajectory is semantically equivalent to the provided reference trajectory.', 'has_reference': True, 'raw_score': True})]