# Agent Evaluation Demo with NovaEval

This notebook demonstrates how to:
1. Load agent trace data from dataset.json or dataset_tool_calls_removed.json
2. Map trace spans to AgentData format
3. Create an AgentDataset 
4. Evaluate agent performance using AgentEvaluator with Gemini model
5. Analyze results with multiple scorers


In [96]:
import json
import re
from typing import Any, Dict, List, Optional, Union

# NovaEval imports
from novaeval.agents.agent_data import AgentData, ToolSchema, ToolCall, ToolResult
from novaeval.datasets.agent_dataset import AgentDataset
from novaeval.evaluators.agent_evaluator import AgentEvaluator
from novaeval.models.gemini import GeminiModel
from novaeval.scorers.agent_scorers import (
    context_relevancy_scorer,
    role_adherence_scorer,
    task_progression_scorer,
    tool_relevancy_scorer,
    tool_correctness_scorer,
    parameter_correctness_scorer
)
from dotenv import load_dotenv

load_dotenv()

print("✅ All imports successful!")


✅ All imports successful!


In [97]:
# !pwd
# !python langchain_agent/traces/fetch_traces_api.py 10
# !python langchain_agent/traces/combine_spans_api_compat.py

# !cp langchain_agent/traces/traces/dataset.json .

# !python clean_tool_call_spans.py 

In [98]:
!ls split_datasets/

agent_comment_gen_dataset.json	post_validation_dataset.json
agent_query_gen_dataset.json	tavily_search_results_dataset.json
email_gen_send_dataset.json


## Step 1: Load and Examine Dataset Structure


In [99]:
# Load the dataset
file_name = 'split_datasets/tavily_search_results_dataset.json'
#
with open(file_name, 'r') as f:
    spans_data = json.load(f)

print(f"📊 Loaded {len(spans_data)} spans from {file_name}")
print(f"\n🔍 Available span types:")

# Analyze span types
span_types = {}
for span in spans_data[:]:  # Look at first 10 spans
    span_name = span.get('name', 'unknown')
    if span_name not in span_types:
        span_types[span_name] = 0
    span_types[span_name] += 1

for span_type, count in span_types.items():
    print(f"  - {span_type}: {count}")


📊 Loaded 2301 spans from split_datasets/tavily_search_results_dataset.json

🔍 Available span types:
  - tool:tavily_search_results_json:tavily_search_results_json: 2301


## Step 2: Implement Field Mapping Logic


In [100]:
def parse_tools_from_prompt(prompt: str) -> List[ToolSchema]:
    """
    Parse tool definitions from LLM prompts using regex.
    
    Expected format: tool_name(param: type = default) -> return_type - description
    """
    # Pattern to match tool signatures
    pattern = r'(\w+)\(([^)]*)\)\s*->\s*(\w+)\s*-\s*(.+?)(?=\n\w+\(|$)'
    matches = re.findall(pattern, prompt, re.DOTALL)
    
    tools = []
    for match in matches:
        tool_name, params_str, return_type, description = match
        
        # Parse parameters
        args_schema = parse_params(params_str)
        
        tool = ToolSchema(
            name=tool_name,
            description=description.strip(),
            args_schema=args_schema,
            return_schema={"type": return_type}
        )
        tools.append(tool)
    
    return tools

def parse_params(params_str: str) -> Dict[str, Any]:
    """
    Parse parameter string into schema dictionary.
    
    Format: param_name: type = default_value
    """
    if not params_str.strip():
        return {}
    
    # Split parameters by comma
    params = [p.strip() for p in params_str.split(',') if p.strip()]
    schema = {}
    
    for param in params:
        if ':' in param:
            parts = param.split(':', 1)
            param_name = parts[0].strip()
            type_and_default = parts[1].strip()
            
            # Extract type and default value
            if '=' in type_and_default:
                type_part, default_part = type_and_default.split('=', 1)
                param_type = type_part.strip()
                default_val = default_part.strip().strip('"\'')
                schema[param_name] = {'type': param_type, 'default': default_val}
            else:
                param_type = type_and_default.strip()
                schema[param_name] = {'type': param_type}
    
    return schema

def identify_span_type(span: Dict[str, Any]) -> str:
    """
    Identify span type based on attributes.
    """
    attributes = span.get('attributes', {})
    agent_span_names = ['reddit_agent_run_1', 'reddit_agent_run_2', 'agent.query_generation', 'agent.comment_generation']
    tool_span_names = ['post_validation', 'email_generation_and_sending']
    # Check for agent attributes
    if any('chain.name' == key for key in attributes.keys()) or span['name'] in agent_span_names:
        return 'agent'
    
    # Check for LLM attributes
    if any('llm.model' == key for key in attributes.keys()):
        return 'llm'
    
    # Check for tool attributes
    if any('tool.name' == key for key in attributes.keys()) or span['name'] in tool_span_names:
        return 'tool'
    print('returning unknown type for span')
    print(span)
    return 'unknown'


In [101]:
global count_unknowns
count_unknowns = 0
def map_span_to_agent_data(span: Dict[str, Any]) -> AgentData:
    """
    Map a single span from file_name to AgentData format.
    """

    attributes = span.get('attributes', {})
    events = span.get('events', [])
    span_type = identify_span_type(span)

    # Base mappings
    data = {
        'user_id': span.get('metadata', {}).get('user_id', None),
        'task_id': span.get('trace_id'),
        'turn_id': span.get('span_id'),
        'ground_truth': None,
        'expected_tool_call': None,
        'agent_name': span_type,
        'agent_role': span_type,
        'system_prompt': "You are a helpful customer support agent",
        'metadata': None,
        'exit_status': span.get('status'),
        'tools_available': [],
        'tool_calls': [],
        'parameters_passed': {},
        'tool_call_results': [],
        'retrieval_query': None,
        'retrieved_context': None,
        'agent_exit': False,
        'trace': None
    }
    
    # Span-specific mappings
    if span_type == 'agent':
        # Agent task
        chain_inputs = attributes.get('chain.inputs', {})

        if isinstance(chain_inputs, dict) and 'input' in chain_inputs:
            data['agent_task'] = chain_inputs['input']
        elif attributes.get("agent_task"):
            data['agent_task'] = attributes.get("agent_task")
        else:
            print('agent_task not found')
        # Agent response
        finish_values = attributes.get('agent.output.finish.return_values', {})
        if isinstance(finish_values, dict) and 'output' in finish_values:
            data['agent_response'] = finish_values['output']
        elif attributes.get("agent_response"):
            data['agent_response'] = attributes.get("agent_response")
        else:
            print("agent_response is not available  " + span['span_id'])
        # Tool calls from agent actions
        tool_name = attributes.get('agent.output.action.tool')
        tool_input = attributes.get('agent.output.action.tool_input')
        
        if tool_name:
            tool_call = ToolCall(
                tool_name=tool_name,
                parameters={'input': tool_input} if tool_input else {},
                call_id=span['span_id']
            )
            data['tool_calls'] = [tool_call]
            data['parameters_passed'] = {'input': tool_input} if tool_input else {}
            
            # Handle retrieval query for langchain_retriever
            if tool_name == 'langchain_retriever' and tool_input:
                data['retrieval_query'] = [tool_input]
        
        # Agent exit status
        data['agent_exit'] = any(event.get('name') == 'agent_finish' for event in events)
        
        # Trace (dump events as JSON)
        if events:
            data['trace'] = json.dumps(events)
    
    elif span_type == 'llm':
        # Agent response from LLM output
        llm_input = attributes.get('llm.input.prompts', ['input is not available'])
        data['agent_task'] = llm_input[0]

        llm_responses = attributes.get('llm.output.response', [])
        if llm_responses:
            data['agent_response'] = llm_responses[0]
        else:
            print("llm_response is not available")
        # Parse tools from prompt
        prompts = attributes.get('llm.input.prompts', [])
        if prompts:
            try:
                tools = parse_tools_from_prompt(prompts[0])
                data['tools_available'] = tools
            except Exception:
                # Fallback to empty list if parsing fails
                data['tools_available'] = []
        
        data['parameters_passed'] = {}
    
    elif span_type == 'tool':
        # Agent response from tool output
        tool_output = attributes.get('tool.output.output')
        data['agent_task'] = f"This is a simple tool call, and the tool will execute as programmed. Its name is - {attributes.get('tool.name')}"
        if tool_output:
            data['agent_response'] = tool_output
        elif attributes.get("tool_response"):
            data['agent_response'] = attributes.get("tool_response")
        else:
            print("tool_output is not available " + span['span_id'])
        # Tool call results
        tool_name = attributes.get('tool.name')
        if tool_name and tool_output is not None:
            tool_result = ToolResult(
                call_id=span['span_id'],
                result=tool_output,
                success=span.get('status') == 'ok',
                error_message=None if span.get('status') == 'ok' else 'Tool execution failed'
            )
            data['tool_call_results'] = [tool_result]
            
            # Handle retrieved context for langchain_retriever
            if tool_name == 'langchain_retriever':
                data['retrieved_context'] = [[tool_output]]
        
        # Parameters from tool input
        tool_input_keys = [key for key in attributes.keys() if key.startswith('tool.input.')]
        tool_params = {}
        for key in tool_input_keys:
            param_name = key.replace('tool.input.', '')
            tool_params[param_name] = attributes[key]
        data['parameters_passed'] = tool_params
    else:
        global count_unknowns
        count_unknowns += 1
        print('Spans with unknown type: ' + str(count_unknowns))
    return AgentData(**data)

print("✅ Field mapping functions defined!")


✅ Field mapping functions defined!


## Step 3: Create AgentDataset from Spans


In [102]:
# Convert spans to AgentData objects
print("🔄 Converting spans to AgentData objects...")

agent_data_list = []
errors = []

for i, span in enumerate(spans_data):
    try:
        agent_data = map_span_to_agent_data(span)
        agent_data_list.append(agent_data)
    except Exception as e:
        errors.append(f"Span {i}: {str(e)}")
        if len(errors) <= 5:  # Show first 5 errors only
            print(f"⚠️  Error processing span {i}: {e}")
            print(span)

print(f"\n✅ Successfully converted {len(agent_data_list)} spans to AgentData")
if errors:
    print(f"❌ {len(errors)} spans had errors")

# Create AgentDataset
dataset = AgentDataset()
dataset.data = agent_data_list

print(f"📊 AgentDataset created with {len(dataset.data)} records")


🔄 Converting spans to AgentData objects...

✅ Successfully converted 2301 spans to AgentData
📊 AgentDataset created with 2301 records


## Step 4: Examine Sample Data


In [103]:
# Show statistics about the dataset
print("📈 Dataset Statistics:")

agent_types = {}
tool_usage = {}
with_responses = 0
with_tool_calls = 0
with_retrieval = 0

for data in dataset.data:
    # Agent types
    if data.agent_name:
        agent_types[data.agent_name] = agent_types.get(data.agent_name, 0) + 1
    
    # Responses
    if data.agent_response:
        with_responses += 1
    
    # Tool calls
    if data.tool_calls:
        with_tool_calls += 1
        for tool_call in data.tool_calls:
            if hasattr(tool_call, 'tool_name'):
                tool_usage[tool_call.tool_name] = tool_usage.get(tool_call.tool_name, 0) + 1
    
    # Retrieval
    if data.retrieval_query:
        with_retrieval += 1

print(f"\nAgent Types: {dict(agent_types)}")
print(f"Records with responses: {with_responses}")
print(f"Records with tool calls: {with_tool_calls}")
print(f"Records with retrieval: {with_retrieval}")
print(f"Tool usage: {dict(tool_usage)}")

# Show sample records
print("\n🔍 Sample AgentData records:")
for i, data in enumerate(dataset.data[:3]):
    print(f"\n--- Record {i+1} ({data.agent_name}) ---")
    print(f"Task: {data.agent_task[:100] if data.agent_task else 'None'}...")
    print(f"Response: {data.agent_response[:100] if data.agent_response else 'None'}...")
    print(f"Tool calls: {len(data.tool_calls) if data.tool_calls else 0}")
    print(f"Exit status: {data.exit_status}")


📈 Dataset Statistics:

Agent Types: {'tool': 2301}
Records with responses: 2301
Records with tool calls: 0
Records with retrieval: 0
Tool usage: {}

🔍 Sample AgentData records:

--- Record 1 (tool) ---
Task: This is a simple tool call, and the tool will execute as programmed. Its name is - tavily_search_res...
Response: HTTPError('432 Client Error:  for url: https://api.tavily.com/search')...
Tool calls: 0
Exit status: ok

--- Record 2 (tool) ---
Task: This is a simple tool call, and the tool will execute as programmed. Its name is - tavily_search_res...
Response: HTTPError('432 Client Error:  for url: https://api.tavily.com/search')...
Tool calls: 0
Exit status: ok

--- Record 3 (tool) ---
Task: This is a simple tool call, and the tool will execute as programmed. Its name is - tavily_search_res...
Response: HTTPError('432 Client Error:  for url: https://api.tavily.com/search')...
Tool calls: 0
Exit status: ok


## Step 5: Setup Gemini Model and Evaluator


In [104]:
import os

# Check for API key
if 'GEMINI_API_KEY' not in os.environ:
    print("⚠️  GEMINI_API_KEY environment variable not set!")
    print("Please set it before running evaluation:")
    print("export GEMINI_API_KEY='your-api-key-here'")
else:
    print("✅ GEMINI_API_KEY found in environment")

# Initialize Gemini model
try:
    gemini_model = GeminiModel(
        model_name="gemini-1.5-flash",  # Using flash model for cost efficiency
        temperature=0.1,  # Low temperature for consistent evaluation
        max_tokens=1024
    )
    print("✅ Gemini model initialized")
except Exception as e:
    print(f"❌ Error initializing Gemini model: {e}")
    gemini_model = None


✅ GEMINI_API_KEY found in environment
2025-09-24 18:31:28 - INFO - novaeval.models.base - Noveum tracing initialized successfully
✅ Gemini model initialized


In [105]:
# Initialize scoring functions for evaluation
scoring_functions = [
    task_progression_scorer,
    context_relevancy_scorer,
    role_adherence_scorer,
    tool_relevancy_scorer,
    parameter_correctness_scorer
]

print(f"✅ Initialized {len(scoring_functions)} scoring functions:")
for func in scoring_functions:
    print(f"  - {func.__name__}")

# Create AgentEvaluator
if gemini_model:
    evaluator = AgentEvaluator(
        agent_dataset=dataset,
        models=[gemini_model],
        scoring_functions=scoring_functions,
        output_dir="./demo_results",
        stream=False,
        include_reasoning=True
    )
    print("\n✅ AgentEvaluator created with Gemini model and scoring functions")
else:
    print("\n❌ Cannot create evaluator - Gemini model not available")


✅ Initialized 5 scoring functions:
  - task_progression_scorer
  - context_relevancy_scorer
  - role_adherence_scorer
  - tool_relevancy_scorer
  - parameter_correctness_scorer

✅ AgentEvaluator created with Gemini model and scoring functions


## Step 6: Run Evaluation (Sample)


In [106]:
# Run evaluation using the AgentEvaluator's run_all method
print("🚀 Running evaluation on sample data...")

if gemini_model and evaluator:
    try:
        # Create a smaller dataset for demo purposes
        sample_data = [data for data in dataset.data if data.agent_response][:25]
        print(f"\n📊 Evaluating {len(sample_data)} sample records...")
        
        # Create a temporary dataset with just the sample data
        sample_dataset = AgentDataset()
        sample_dataset.data = sample_data
        
        # Create a new evaluator with the sample dataset
        sample_evaluator = AgentEvaluator(
            agent_dataset=sample_dataset,
            models=[gemini_model],
            scoring_functions=scoring_functions,
            output_dir=f"./demo_results/{(os.path.splitext(os.path.basename(file_name))[0])}",
            stream=False,
            include_reasoning=True
        )
        
        # Run the evaluation
        sample_evaluator.run_all(save_every=1, file_type="csv")
        
        print("\n✅ Evaluation completed!")
        
        # Read and display results
        import pandas as pd
        results_file = "./demo_results/sample_evaluation/agent_evaluation_results.csv"
        
        if pd.io.common.file_exists(results_file):
            results_df = pd.read_csv(results_file)
            print(f"\n📊 Results Summary:")
            
            # Calculate averages for each scorer
            scorer_columns = [col for col in results_df.columns if col not in ['user_id', 'task_id', 'turn_id', 'agent_name'] and not col.endswith('_reasoning')]
            
            for col in scorer_columns:
                if results_df[col].dtype in ['float64', 'int64']:
                    avg_score = results_df[col].mean()
                    print(f"  - {col}: {avg_score:.2f}")
            
            # Show individual scores
            print(f"\n🔍 Individual Scores:")
            for i, row in results_df.iterrows():
                print(f"\n  Record {i+1} (Task: {row.get('task_id', 'N/A')}):")
                for col in scorer_columns:
                    if pd.notna(row[col]):
                        print(f"    - {col}: {row[col]}")
        else:
            print("❌ Results file not found")
        
    except Exception as e:
        print(f"❌ Error during evaluation: {e}")
        print(f"Error type: {type(e).__name__}")
        import traceback
        traceback.print_exc()
        
else:
    print("⚠️  Skipping evaluation - missing model or evaluator")


🚀 Running evaluation on sample data...

📊 Evaluating 25 sample records...
2025-09-24 18:31:28 - INFO - novaeval.evaluators.agent_evaluator - Starting agent evaluation process


Evaluating samples: 0it [00:00, ?it/s]

2025-09-24 18:31:28 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:30 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:30 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 82c85f1a-ac7e-4f01-a4c8-a3260921bfda) - 1 spans
2025-09-24 18:31:30 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 82c85f1a-ac7e-4f01-a4c8-a3260921bfda) - 1 spans
2025-09-24 18:31:30 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 82c85f1a-ac7e-4f01-a4c8-a3260921bfda
2025-09-24 18:31:30 - noveum_trace.transport.http_transport - INFO - ✅ Trace 82c85f1a-ac7e-4f01-a4c8-a3260921bfda successfully queued for export


2025-09-24 18:31:30 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:31 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:31 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: b5c746ba-c6ef-4be4-9755-08086ea6a5fd) - 1 spans
2025-09-24 18:31:31 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: b5c746ba-c6ef-4be4-9755-08086ea6a5fd) - 1 spans
2025-09-24 18:31:31 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace b5c746ba-c6ef-4be4-9755-08086ea6a5fd
2025-09-24 18:31:31 - noveum_trace.transport.http_transport - INFO - ✅ Trace b5c746ba-c6ef-4be4-9755-08086ea6a5fd successfully queued for export


2025-09-24 18:31:31 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:32 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:32 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 4777fb1e-1d3a-4cb3-9812-f9ae79e0f1d1) - 1 spans
2025-09-24 18:31:32 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 4777fb1e-1d3a-4cb3-9812-f9ae79e0f1d1) - 1 spans
2025-09-24 18:31:32 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 4777fb1e-1d3a-4cb3-9812-f9ae79e0f1d1
2025-09-24 18:31:32 - noveum_trace.transport.http_transport - INFO - ✅ Trace 4777fb1e-1d3a-4cb3-9812-f9ae79e0f1d1 successfully queued for export


2025-09-24 18:31:32 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 1 samples
2025-09-24 18:31:32 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 1it [00:03,  3.89s/it]

2025-09-24 18:31:32 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:33 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:33 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 7fa70895-8960-4621-a2db-7a09e817ad08) - 1 spans
2025-09-24 18:31:33 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 7fa70895-8960-4621-a2db-7a09e817ad08) - 1 spans
2025-09-24 18:31:33 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 7fa70895-8960-4621-a2db-7a09e817ad08
2025-09-24 18:31:33 - noveum_trace.transport.http_transport - INFO - ✅ Trace 7fa70895-8960-4621-a2db-7a09e817ad08 successfully queued for export


2025-09-24 18:31:33 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:35 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:35 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 470150bf-d627-45db-8d0c-94d2a1fe8593) - 1 spans
2025-09-24 18:31:35 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 470150bf-d627-45db-8d0c-94d2a1fe8593) - 1 spans
2025-09-24 18:31:35 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 470150bf-d627-45db-8d0c-94d2a1fe8593
2025-09-24 18:31:35 - noveum_trace.transport.http_transport - INFO - ✅ Trace 470150bf-d627-45db-8d0c-94d2a1fe8593 successfully queued for export


2025-09-24 18:31:35 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:36 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:36 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: e13aee99-f1ce-49cc-b182-4829948ad6ad) - 1 spans
2025-09-24 18:31:36 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: e13aee99-f1ce-49cc-b182-4829948ad6ad) - 1 spans
2025-09-24 18:31:36 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace e13aee99-f1ce-49cc-b182-4829948ad6ad
2025-09-24 18:31:36 - noveum_trace.transport.http_transport - INFO - ✅ Trace e13aee99-f1ce-49cc-b182-4829948ad6ad successfully queued for export


2025-09-24 18:31:36 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 2 samples
2025-09-24 18:31:36 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 2it [00:07,  3.71s/it]

2025-09-24 18:31:36 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:37 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:37 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: fdf1ef40-e31f-4b37-bde0-ce59c18a754b) - 1 spans
2025-09-24 18:31:37 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: fdf1ef40-e31f-4b37-bde0-ce59c18a754b) - 1 spans
2025-09-24 18:31:37 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace fdf1ef40-e31f-4b37-bde0-ce59c18a754b
2025-09-24 18:31:37 - noveum_trace.transport.http_transport - INFO - ✅ Trace fdf1ef40-e31f-4b37-bde0-ce59c18a754b successfully queued for export


2025-09-24 18:31:37 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:38 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:38 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: c3325bd7-9dd8-44c1-aacd-6e5cdcb5099b) - 1 spans
2025-09-24 18:31:38 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: c3325bd7-9dd8-44c1-aacd-6e5cdcb5099b) - 1 spans
2025-09-24 18:31:38 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace c3325bd7-9dd8-44c1-aacd-6e5cdcb5099b
2025-09-24 18:31:38 - noveum_trace.transport.http_transport - INFO - ✅ Trace c3325bd7-9dd8-44c1-aacd-6e5cdcb5099b successfully queued for export


2025-09-24 18:31:38 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:39 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:39 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: bd3b0607-d971-4de5-adef-4c4ef312623c) - 1 spans
2025-09-24 18:31:39 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: bd3b0607-d971-4de5-adef-4c4ef312623c) - 1 spans
2025-09-24 18:31:39 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace bd3b0607-d971-4de5-adef-4c4ef312623c
2025-09-24 18:31:39 - noveum_trace.transport.http_transport - INFO - ✅ Trace bd3b0607-d971-4de5-adef-4c4ef312623c successfully queued for export


2025-09-24 18:31:39 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 3 samples
2025-09-24 18:31:39 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 3it [00:11,  3.71s/it]

2025-09-24 18:31:39 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:41 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:41 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: af008822-05f4-452f-9ac4-064eeccdde26) - 1 spans
2025-09-24 18:31:41 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: af008822-05f4-452f-9ac4-064eeccdde26) - 1 spans
2025-09-24 18:31:41 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace af008822-05f4-452f-9ac4-064eeccdde26
2025-09-24 18:31:41 - noveum_trace.transport.http_transport - INFO - ✅ Trace af008822-05f4-452f-9ac4-064eeccdde26 successfully queued for export


2025-09-24 18:31:41 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:42 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:42 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: b3f61c76-2c3a-483f-8e45-681712a7010d) - 1 spans
2025-09-24 18:31:42 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: b3f61c76-2c3a-483f-8e45-681712a7010d) - 1 spans
2025-09-24 18:31:42 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace b3f61c76-2c3a-483f-8e45-681712a7010d
2025-09-24 18:31:42 - noveum_trace.transport.http_transport - INFO - ✅ Trace b3f61c76-2c3a-483f-8e45-681712a7010d successfully queued for export


2025-09-24 18:31:42 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:43 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:43 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: c996486a-cef9-4b86-9feb-fa2c449710f3) - 1 spans
2025-09-24 18:31:43 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: c996486a-cef9-4b86-9feb-fa2c449710f3) - 1 spans
2025-09-24 18:31:43 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace c996486a-cef9-4b86-9feb-fa2c449710f3
2025-09-24 18:31:43 - noveum_trace.transport.http_transport - INFO - ✅ Trace c996486a-cef9-4b86-9feb-fa2c449710f3 successfully queued for export


2025-09-24 18:31:43 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 4 samples
2025-09-24 18:31:43 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 4it [00:15,  3.82s/it]

2025-09-24 18:31:43 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:44 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:44 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 30cd3863-e67d-422e-b16f-2342a7383825) - 1 spans
2025-09-24 18:31:44 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 30cd3863-e67d-422e-b16f-2342a7383825) - 1 spans
2025-09-24 18:31:44 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 30cd3863-e67d-422e-b16f-2342a7383825
2025-09-24 18:31:44 - noveum_trace.transport.http_transport - INFO - ✅ Trace 30cd3863-e67d-422e-b16f-2342a7383825 successfully queued for export


2025-09-24 18:31:44 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:46 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:46 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 64aab416-9386-4c20-b97e-6a8ccac0e9e4) - 1 spans
2025-09-24 18:31:46 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 64aab416-9386-4c20-b97e-6a8ccac0e9e4) - 1 spans
2025-09-24 18:31:46 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 64aab416-9386-4c20-b97e-6a8ccac0e9e4
2025-09-24 18:31:46 - noveum_trace.transport.http_transport - INFO - ✅ Trace 64aab416-9386-4c20-b97e-6a8ccac0e9e4 successfully queued for export


2025-09-24 18:31:46 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:47 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:47 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: b67f8cf9-102c-422b-aff1-b785509dfe7e) - 1 spans
2025-09-24 18:31:47 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: b67f8cf9-102c-422b-aff1-b785509dfe7e) - 1 spans
2025-09-24 18:31:47 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace b67f8cf9-102c-422b-aff1-b785509dfe7e
2025-09-24 18:31:47 - noveum_trace.transport.http_transport - INFO - ✅ Trace b67f8cf9-102c-422b-aff1-b785509dfe7e successfully queued for export


2025-09-24 18:31:47 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 5 samples
2025-09-24 18:31:47 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 5it [00:18,  3.77s/it]

2025-09-24 18:31:47 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:48 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:48 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: ff873622-fd07-4fcd-ae6d-60ba559bf44e) - 1 spans
2025-09-24 18:31:48 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: ff873622-fd07-4fcd-ae6d-60ba559bf44e) - 1 spans
2025-09-24 18:31:48 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace ff873622-fd07-4fcd-ae6d-60ba559bf44e
2025-09-24 18:31:48 - noveum_trace.transport.http_transport - INFO - ✅ Trace ff873622-fd07-4fcd-ae6d-60ba559bf44e successfully queued for export


2025-09-24 18:31:48 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:50 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:50 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: cf93e068-f921-48cc-856b-ffcef63852b0) - 1 spans
2025-09-24 18:31:50 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: cf93e068-f921-48cc-856b-ffcef63852b0) - 1 spans
2025-09-24 18:31:50 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace cf93e068-f921-48cc-856b-ffcef63852b0
2025-09-24 18:31:50 - noveum_trace.transport.http_transport - INFO - ✅ Trace cf93e068-f921-48cc-856b-ffcef63852b0 successfully queued for export


2025-09-24 18:31:50 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:51 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:51 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 8509c67f-995f-4c09-9299-2959f7967e92) - 1 spans
2025-09-24 18:31:51 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 8509c67f-995f-4c09-9299-2959f7967e92) - 1 spans
2025-09-24 18:31:51 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 8509c67f-995f-4c09-9299-2959f7967e92
2025-09-24 18:31:51 - noveum_trace.transport.http_transport - INFO - ✅ Trace 8509c67f-995f-4c09-9299-2959f7967e92 successfully queued for export


2025-09-24 18:31:51 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 6 samples
2025-09-24 18:31:51 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 6it [00:22,  3.67s/it]

2025-09-24 18:31:51 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:52 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:52 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 68cba948-5b22-4647-b226-9238677e01f7) - 1 spans
2025-09-24 18:31:52 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 68cba948-5b22-4647-b226-9238677e01f7) - 1 spans
2025-09-24 18:31:52 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 68cba948-5b22-4647-b226-9238677e01f7
2025-09-24 18:31:52 - noveum_trace.transport.http_transport - INFO - ✅ Trace 68cba948-5b22-4647-b226-9238677e01f7 successfully queued for export


2025-09-24 18:31:52 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:53 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:53 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 8d56cfda-e1ca-495c-8a5f-74a95112f2cb) - 1 spans
2025-09-24 18:31:53 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 8d56cfda-e1ca-495c-8a5f-74a95112f2cb) - 1 spans
2025-09-24 18:31:53 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 8d56cfda-e1ca-495c-8a5f-74a95112f2cb
2025-09-24 18:31:53 - noveum_trace.transport.http_transport - INFO - ✅ Trace 8d56cfda-e1ca-495c-8a5f-74a95112f2cb successfully queued for export


2025-09-24 18:31:53 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:54 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:54 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: b2c756cb-36c9-41b3-ad8b-7c623e98ddd2) - 1 spans
2025-09-24 18:31:54 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: b2c756cb-36c9-41b3-ad8b-7c623e98ddd2) - 1 spans
2025-09-24 18:31:54 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace b2c756cb-36c9-41b3-ad8b-7c623e98ddd2
2025-09-24 18:31:54 - noveum_trace.transport.http_transport - INFO - ✅ Trace b2c756cb-36c9-41b3-ad8b-7c623e98ddd2 successfully queued for export


2025-09-24 18:31:54 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 7 samples
2025-09-24 18:31:54 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 7it [00:26,  3.68s/it]

2025-09-24 18:31:54 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:55 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:55 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: dd5b177c-648e-4ff3-a5c1-35b73b9de2bc) - 1 spans
2025-09-24 18:31:55 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: dd5b177c-648e-4ff3-a5c1-35b73b9de2bc) - 1 spans
2025-09-24 18:31:55 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace dd5b177c-648e-4ff3-a5c1-35b73b9de2bc
2025-09-24 18:31:55 - noveum_trace.transport.http_transport - INFO - ✅ Trace dd5b177c-648e-4ff3-a5c1-35b73b9de2bc successfully queued for export


2025-09-24 18:31:55 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:57 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:57 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: bcd89886-4526-418c-96ea-40aab8d6d3e8) - 1 spans
2025-09-24 18:31:57 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: bcd89886-4526-418c-96ea-40aab8d6d3e8) - 1 spans
2025-09-24 18:31:57 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace bcd89886-4526-418c-96ea-40aab8d6d3e8
2025-09-24 18:31:57 - noveum_trace.transport.http_transport - INFO - ✅ Trace bcd89886-4526-418c-96ea-40aab8d6d3e8 successfully queued for export


2025-09-24 18:31:57 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:58 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:58 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 919d7430-bed2-4b98-9906-fc4cf1b7c5f2) - 1 spans
2025-09-24 18:31:58 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 919d7430-bed2-4b98-9906-fc4cf1b7c5f2) - 1 spans
2025-09-24 18:31:58 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 919d7430-bed2-4b98-9906-fc4cf1b7c5f2
2025-09-24 18:31:58 - noveum_trace.transport.http_transport - INFO - ✅ Trace 919d7430-bed2-4b98-9906-fc4cf1b7c5f2 successfully queued for export


2025-09-24 18:31:58 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 8 samples
2025-09-24 18:31:58 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 8it [00:29,  3.68s/it]

2025-09-24 18:31:58 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:31:59 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:31:59 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 1a09571d-112f-4e4e-9ffb-114cf1d550b4) - 1 spans
2025-09-24 18:31:59 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 1a09571d-112f-4e4e-9ffb-114cf1d550b4) - 1 spans
2025-09-24 18:31:59 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 1a09571d-112f-4e4e-9ffb-114cf1d550b4
2025-09-24 18:31:59 - noveum_trace.transport.http_transport - INFO - ✅ Trace 1a09571d-112f-4e4e-9ffb-114cf1d550b4 successfully queued for export


2025-09-24 18:31:59 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:00 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:00 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: a8298df8-9056-4646-9454-483601695c8d) - 1 spans
2025-09-24 18:32:00 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: a8298df8-9056-4646-9454-483601695c8d) - 1 spans
2025-09-24 18:32:00 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace a8298df8-9056-4646-9454-483601695c8d
2025-09-24 18:32:00 - noveum_trace.transport.http_transport - INFO - ✅ Trace a8298df8-9056-4646-9454-483601695c8d successfully queued for export


2025-09-24 18:32:00 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:02 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:02 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 0330d18a-49c2-46ca-ab0f-84ede5afc73d) - 1 spans
2025-09-24 18:32:02 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 0330d18a-49c2-46ca-ab0f-84ede5afc73d) - 1 spans
2025-09-24 18:32:02 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 0330d18a-49c2-46ca-ab0f-84ede5afc73d
2025-09-24 18:32:02 - noveum_trace.transport.http_transport - INFO - ✅ Trace 0330d18a-49c2-46ca-ab0f-84ede5afc73d successfully queued for export


2025-09-24 18:32:02 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 9 samples
2025-09-24 18:32:02 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 9it [00:33,  3.84s/it]

2025-09-24 18:32:02 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:03 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:03 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: bd6d731c-9d25-4bd2-bf79-0494b82f4604) - 1 spans
2025-09-24 18:32:03 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: bd6d731c-9d25-4bd2-bf79-0494b82f4604) - 1 spans
2025-09-24 18:32:03 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace bd6d731c-9d25-4bd2-bf79-0494b82f4604
2025-09-24 18:32:03 - noveum_trace.transport.http_transport - INFO - ✅ Trace bd6d731c-9d25-4bd2-bf79-0494b82f4604 successfully queued for export


2025-09-24 18:32:03 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:05 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:05 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: ac509ac2-d353-4e8f-8d9e-ba0e7d927c46) - 1 spans
2025-09-24 18:32:05 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: ac509ac2-d353-4e8f-8d9e-ba0e7d927c46) - 1 spans
2025-09-24 18:32:05 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace ac509ac2-d353-4e8f-8d9e-ba0e7d927c46
2025-09-24 18:32:05 - noveum_trace.transport.http_transport - INFO - ✅ Trace ac509ac2-d353-4e8f-8d9e-ba0e7d927c46 successfully queued for export


2025-09-24 18:32:05 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:06 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:06 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 2839fce9-e6f0-426c-b198-a14fe9a75fae) - 1 spans
2025-09-24 18:32:06 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 2839fce9-e6f0-426c-b198-a14fe9a75fae) - 1 spans
2025-09-24 18:32:06 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 2839fce9-e6f0-426c-b198-a14fe9a75fae
2025-09-24 18:32:06 - noveum_trace.transport.http_transport - INFO - ✅ Trace 2839fce9-e6f0-426c-b198-a14fe9a75fae successfully queued for export


2025-09-24 18:32:06 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 10 samples
2025-09-24 18:32:06 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 10it [00:37,  3.76s/it]

2025-09-24 18:32:06 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:07 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:07 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 90a0acc7-37ac-4630-9521-16dd0f5fcde2) - 1 spans
2025-09-24 18:32:07 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 90a0acc7-37ac-4630-9521-16dd0f5fcde2) - 1 spans
2025-09-24 18:32:07 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 90a0acc7-37ac-4630-9521-16dd0f5fcde2
2025-09-24 18:32:07 - noveum_trace.transport.http_transport - INFO - ✅ Trace 90a0acc7-37ac-4630-9521-16dd0f5fcde2 successfully queued for export


2025-09-24 18:32:07 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:08 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:08 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 3b92ca95-c495-477c-8719-621ed46836fe) - 1 spans
2025-09-24 18:32:08 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 3b92ca95-c495-477c-8719-621ed46836fe) - 1 spans
2025-09-24 18:32:08 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 3b92ca95-c495-477c-8719-621ed46836fe
2025-09-24 18:32:08 - noveum_trace.transport.http_transport - INFO - ✅ Trace 3b92ca95-c495-477c-8719-621ed46836fe successfully queued for export


2025-09-24 18:32:08 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:10 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:10 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: eb883a10-2cdf-47f2-b9f0-0a294c912e62) - 1 spans
2025-09-24 18:32:10 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: eb883a10-2cdf-47f2-b9f0-0a294c912e62) - 1 spans
2025-09-24 18:32:10 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace eb883a10-2cdf-47f2-b9f0-0a294c912e62
2025-09-24 18:32:10 - noveum_trace.transport.http_transport - INFO - ✅ Trace eb883a10-2cdf-47f2-b9f0-0a294c912e62 successfully queued for export


2025-09-24 18:32:10 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 11 samples
2025-09-24 18:32:10 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 11it [00:42,  4.02s/it]

2025-09-24 18:32:10 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:12 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:12 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: e6a89586-d6ac-4cc6-9244-4eb0f77c5db3) - 1 spans
2025-09-24 18:32:12 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: e6a89586-d6ac-4cc6-9244-4eb0f77c5db3) - 1 spans
2025-09-24 18:32:12 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace e6a89586-d6ac-4cc6-9244-4eb0f77c5db3
2025-09-24 18:32:12 - noveum_trace.transport.http_transport - INFO - ✅ Trace e6a89586-d6ac-4cc6-9244-4eb0f77c5db3 successfully queued for export


2025-09-24 18:32:12 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:13 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:13 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: c61fe1d7-437f-489a-8e83-b5de5c9155d0) - 1 spans
2025-09-24 18:32:13 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: c61fe1d7-437f-489a-8e83-b5de5c9155d0) - 1 spans
2025-09-24 18:32:13 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace c61fe1d7-437f-489a-8e83-b5de5c9155d0
2025-09-24 18:32:13 - noveum_trace.transport.http_transport - INFO - ✅ Trace c61fe1d7-437f-489a-8e83-b5de5c9155d0 successfully queued for export


2025-09-24 18:32:13 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:14 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:14 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: e24c92f5-8e7c-44a9-a8f6-496a28f4081b) - 1 spans
2025-09-24 18:32:14 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: e24c92f5-8e7c-44a9-a8f6-496a28f4081b) - 1 spans
2025-09-24 18:32:14 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace e24c92f5-8e7c-44a9-a8f6-496a28f4081b
2025-09-24 18:32:14 - noveum_trace.transport.http_transport - INFO - ✅ Trace e24c92f5-8e7c-44a9-a8f6-496a28f4081b successfully queued for export


2025-09-24 18:32:14 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 12 samples
2025-09-24 18:32:14 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 12it [00:45,  3.92s/it]

2025-09-24 18:32:14 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:15 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:15 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 23d01af7-a492-45d7-9705-70d6092fedb1) - 1 spans
2025-09-24 18:32:15 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 23d01af7-a492-45d7-9705-70d6092fedb1) - 1 spans
2025-09-24 18:32:15 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 23d01af7-a492-45d7-9705-70d6092fedb1
2025-09-24 18:32:15 - noveum_trace.transport.http_transport - INFO - ✅ Trace 23d01af7-a492-45d7-9705-70d6092fedb1 successfully queued for export


2025-09-24 18:32:15 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:16 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:16 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: de0d5f3a-c48c-45f3-a4a6-9e89ba3b0690) - 1 spans
2025-09-24 18:32:16 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: de0d5f3a-c48c-45f3-a4a6-9e89ba3b0690) - 1 spans
2025-09-24 18:32:16 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace de0d5f3a-c48c-45f3-a4a6-9e89ba3b0690
2025-09-24 18:32:16 - noveum_trace.transport.http_transport - INFO - ✅ Trace de0d5f3a-c48c-45f3-a4a6-9e89ba3b0690 successfully queued for export


2025-09-24 18:32:16 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:18 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:18 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 3c411eb8-b7da-454d-8dbd-bcb17f7cb4dc) - 1 spans
2025-09-24 18:32:18 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 3c411eb8-b7da-454d-8dbd-bcb17f7cb4dc) - 1 spans
2025-09-24 18:32:18 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 3c411eb8-b7da-454d-8dbd-bcb17f7cb4dc
2025-09-24 18:32:18 - noveum_trace.transport.http_transport - INFO - ✅ Trace 3c411eb8-b7da-454d-8dbd-bcb17f7cb4dc successfully queued for export


2025-09-24 18:32:18 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 13 samples
2025-09-24 18:32:18 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 13it [00:49,  3.88s/it]

2025-09-24 18:32:18 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:19 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:19 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 5309acba-b930-4066-8339-1c6df69f2b1d) - 1 spans
2025-09-24 18:32:19 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 5309acba-b930-4066-8339-1c6df69f2b1d) - 1 spans
2025-09-24 18:32:19 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 5309acba-b930-4066-8339-1c6df69f2b1d
2025-09-24 18:32:19 - noveum_trace.transport.http_transport - INFO - ✅ Trace 5309acba-b930-4066-8339-1c6df69f2b1d successfully queued for export


2025-09-24 18:32:19 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:20 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:20 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: fbf4665b-5ce8-4064-87c2-70590cc677eb) - 1 spans
2025-09-24 18:32:20 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: fbf4665b-5ce8-4064-87c2-70590cc677eb) - 1 spans
2025-09-24 18:32:20 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace fbf4665b-5ce8-4064-87c2-70590cc677eb
2025-09-24 18:32:20 - noveum_trace.transport.http_transport - INFO - ✅ Trace fbf4665b-5ce8-4064-87c2-70590cc677eb successfully queued for export


2025-09-24 18:32:20 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:22 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:22 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 5d78943e-3c81-4bd4-90cf-93007719396d) - 1 spans
2025-09-24 18:32:22 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 5d78943e-3c81-4bd4-90cf-93007719396d) - 1 spans
2025-09-24 18:32:22 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 5d78943e-3c81-4bd4-90cf-93007719396d
2025-09-24 18:32:22 - noveum_trace.transport.http_transport - INFO - ✅ Trace 5d78943e-3c81-4bd4-90cf-93007719396d successfully queued for export


2025-09-24 18:32:22 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 14 samples
2025-09-24 18:32:22 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 14it [00:53,  3.82s/it]

2025-09-24 18:32:22 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:23 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:23 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 0ed24183-0b8e-47cb-ae9f-4ac31e5bfdae) - 1 spans
2025-09-24 18:32:23 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 0ed24183-0b8e-47cb-ae9f-4ac31e5bfdae) - 1 spans
2025-09-24 18:32:23 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 0ed24183-0b8e-47cb-ae9f-4ac31e5bfdae
2025-09-24 18:32:23 - noveum_trace.transport.http_transport - INFO - ✅ Trace 0ed24183-0b8e-47cb-ae9f-4ac31e5bfdae successfully queued for export


2025-09-24 18:32:23 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:24 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:24 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 98cea8be-d6ef-40d9-8803-3154e83a690a) - 1 spans
2025-09-24 18:32:24 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 98cea8be-d6ef-40d9-8803-3154e83a690a) - 1 spans
2025-09-24 18:32:24 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 98cea8be-d6ef-40d9-8803-3154e83a690a
2025-09-24 18:32:24 - noveum_trace.transport.http_transport - INFO - ✅ Trace 98cea8be-d6ef-40d9-8803-3154e83a690a successfully queued for export


2025-09-24 18:32:24 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:25 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:25 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: a286ad4a-088e-40c6-b261-5a938b598ea9) - 1 spans
2025-09-24 18:32:25 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: a286ad4a-088e-40c6-b261-5a938b598ea9) - 1 spans
2025-09-24 18:32:25 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace a286ad4a-088e-40c6-b261-5a938b598ea9
2025-09-24 18:32:25 - noveum_trace.transport.http_transport - INFO - ✅ Trace a286ad4a-088e-40c6-b261-5a938b598ea9 successfully queued for export


2025-09-24 18:32:25 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 15 samples
2025-09-24 18:32:25 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 15it [00:56,  3.77s/it]

2025-09-24 18:32:25 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:26 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:26 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: d7b26d83-4975-4629-9067-a1264bd62988) - 1 spans
2025-09-24 18:32:26 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: d7b26d83-4975-4629-9067-a1264bd62988) - 1 spans
2025-09-24 18:32:26 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace d7b26d83-4975-4629-9067-a1264bd62988
2025-09-24 18:32:26 - noveum_trace.transport.http_transport - INFO - ✅ Trace d7b26d83-4975-4629-9067-a1264bd62988 successfully queued for export


2025-09-24 18:32:26 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:28 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:28 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: bf313fed-81e0-4485-8fed-f4a06a62e75b) - 1 spans
2025-09-24 18:32:28 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: bf313fed-81e0-4485-8fed-f4a06a62e75b) - 1 spans
2025-09-24 18:32:28 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace bf313fed-81e0-4485-8fed-f4a06a62e75b
2025-09-24 18:32:28 - noveum_trace.transport.http_transport - INFO - ✅ Trace bf313fed-81e0-4485-8fed-f4a06a62e75b successfully queued for export


2025-09-24 18:32:28 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:29 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:29 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 06aa92de-d5bc-4464-906a-351fa3895c4a) - 1 spans
2025-09-24 18:32:29 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 06aa92de-d5bc-4464-906a-351fa3895c4a) - 1 spans
2025-09-24 18:32:29 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 06aa92de-d5bc-4464-906a-351fa3895c4a
2025-09-24 18:32:29 - noveum_trace.transport.http_transport - INFO - ✅ Trace 06aa92de-d5bc-4464-906a-351fa3895c4a successfully queued for export


2025-09-24 18:32:29 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 16 samples
2025-09-24 18:32:29 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 16it [01:00,  3.75s/it]

2025-09-24 18:32:29 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:30 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:30 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 080ec7b6-e32c-43ad-bc47-99a978558ca8) - 1 spans
2025-09-24 18:32:30 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 080ec7b6-e32c-43ad-bc47-99a978558ca8) - 1 spans
2025-09-24 18:32:30 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 080ec7b6-e32c-43ad-bc47-99a978558ca8
2025-09-24 18:32:30 - noveum_trace.transport.http_transport - INFO - ✅ Trace 080ec7b6-e32c-43ad-bc47-99a978558ca8 successfully queued for export


2025-09-24 18:32:30 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:32 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:32 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 558b6a8f-643c-44c4-acde-6fe2f05a85bf) - 1 spans
2025-09-24 18:32:32 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 558b6a8f-643c-44c4-acde-6fe2f05a85bf) - 1 spans
2025-09-24 18:32:32 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 558b6a8f-643c-44c4-acde-6fe2f05a85bf
2025-09-24 18:32:32 - noveum_trace.transport.http_transport - INFO - ✅ Trace 558b6a8f-643c-44c4-acde-6fe2f05a85bf successfully queued for export


2025-09-24 18:32:32 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:33 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:33 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 85beb87d-b78f-4e28-aec7-38f5f7fc43a2) - 1 spans
2025-09-24 18:32:33 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 85beb87d-b78f-4e28-aec7-38f5f7fc43a2) - 1 spans
2025-09-24 18:32:33 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 85beb87d-b78f-4e28-aec7-38f5f7fc43a2
2025-09-24 18:32:33 - noveum_trace.transport.http_transport - INFO - ✅ Trace 85beb87d-b78f-4e28-aec7-38f5f7fc43a2 successfully queued for export


2025-09-24 18:32:33 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 17 samples
2025-09-24 18:32:33 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 17it [01:04,  3.79s/it]

2025-09-24 18:32:33 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:34 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:34 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 0ac6d2c0-7fe4-4ebb-b459-1bbc4b16c254) - 1 spans
2025-09-24 18:32:34 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 0ac6d2c0-7fe4-4ebb-b459-1bbc4b16c254) - 1 spans
2025-09-24 18:32:34 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 0ac6d2c0-7fe4-4ebb-b459-1bbc4b16c254
2025-09-24 18:32:34 - noveum_trace.transport.http_transport - INFO - ✅ Trace 0ac6d2c0-7fe4-4ebb-b459-1bbc4b16c254 successfully queued for export


2025-09-24 18:32:34 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:35 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:35 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 45ef8ae6-5bac-4af6-b3fa-adb7a8d5779e) - 1 spans
2025-09-24 18:32:35 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 45ef8ae6-5bac-4af6-b3fa-adb7a8d5779e) - 1 spans
2025-09-24 18:32:35 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 45ef8ae6-5bac-4af6-b3fa-adb7a8d5779e
2025-09-24 18:32:35 - noveum_trace.transport.http_transport - INFO - ✅ Trace 45ef8ae6-5bac-4af6-b3fa-adb7a8d5779e successfully queued for export


2025-09-24 18:32:35 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:36 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:36 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: dc8fce8a-cbb9-4dd6-8bd3-e92be7c3ac77) - 1 spans
2025-09-24 18:32:36 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: dc8fce8a-cbb9-4dd6-8bd3-e92be7c3ac77) - 1 spans
2025-09-24 18:32:36 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace dc8fce8a-cbb9-4dd6-8bd3-e92be7c3ac77
2025-09-24 18:32:36 - noveum_trace.transport.http_transport - INFO - ✅ Trace dc8fce8a-cbb9-4dd6-8bd3-e92be7c3ac77 successfully queued for export


2025-09-24 18:32:36 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 18 samples
2025-09-24 18:32:36 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 18it [01:08,  3.77s/it]

2025-09-24 18:32:36 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:38 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:38 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: e0e05016-3785-4c15-ba53-a189fab4c0fc) - 1 spans
2025-09-24 18:32:38 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: e0e05016-3785-4c15-ba53-a189fab4c0fc) - 1 spans
2025-09-24 18:32:38 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace e0e05016-3785-4c15-ba53-a189fab4c0fc
2025-09-24 18:32:38 - noveum_trace.transport.http_transport - INFO - ✅ Trace e0e05016-3785-4c15-ba53-a189fab4c0fc successfully queued for export


2025-09-24 18:32:38 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:39 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:39 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 97cee31f-c62e-4dac-ad88-442d8084f822) - 1 spans
2025-09-24 18:32:39 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 97cee31f-c62e-4dac-ad88-442d8084f822) - 1 spans
2025-09-24 18:32:39 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 97cee31f-c62e-4dac-ad88-442d8084f822
2025-09-24 18:32:39 - noveum_trace.transport.http_transport - INFO - ✅ Trace 97cee31f-c62e-4dac-ad88-442d8084f822 successfully queued for export


2025-09-24 18:32:39 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:40 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:40 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: ee87ea90-0e31-40f2-8ccd-724585266f6f) - 1 spans
2025-09-24 18:32:40 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: ee87ea90-0e31-40f2-8ccd-724585266f6f) - 1 spans
2025-09-24 18:32:40 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace ee87ea90-0e31-40f2-8ccd-724585266f6f
2025-09-24 18:32:40 - noveum_trace.transport.http_transport - INFO - ✅ Trace ee87ea90-0e31-40f2-8ccd-724585266f6f successfully queued for export


2025-09-24 18:32:40 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 19 samples
2025-09-24 18:32:40 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 19it [01:12,  3.77s/it]

2025-09-24 18:32:40 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:41 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:41 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 962fa416-1f84-4f8c-be48-c9344f9b9fc3) - 1 spans
2025-09-24 18:32:41 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 962fa416-1f84-4f8c-be48-c9344f9b9fc3) - 1 spans
2025-09-24 18:32:41 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 962fa416-1f84-4f8c-be48-c9344f9b9fc3
2025-09-24 18:32:41 - noveum_trace.transport.http_transport - INFO - ✅ Trace 962fa416-1f84-4f8c-be48-c9344f9b9fc3 successfully queued for export


2025-09-24 18:32:41 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:43 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:43 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: e0260d17-5d0c-4c0c-b2ac-e3f0d1621508) - 1 spans
2025-09-24 18:32:43 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: e0260d17-5d0c-4c0c-b2ac-e3f0d1621508) - 1 spans
2025-09-24 18:32:43 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace e0260d17-5d0c-4c0c-b2ac-e3f0d1621508
2025-09-24 18:32:43 - noveum_trace.transport.http_transport - INFO - ✅ Trace e0260d17-5d0c-4c0c-b2ac-e3f0d1621508 successfully queued for export


2025-09-24 18:32:43 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:44 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:44 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 57514081-620c-4111-9c59-f8ff08761397) - 1 spans
2025-09-24 18:32:44 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 57514081-620c-4111-9c59-f8ff08761397) - 1 spans
2025-09-24 18:32:44 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 57514081-620c-4111-9c59-f8ff08761397
2025-09-24 18:32:44 - noveum_trace.transport.http_transport - INFO - ✅ Trace 57514081-620c-4111-9c59-f8ff08761397 successfully queued for export


2025-09-24 18:32:44 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 20 samples
2025-09-24 18:32:44 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 20it [01:15,  3.77s/it]

2025-09-24 18:32:44 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:45 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:45 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: e97e3130-2204-4a8c-a4c7-89999d60f31d) - 1 spans
2025-09-24 18:32:45 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: e97e3130-2204-4a8c-a4c7-89999d60f31d) - 1 spans
2025-09-24 18:32:45 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace e97e3130-2204-4a8c-a4c7-89999d60f31d
2025-09-24 18:32:45 - noveum_trace.transport.http_transport - INFO - ✅ Trace e97e3130-2204-4a8c-a4c7-89999d60f31d successfully queued for export


2025-09-24 18:32:45 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:47 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:47 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 85ee9097-3423-47cd-a26c-6f651149b7e2) - 1 spans
2025-09-24 18:32:47 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 85ee9097-3423-47cd-a26c-6f651149b7e2) - 1 spans
2025-09-24 18:32:47 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 85ee9097-3423-47cd-a26c-6f651149b7e2
2025-09-24 18:32:47 - noveum_trace.transport.http_transport - INFO - ✅ Trace 85ee9097-3423-47cd-a26c-6f651149b7e2 successfully queued for export


2025-09-24 18:32:47 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:48 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:48 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: f4978a1b-4853-4d36-82d7-0fb2e7477e47) - 1 spans
2025-09-24 18:32:48 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: f4978a1b-4853-4d36-82d7-0fb2e7477e47) - 1 spans
2025-09-24 18:32:48 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace f4978a1b-4853-4d36-82d7-0fb2e7477e47
2025-09-24 18:32:48 - noveum_trace.transport.http_transport - INFO - ✅ Trace f4978a1b-4853-4d36-82d7-0fb2e7477e47 successfully queued for export


2025-09-24 18:32:48 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 21 samples
2025-09-24 18:32:48 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 21it [01:19,  3.81s/it]

2025-09-24 18:32:48 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:49 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:49 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: d3968a0e-3198-4180-ab30-19c65fc250f5) - 1 spans
2025-09-24 18:32:49 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: d3968a0e-3198-4180-ab30-19c65fc250f5) - 1 spans
2025-09-24 18:32:49 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace d3968a0e-3198-4180-ab30-19c65fc250f5
2025-09-24 18:32:49 - noveum_trace.transport.http_transport - INFO - ✅ Trace d3968a0e-3198-4180-ab30-19c65fc250f5 successfully queued for export


2025-09-24 18:32:49 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:50 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:50 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 7ad04fb9-a455-4187-b4d3-75cf631247e5) - 1 spans
2025-09-24 18:32:50 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 7ad04fb9-a455-4187-b4d3-75cf631247e5) - 1 spans
2025-09-24 18:32:50 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 7ad04fb9-a455-4187-b4d3-75cf631247e5
2025-09-24 18:32:50 - noveum_trace.transport.http_transport - INFO - ✅ Trace 7ad04fb9-a455-4187-b4d3-75cf631247e5 successfully queued for export


2025-09-24 18:32:50 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:52 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:52 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 929dd17d-92b1-42e0-b58e-a6233c243839) - 1 spans
2025-09-24 18:32:52 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 929dd17d-92b1-42e0-b58e-a6233c243839) - 1 spans
2025-09-24 18:32:52 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 929dd17d-92b1-42e0-b58e-a6233c243839
2025-09-24 18:32:52 - noveum_trace.transport.http_transport - INFO - ✅ Trace 929dd17d-92b1-42e0-b58e-a6233c243839 successfully queued for export


2025-09-24 18:32:52 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 22 samples
2025-09-24 18:32:52 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 22it [01:23,  3.77s/it]

2025-09-24 18:32:52 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:53 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:53 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 4ab3c81c-c834-48df-a663-71542b274394) - 1 spans
2025-09-24 18:32:53 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 4ab3c81c-c834-48df-a663-71542b274394) - 1 spans
2025-09-24 18:32:53 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 4ab3c81c-c834-48df-a663-71542b274394
2025-09-24 18:32:53 - noveum_trace.transport.http_transport - INFO - ✅ Trace 4ab3c81c-c834-48df-a663-71542b274394 successfully queued for export


2025-09-24 18:32:53 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:54 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:54 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 8b4adea0-b580-4f86-b7ce-45fe6f8a0320) - 1 spans
2025-09-24 18:32:54 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 8b4adea0-b580-4f86-b7ce-45fe6f8a0320) - 1 spans
2025-09-24 18:32:54 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 8b4adea0-b580-4f86-b7ce-45fe6f8a0320
2025-09-24 18:32:54 - noveum_trace.transport.http_transport - INFO - ✅ Trace 8b4adea0-b580-4f86-b7ce-45fe6f8a0320 successfully queued for export


2025-09-24 18:32:54 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:55 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:55 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 8b4fd6ff-bf8a-41d5-8f5f-415b5d9e1887) - 1 spans
2025-09-24 18:32:55 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 8b4fd6ff-bf8a-41d5-8f5f-415b5d9e1887) - 1 spans
2025-09-24 18:32:55 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 8b4fd6ff-bf8a-41d5-8f5f-415b5d9e1887
2025-09-24 18:32:55 - noveum_trace.transport.http_transport - INFO - ✅ Trace 8b4fd6ff-bf8a-41d5-8f5f-415b5d9e1887 successfully queued for export


2025-09-24 18:32:55 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 23 samples
2025-09-24 18:32:55 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 23it [01:26,  3.71s/it]

2025-09-24 18:32:55 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:56 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:56 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: d18980ad-512a-4343-b55f-68167b3b13c2) - 1 spans
2025-09-24 18:32:56 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: d18980ad-512a-4343-b55f-68167b3b13c2) - 1 spans
2025-09-24 18:32:56 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace d18980ad-512a-4343-b55f-68167b3b13c2
2025-09-24 18:32:56 - noveum_trace.transport.http_transport - INFO - ✅ Trace d18980ad-512a-4343-b55f-68167b3b13c2 successfully queued for export


2025-09-24 18:32:56 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:58 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:58 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 4c3a07e3-0d89-42c1-9b57-441a1068ceca) - 1 spans
2025-09-24 18:32:58 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 4c3a07e3-0d89-42c1-9b57-441a1068ceca) - 1 spans
2025-09-24 18:32:58 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 4c3a07e3-0d89-42c1-9b57-441a1068ceca
2025-09-24 18:32:58 - noveum_trace.transport.http_transport - INFO - ✅ Trace 4c3a07e3-0d89-42c1-9b57-441a1068ceca successfully queued for export


2025-09-24 18:32:58 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:32:59 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:32:59 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 2351391d-0353-4c95-b8fe-0a83af209271) - 1 spans
2025-09-24 18:32:59 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 2351391d-0353-4c95-b8fe-0a83af209271) - 1 spans
2025-09-24 18:32:59 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 2351391d-0353-4c95-b8fe-0a83af209271
2025-09-24 18:32:59 - noveum_trace.transport.http_transport - INFO - ✅ Trace 2351391d-0353-4c95-b8fe-0a83af209271 successfully queued for export


2025-09-24 18:32:59 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 24 samples
2025-09-24 18:32:59 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 24it [01:30,  3.69s/it]

2025-09-24 18:32:59 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:33:00 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:33:00 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: aed9a73b-e7c3-4ffc-83ae-5b5a78def4b1) - 1 spans
2025-09-24 18:33:00 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: aed9a73b-e7c3-4ffc-83ae-5b5a78def4b1) - 1 spans
2025-09-24 18:33:00 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace aed9a73b-e7c3-4ffc-83ae-5b5a78def4b1
2025-09-24 18:33:00 - noveum_trace.transport.http_transport - INFO - ✅ Trace aed9a73b-e7c3-4ffc-83ae-5b5a78def4b1 successfully queued for export


2025-09-24 18:33:00 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:33:01 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:33:01 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: a18faaac-8866-426e-91af-6ea3930d4967) - 1 spans
2025-09-24 18:33:01 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: a18faaac-8866-426e-91af-6ea3930d4967) - 1 spans
2025-09-24 18:33:01 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace a18faaac-8866-426e-91af-6ea3930d4967
2025-09-24 18:33:01 - noveum_trace.transport.http_transport - INFO - ✅ Trace a18faaac-8866-426e-91af-6ea3930d4967 successfully queued for export


2025-09-24 18:33:01 - INFO - google_genai.models - AFC is enabled with max remote calls: 10.
2025-09-24 18:33:03 - INFO - google_genai.models - AFC remote call 1 is done.


2025-09-24 18:33:03 - noveum_trace.transport.http_transport - INFO - 📤 EXPORTING TRACE: auto_trace_generate (ID: 66c852c8-9e41-4224-8ace-37751c185ea4) - 1 spans
2025-09-24 18:33:03 - noveum_trace.transport.batch_processor - INFO - 📥 ADDING TRACE TO QUEUE: auto_trace_generate (ID: 66c852c8-9e41-4224-8ace-37751c185ea4) - 1 spans
2025-09-24 18:33:03 - noveum_trace.transport.batch_processor - INFO - ✅ Successfully queued trace 66c852c8-9e41-4224-8ace-37751c185ea4
2025-09-24 18:33:03 - noveum_trace.transport.http_transport - INFO - ✅ Trace 66c852c8-9e41-4224-8ace-37751c185ea4 successfully queued for export


2025-09-24 18:33:03 - INFO - novaeval.evaluators.agent_evaluator - Saving intermediate results after 25 samples
2025-09-24 18:33:03 - INFO - novaeval.evaluators.agent_evaluator - Intermediate results saved to demo_results/tavily_search_results_dataset/agent_evaluation_results.csv


Evaluating samples: 25it [01:34,  3.77s/it]

2025-09-24 18:33:03 - INFO - novaeval.evaluators.agent_evaluator - Saving final results
2025-09-24 18:33:03 - INFO - novaeval.evaluators.agent_evaluator - Reloaded 25 results from CSV
2025-09-24 18:33:03 - INFO - novaeval.evaluators.agent_evaluator - Agent evaluation completed

✅ Evaluation completed!

📊 Results Summary:
  - task_progression: 1.84
  - context_relevancy: 4.98
  - role_adherence: 5.17
  - tool_relevancy: 0.00
  - parameter_correctness: 0.00

🔍 Individual Scores:

  Record 1 (Task: b11593a1-b6ee-44c8-baed-18ebbd0330d9):
    - task_progression: 1.8
    - context_relevancy: 7.2
    - role_adherence: 4.5
    - tool_relevancy: 0.0
    - parameter_correctness: 0.0

  Record 2 (Task: a4444467-5b32-47f0-9b21-b86e42e87944):
    - task_progression: 1.8
    - context_relevancy: 7.2
    - role_adherence: 4.5
    - tool_relevancy: 0.0
    - parameter_correctness: 0.0

  Record 3 (Task: 7133e89b-588b-451f-8fdc-454dba47d752):
    - task_progression: 1.2
    - context_relevancy: 2.1
   




## Step 7: Analysis and Insights


In [107]:
# Analyze the dataset characteristics
print("🔍 Dataset Analysis:")
print("\n=== Agent Behavior Patterns ===")

# Analyze tool usage patterns
tool_patterns = {}
task_types = {}
response_lengths = []

for data in dataset.data:
    # Tool usage
    if data.tool_calls:
        for tool_call in data.tool_calls:
            if hasattr(tool_call, 'tool_name'):
                tool_name = tool_call.tool_name
                if tool_name not in tool_patterns:
                    tool_patterns[tool_name] = {'count': 0, 'success_rate': 0}
                tool_patterns[tool_name]['count'] += 1
    
    # Task analysis
    if data.agent_task:
        # Simple categorization
        task_lower = data.agent_task.lower()
        if 'user_input' in task_lower:
            task_types['user_input'] = task_types.get('user_input', 0) + 1
        elif 'exit' in task_lower:
            task_types['exit_command'] = task_types.get('exit_command', 0) + 1
        else:
            task_types['other'] = task_types.get('other', 0) + 1
    
    # Response analysis
    if data.agent_response:
        response_lengths.append(len(data.agent_response))

print(f"\n📈 Tool Usage:")
for tool, stats in tool_patterns.items():
    print(f"  - {tool}: {stats['count']} uses")

print(f"\n📋 Task Types:")
for task_type, count in task_types.items():
    print(f"  - {task_type}: {count}")

if response_lengths:
    avg_response_length = sum(response_lengths) / len(response_lengths)
    print(f"\n📝 Response Statistics:")
    print(f"  - Average response length: {avg_response_length:.1f} characters")
    print(f"  - Min response length: {min(response_lengths)}")
    print(f"  - Max response length: {max(response_lengths)}")


🔍 Dataset Analysis:

=== Agent Behavior Patterns ===

📈 Tool Usage:

📋 Task Types:
  - other: 2301

📝 Response Statistics:
  - Average response length: 679.2 characters
  - Min response length: 70
  - Max response length: 7309


## Step 8: Export Results (Optional)


In [108]:

# Export the processed dataset for future use
print("💾 Exporting processed dataset...")

try:
    # Export to JSON
    dataset.export_to_json(f'processed_agent_{file_name}')
    print(f"✅ Exported to processed_agent_{file_name}")
    
    # Export to CSV (optional)
    dataset.export_to_csv('processed_agent_dataset.csv')
    print("✅ Exported to processed_agent_dataset.csv")
    
except Exception as e:
    print(f"❌ Export error: {e}")

print("\n🎉 Demo completed successfully!")
print("\n📋 Summary:")
print(f"  - Processed {len(spans_data)} spans from {file_name}")
print(f"  - Created {len(dataset.data)} AgentData records")
print(f"  - Configured evaluation with Gemini model and 6 scorers")
if 'results' in locals():
    print(f"  - Successfully evaluated sample data")
print(f"  - Exported processed dataset for future use")


💾 Exporting processed dataset...
❌ Export error: [Errno 2] No such file or directory: 'processed_agent_split_datasets/tavily_search_results_dataset.json'

🎉 Demo completed successfully!

📋 Summary:
  - Processed 2301 spans from split_datasets/tavily_search_results_dataset.json
  - Created 2301 AgentData records
  - Configured evaluation with Gemini model and 6 scorers
  - Exported processed dataset for future use
