In [1]:
import os

# Set your JSON file path here
# Configuration - can be overridden with environment variables for portability
JSON_FILE_PATH = os.getenv("NOVEUM_JSON_PATH", "/mnt/drive2/trace_details.json")
OUTPUT_CSV = os.getenv("NOVEUM_OUTPUT_CSV", "/mnt/drive2/noveum_spans_output.csv")
CHUNK_SIZE = int(os.getenv("NOVEUM_CHUNK_SIZE", "2"))  # Set to 2 for testing purposes to verify streaming behavior

# Alternative: Use relative paths if the files are in the current directory
# JSON_FILE_PATH = os.getenv("NOVEUM_JSON_PATH", "./trace_details.json")
# OUTPUT_CSV = os.getenv("NOVEUM_OUTPUT_CSV", "./noveum_spans_output.csv")

# Output paths


In [2]:
import json
import os
import sys

import pandas as pd

# Add the parent directory to the path to import noveum_spans_dataset
sys.path.append(os.path.dirname(os.getcwd()))

from novaeval.datasets.noveum_spans_dataset import (
    create_dataset,
    noveum_spans_preprocessing,
    stream_dataset,
)


In [3]:
# Test the preprocessing function
print("Testing noveum_spans_preprocessing...")

try:
    noveum_spans_preprocessing(
        json_files=[JSON_FILE_PATH],
        output_csv=OUTPUT_CSV
    )
    print("✓ Preprocessing completed successfully!")

    # Check the output CSV
    if os.path.exists(OUTPUT_CSV):
        df = pd.read_csv(OUTPUT_CSV)
        print(f"\nOutput CSV created with {len(df)} rows")
        print(f"Columns: {list(df.columns)}")

        # Show first few rows
        print("\nFirst few rows:")
        print(df[["turn_id", "agent_name", "agent_task", "status"]].head())

except Exception as e:
    print(f"❌ Error during preprocessing: {e}")
    import traceback
    traceback.print_exc()


Testing noveum_spans_preprocessing...
✓ Preprocessing completed successfully!

Output CSV created with 8 rows
Columns: ['turn_id', 'agent_name', 'agent_task', 'agent_response', 'metadata', 'trace_id', 'span_name', 'status', 'start_time', 'end_time', 'attributes', 'exit_status', 'agent_exit']

First few rows:
                                turn_id            agent_name  \
0  f2d9563d-6a0b-4ff6-b46f-330c1cdf9b07  research_coordinator   
1  d33dba4a-dc67-46ae-9836-ac49ed938c6f          search_agent   
2  f1e4204b-9cbb-4322-9ba0-cfada1607da7       web_search_tool   
3  8387dbbd-9923-4299-aef9-62a60b0e4c70  academic_search_tool   
4  7826fa8a-3534-4206-a336-d15fe41ae35a        analysis_agent   

                                          agent_task status  
0              artificial intelligence in healthcare     ok  
1                                                NaN     ok  
2              artificial intelligence in healthcare     ok  
3              artificial intelligence in healthcar

In [4]:
# Check if file exists and preview its structure
if os.path.exists(JSON_FILE_PATH):
    print(f"✓ JSON file found: {JSON_FILE_PATH}")

    # Load and preview the JSON structure
    with open(JSON_FILE_PATH) as f:
        data = json.load(f)

    print(f"\nTrace ID: {data.get('trace_id')}")
    print(f"Number of spans: {len(data.get('spans', []))}")
    print(f"Trace name: {data.get('name')}")

    # Preview first span
    if data.get("spans"):
        first_span = data["spans"][0]
        print("\nFirst span:")
        print(f"  - Span ID: {first_span.get('span_id')}")
        print(f"  - Name: {first_span.get('name')}")
        print(f"  - Status: {first_span.get('status')}")
        print(f"  - Duration: {first_span.get('duration_ms')} ms")

        # Check attributes
        attributes = first_span.get("attributes", {})
        print(f"  - Function name: {attributes.get('function.name')}")

        # Check for input fields
        input_fields = [k for k in attributes if k.startswith("agent.input.") or k.startswith("tool.input.")]
        print(f"  - Input fields: {input_fields}")

else:
    print(f"❌ JSON file not found: {JSON_FILE_PATH}")
    print("Please update the JSON_FILE_PATH variable above.")


✓ JSON file found: /mnt/drive2/trace_details.json

Trace ID: 2d842385-2baa-40f3-8e12-9a7da4240d17
Number of spans: 8
Trace name: test_research_workflow_openai_gpt-3.5-turbo

First span:
  - Span ID: f2d9563d-6a0b-4ff6-b46f-330c1cdf9b07
  - Name: agent:research_coordinator:research_coordinator
  - Status: ok
  - Duration: 1801.633 ms
  - Function name: research_coordinator
  - Input fields: ['agent.input.research_topic']


In [5]:
# Test the preprocessing function
print("Testing noveum_spans_preprocessing...")

try:
    noveum_spans_preprocessing(
        json_files=[JSON_FILE_PATH],
        output_csv=OUTPUT_CSV
    )
    print("✓ Preprocessing completed successfully!")

    # Check the output CSV
    if os.path.exists(OUTPUT_CSV):
        df = pd.read_csv(OUTPUT_CSV)
        print(f"\nOutput CSV created with {len(df)} rows")
        print(f"Columns: {list(df.columns)}")

        # Show first few rows
        print("\nFirst few rows:")
        print(df[["turn_id", "agent_name", "agent_task", "status"]].head())

except Exception as e:
    print(f"❌ Error during preprocessing: {e}")
    import traceback
    traceback.print_exc()


Testing noveum_spans_preprocessing...
✓ Preprocessing completed successfully!

Output CSV created with 8 rows
Columns: ['turn_id', 'agent_name', 'agent_task', 'agent_response', 'metadata', 'trace_id', 'span_name', 'status', 'start_time', 'end_time', 'attributes', 'exit_status', 'agent_exit']

First few rows:
                                turn_id            agent_name  \
0  f2d9563d-6a0b-4ff6-b46f-330c1cdf9b07  research_coordinator   
1  d33dba4a-dc67-46ae-9836-ac49ed938c6f          search_agent   
2  f1e4204b-9cbb-4322-9ba0-cfada1607da7       web_search_tool   
3  8387dbbd-9923-4299-aef9-62a60b0e4c70  academic_search_tool   
4  7826fa8a-3534-4206-a336-d15fe41ae35a        analysis_agent   

                                          agent_task status  
0              artificial intelligence in healthcare     ok  
1                                                NaN     ok  
2              artificial intelligence in healthcare     ok  
3              artificial intelligence in healthcar

In [6]:
# Test creating a dataset from the CSV
print("Testing create_dataset...")

try:
    dataset = create_dataset(OUTPUT_CSV)
    print("✓ Dataset created successfully!")

    # Check dataset properties
    print("\nDataset info:")
    print(f"Number of records: {len(dataset.data)}")

    if dataset.data:
        first_record = dataset.data[0]
        print("\nFirst record:")
        print(f"  - Turn ID: {first_record.turn_id}")
        print(f"  - Agent name: {first_record.agent_name}")
        print(f"  - Agent role: {first_record.agent_role}")
        if len(first_record.agent_task) > 100:
            print(f"  - Task: {first_record.agent_task[:100]}...")
        else:
            print(f"  - Task: {first_record.agent_task}")
        if len(first_record.agent_response) > 100:
            print(f"  - Response: {first_record.agent_response[:100]}...")
        else:
            print(f"  - Response: {first_record.agent_response}")

        # Check metadata
        if first_record.metadata:
            metadata = json.loads(first_record.metadata)
            print(f"  - Metadata keys: {list(metadata.keys())}")

except Exception as e:
    print(f"❌ Error creating dataset: {e}")
    import traceback
    traceback.print_exc()


Testing create_dataset...
✓ Dataset created successfully!

Dataset info:
Number of records: 8

First record:
  - Turn ID: f2d9563d-6a0b-4ff6-b46f-330c1cdf9b07
  - Agent name: research_coordinator
  - Agent role: coordinator
  - Task: artificial intelligence in healthcare
  - Response: {'topic': 'artificial intelligence in healthcare', 'search_results': [{'title': 'Understanding artif...
  - Metadata keys: ['trace_id', 'duration_ms', 'parent_span_id', 'status_message', 'agent_type', 'exit_status', 'agent_exit', 'span_name', 'status', 'start_time', 'end_time']


In [7]:
# Sample a few random objects from the dataset and save them as JSON
import random

print("Sampling random dataset objects...")

try:
    if 'dataset' in locals() and dataset.data:
        # Sample 3 random objects (or fewer if dataset is smaller)
        sample_size = min(3, len(dataset.data))
        sampled_objects = random.sample(dataset.data, sample_size)
        
        print(f"✓ Sampled {sample_size} objects from dataset")
        
        # Convert AgentData objects to JSON-serializable format
        sample_json_data = []
        for obj in sampled_objects:
            # Use the model_dump() method to convert Pydantic model to dict
            sample_json_data.append(obj.model_dump())
        
        # Save to JSON file
        sample_file_path = "sample_dataset_object.json"
        with open(sample_file_path, 'w', encoding='utf-8') as f:
            json.dump(sample_json_data, f, indent=2, ensure_ascii=False)
        
        print(f"✓ Saved sample objects to: {sample_file_path}")
        
        # Show preview of what was saved
        print("\nPreview of sampled objects:")
        for i, obj in enumerate(sampled_objects):
            print(f"\nSample {i+1}:")
            print(f"  - Turn ID: {obj.turn_id}")
            print(f"  - Agent Name: {obj.agent_name}")
            print(f"  - Agent Role: {obj.agent_role}")
            task_preview = obj.agent_task[:80] + "..." if len(obj.agent_task) > 80 else obj.agent_task
            print(f"  - Task: {task_preview}")
            response_preview = obj.agent_response[:80] + "..." if len(obj.agent_response) > 80 else obj.agent_response
            print(f"  - Response: {response_preview}")
            
        print(f"\n✓ Complete JSON objects saved to {sample_file_path}")
        
    else:
        print("❌ No dataset available to sample from. Make sure the dataset creation step completed successfully.")
        
except Exception as e:
    print(f"❌ Error sampling dataset objects: {e}")
    import traceback
    traceback.print_exc()


Sampling random dataset objects...
✓ Sampled 3 objects from dataset
✓ Saved sample objects to: sample_dataset_object.json

Preview of sampled objects:

Sample 1:
  - Turn ID: 8387dbbd-9923-4299-aef9-62a60b0e4c70
  - Agent Name: academic_search_tool
  - Agent Role: academic_database
  - Task: artificial intelligence in healthcare
  - Response: [{'title': 'Recent Advances in artificial intelligence in healthcare: A Comprehe...

Sample 2:
  - Turn ID: 7826fa8a-3534-4206-a336-d15fe41ae35a
  - Agent Name: analysis_agent
  - Agent Role: analyzer
  - Task: [{'title': 'Understanding artificial intelligence in healthcare: A Comprehensive...
  - Response: {'topic': 'artificial intelligence in healthcare', 'input_sources': 3, 'llm_anal...

Sample 3:
  - Turn ID: 4165ef25-8865-460d-8acd-e14e8fc9a47e
  - Agent Name: summary_agent
  - Agent Role: summarizer
  - Task: {'topic': 'artificial intelligence in healthcare', 'input_sources': 3, 'llm_anal...
  - Response: This research on artificial intellig

In [8]:
# Test streaming the dataset with chunk size 2
print(f"Testing stream_dataset with chunk_size={CHUNK_SIZE}...")

try:
    chunk_count = 0
    total_records = 0

    for chunk in stream_dataset(OUTPUT_CSV, chunk_size=CHUNK_SIZE):
        chunk_count += 1
        chunk_size_actual = len(chunk)
        total_records += chunk_size_actual

        print(f"\nChunk {chunk_count}: {chunk_size_actual} records")

        # Show details for first chunk
        if chunk_count == 1:
            for i, record in enumerate(chunk):
                print(f"  Record {i+1}:")
                print(f"    - Turn ID: {record.turn_id}")
                print(f"    - Agent: {record.agent_name}")
                print(f"    - Role: {record.agent_role}")
                task_preview = record.agent_task[:50] + "..." if len(record.agent_task) > 50 else record.agent_task
                print(f"    - Task: {task_preview}")

        # Limit output for large datasets
        if chunk_count >= 5:
            print("\n... (showing first 5 chunks only)")
            # Continue counting without printing
            # Continue counting remaining chunks without printing to preserve streaming behavior
            # Skip the first 5 chunks and process the rest
            skip_count = 0
            for remaining_chunk in stream_dataset(OUTPUT_CSV, chunk_size=CHUNK_SIZE):
                if skip_count < 5:
                    skip_count += 1
                    continue
                chunk_count += 1
                total_records += len(remaining_chunk)
            break

    print("\n✓ Streaming completed!")
    print(f"Total chunks: {chunk_count}")
    print(f"Total records: {total_records}")

except Exception as e:
    print(f"❌ Error during streaming: {e}")
    import traceback
    traceback.print_exc()


Testing stream_dataset with chunk_size=2...

Chunk 1: 2 records
  Record 1:
    - Turn ID: f2d9563d-6a0b-4ff6-b46f-330c1cdf9b07
    - Agent: research_coordinator
    - Role: coordinator
    - Task: artificial intelligence in healthcare
  Record 2:
    - Turn ID: d33dba4a-dc67-46ae-9836-ac49ed938c6f
    - Agent: search_agent
    - Role: researcher
❌ Error during streaming: object of type 'NoneType' has no len()


Traceback (most recent call last):
  File "/tmp/ipykernel_19741/1077325344.py", line 22, in <module>
    task_preview = record.agent_task[:50] + "..." if len(record.agent_task) > 50 else record.agent_task
TypeError: object of type 'NoneType' has no len()


In [9]:
# Analyze the processed data
print("Data Analysis:")

if os.path.exists(OUTPUT_CSV):
    df = pd.read_csv(OUTPUT_CSV)

    print(f"\nUnique agent names: {df['agent_name'].nunique()}")
    print("Agent name distribution:")
    print(df["agent_name"].value_counts())

    print("\nStatus distribution:")
    print(df["status"].value_counts())

    # Check for empty fields
    print(f"\nEmpty agent_task fields: {df['agent_task'].isna().sum() + (df['agent_task'] == '').sum()}")
    print(f"Empty agent_response fields: {df['agent_response'].isna().sum() + (df['agent_response'] == '').sum()}")

    # Show a sample of different span types
    print("\nSample of different span types:")
    unique_names = df["agent_name"].unique()[:5]
    for name in unique_names:
        sample = df[df["agent_name"] == name].iloc[0]
        print(f"\n{name}:")
        print(f"  Span name: {sample['span_name']}")
        if len(str(sample["agent_task"])) > 100:
            print(f"  Task: {sample['agent_task'][:100]}...")
        else:
            print(f"  Task: {sample['agent_task']}")
        print(f"  Response length: {len(str(sample['agent_response']))} chars")

else:
    print("❌ Output CSV not found")


Data Analysis:

Unique agent names: 8
Agent name distribution:
agent_name
research_coordinator    1
search_agent            1
web_search_tool         1
academic_search_tool    1
analysis_agent          1
llm_analysis_call       1
summary_agent           1
llm_summary_call        1
Name: count, dtype: int64

Status distribution:
status
ok    8
Name: count, dtype: int64

Empty agent_task fields: 1
Empty agent_response fields: 0

Sample of different span types:

research_coordinator:
  Span name: agent:research_coordinator:research_coordinator
  Task: artificial intelligence in healthcare
  Response length: 1000 chars

search_agent:
  Span name: agent:search_agent:search_agent
  Task: nan
  Response length: 1000 chars

web_search_tool:
  Span name: tool:web_search:web_search_tool
  Task: artificial intelligence in healthcare
  Response length: 632 chars

academic_search_tool:
  Span name: tool:academic_search:academic_search_tool
  Task: artificial intelligence in healthcare
  Response le

In [10]:
# Uncomment the line below to clean up the output CSV file
# os.remove(OUTPUT_CSV)
# print(f"Cleaned up {OUTPUT_CSV}")

print("Testing completed!")


Testing completed!
