### Test 1: Maketing Data

In [1]:
# This script shows how to call `run_domain_detector`
# and save / print the results without hitting the
# “JSON object must be str” TypeError.

import os
import sys
import json

# ------------------------------------------------------------------
# 1. Make sure Python can find your agent package / module
#    (adapt the path if your repo layout is different)
# ------------------------------------------------------------------
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath("__file__")))
sys.path.append(ROOT_DIR)

from agents.domain_detector_wrap import run_domain_detector   # noqa: E402

In [2]:
def main():
    import json
    import os
    import pandas as pd
    import tempfile

    # Google Drive URL
    url = "https://drive.google.com/uc?export=download&id=1JhsgpIulCv8Q9NPTZGhrz5-y_RUufMoO"

    print(f"Downloading dataset from Google Drive: {url}")

    try:
        # Create a temporary file for the CSV
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
        temp_path = temp_file.name
        temp_file.close()

        # Download directly with pandas
        df = pd.read_csv(url)
        df.to_csv(temp_path, index=False)

        print(f"Dataset downloaded successfully ({len(df)} rows, {len(df.columns)} columns)")

        # Run the domain detector
        print("Analyzing dataset...")
        result = run_domain_detector(temp_path)  # or your desired value

        # --- print per-iteration history
        history = result.get("history", [])
        if not history:
            print("\nNo iteration history available in the result.")
        else:
            print(f"\nFound {len(history)} iterations in history:")
            for item in history:
                print_iteration(item)  # Print every log, including iteration = 0

            # Print the full raw history for debugging
            print("\nFull raw history:")
            import pprint
            pprint.pprint(history)

        # --- summary
        analysis = result.get('analysis', {})
        domain = analysis.get('domain', 'Unknown')
        scores = result.get('scores', {})

        print("\n✅  Final detected domain:", domain)
        print("✅  Score card:", scores)

        # --- save analysis
        with open("analysis_output.json", "w", encoding="utf-8") as f:
            json.dump(analysis, f, indent=2, ensure_ascii=False)
        print("\nFull analysis written to analysis_output.json")

    except Exception as e:
        print(f"Error during processing: {e}")

    finally:
        # Clean up temporary file
        if 'temp_path' in locals() and os.path.exists(temp_path):
            try:
                os.unlink(temp_path)
                print("Temporary file removed")
            except:
                pass

if __name__ == "__main__":
    main()

Downloading dataset from Google Drive: https://drive.google.com/uc?export=download&id=1JhsgpIulCv8Q9NPTZGhrz5-y_RUufMoO
Dataset downloaded successfully (3117 rows, 19 columns)
Analyzing dataset...
Successfully read CSV: 3117 rows, 19 columns
Data profile built successfully
Starting analysis with max_cycles=5
Decision point – iteration 1, scores: {'correctness': 4, 'relevance': 3, 'coverage': 3, 'insightfulness': 3, 'novelty': 3}
🔄 Analysis needs improvement (iteration 1)
Decision point – iteration 2, scores: {'correctness': 4, 'relevance': 3, 'coverage': 3, 'insightfulness': 3, 'novelty': 3}
🔄 Analysis needs improvement (iteration 2)
Decision point – iteration 3, scores: {'correctness': 4, 'relevance': 3, 'coverage': 3, 'insightfulness': 4, 'novelty': 4}
🛑 Reached maximum cycles (3), ending execution.

Found 1 iterations in history:
Error during processing: name 'print_iteration' is not defined
Temporary file removed


In [2]:
# This script shows how to call `run_domain_detector`
# and save / print the results with detailed history visualization

import os
import sys
import json
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import seaborn as sns

# ------------------------------------------------------------------
# 1. Make sure Python can find your agent package / module
#    (adapt the path if your repo layout is different)
# ------------------------------------------------------------------
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath("__file__")))
sys.path.append(ROOT_DIR)

from agents.domain_detector_wrap import run_domain_detector   # noqa: E402

def visualize_history(result):
    """
    Visualize the domain detection history across iterations.
    
    Args:
        result: The result from run_domain_detector
    """
    if not result or "history" not in result or not result["history"]:
        print("No history data available")
        return
    
    # Convert history to DataFrame for easier manipulation
    history_df = pd.DataFrame(result["history"])
    
    # Print a nice tabular summary
    print("\n=== DOMAIN DETECTION HISTORY ===")
    
    for i, entry in enumerate(result["history"]):
        iteration = entry.get("iteration", i)
        print(f"\n📊 ITERATION {iteration}")
        print(f"📌 Domain: {entry.get('domain', 'Unknown')}")
        
        # Format scores if they exist
        scores = entry.get("scores", {})
        if scores:
            print("📈 Scores:")
            for score_name, score_value in scores.items():
                stars = "★" * score_value + "☆" * (5 - score_value)
                print(f"   {score_name.ljust(15)}: {stars} ({score_value}/5)")
                
        # Print analysis snippet
        analysis_head = entry.get("analysis_head", "")
        if analysis_head:
            print(f"📝 Analysis: {analysis_head}...")
            
    # Plot score evolution if more than one iteration with scores
    scored_entries = [entry for entry in result["history"] if entry.get("scores")]
    if len(scored_entries) > 1:
        plt.figure(figsize=(10, 6))
        
        # Prepare data for plotting
        iterations = []
        score_data = {}
        
        for entry in scored_entries:
            iter_num = entry.get("iteration", 0)
            iterations.append(iter_num)
            
            for score_name, score_value in entry.get("scores", {}).items():
                if score_name not in score_data:
                    score_data[score_name] = []
                score_data[score_name].append(score_value)
        
        # Plot each score metric
        for score_name, values in score_data.items():
            plt.plot(iterations, values, marker='o', label=score_name)
            
        plt.title("Evolution of Scores Across Iterations")
        plt.xlabel("Iteration")
        plt.ylabel("Score (0-5)")
        plt.ylim(0, 5.5)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.legend()
        plt.tight_layout()
        plt.show()

def run_and_visualize(csv_path, max_cycles=5):
    """
    Run domain detector and visualize results with history.
    
    Args:
        csv_path: Path to the CSV file
        max_cycles: Maximum number of improvement cycles
    
    Returns:
        The complete result from the domain detector
    """
    print(f"🚀 Running domain detector on {csv_path} with max_cycles={max_cycles}")
    
    # Run the domain detector
    result = run_domain_detector(csv_path, max_cycles)
    
    # Visualize history
    visualize_history(result)
    
    # Print final results
    if "analysis" in result:
        analysis = result["analysis"]
        print("\n=== FINAL RESULTS ===")
        print(f"✅ Domain: {analysis.get('domain', 'Unknown')}")
        print(f"✅ Core Concepts: {', '.join(analysis.get('core_concepts', []))}")
        
        if "analysis" in analysis:
            print("\n📊 DESCRIPTIVE ANALYSIS:")
            print(analysis["analysis"].get("descriptive", ""))
            
            print("\n🔮 PREDICTIVE ANALYSIS:")
            print(analysis["analysis"].get("predictive", ""))
            
            print("\n🌐 DOMAIN-RELATED ANALYSIS:")
            print(analysis["analysis"].get("domain_related", ""))
    
    return result

# Example usage:
result = run_and_visualize("https://drive.google.com/uc?export=download&id=1JhsgpIulCv8Q9NPTZGhrz5-y_RUufMoO", max_cycles=5)

# To save results to a file:
# with open("domain_analysis_results.json", "w") as f:
#     json.dump(result, f, indent=2)

🚀 Running domain detector on https://drive.google.com/uc?export=download&id=1JhsgpIulCv8Q9NPTZGhrz5-y_RUufMoO with max_cycles=5
Successfully read CSV: 3117 rows, 19 columns
Data profile built successfully
Starting analysis with max_cycles=5
Decision point – iteration 1, scores: {'correctness': 4, 'relevance': 3, 'coverage': 3, 'insightfulness': 4, 'novelty': 4}
🔄 Analysis needs improvement (iteration 1)
Decision point – iteration 2, scores: {'correctness': 4, 'relevance': 3, 'coverage': 3, 'insightfulness': 4, 'novelty': 4}
🔄 Analysis needs improvement (iteration 2)
Decision point – iteration 3, scores: {'correctness': 4, 'relevance': 3, 'coverage': 3, 'insightfulness': 4, 'novelty': 4}
🛑 Reached maximum cycles (3), ending execution.

=== DOMAIN DETECTION HISTORY ===

📊 ITERATION 2
📌 Domain: Customer Relationship Management (CRM)
📈 Scores:
   correctness    : ★★★★☆ (4/5)
   relevance      : ★★★☆☆ (3/5)
   coverage       : ★★★☆☆ (3/5)
   insightfulness : ★★★★☆ (4/5)
   novelty        : 