In [22]:
#!/usr/bin/env python3
"""
Deep Search Advanced System - Complete Single Code Implementation
Multi-Engine Search • Agentic AI Research • Dual-Model Evaluation • Gradio Interface
"""

# ============================================
# IMPORTS & INITIALIZATION
# ============================================

import os
import sys
import json
import time
import asyncio
import random
import urllib.request
import urllib.parse
from datetime import datetime
from typing import Dict, List, Optional, Any
from html.parser import HTMLParser
import concurrent.futures

# Fix asyncio for Jupyter
import nest_asyncio
nest_asyncio.apply()

# AI Agent imports
from agents import Agent, WebSearchTool, trace, Runner, gen_trace_id, function_tool
from agents.model_settings import ModelSettings
from pydantic import BaseModel, Field
from dotenv import load_dotenv

# LLM clients
from openai import OpenAI
from anthropic import Anthropic

# UI imports
import gradio as gr
import pandas as pd
from IPython.display import display, Markdown, HTML

# Initialize environment
load_dotenv(override=True)

try:
    openai_client = OpenAI()
    claude_client = Anthropic()
    LLM_AVAILABLE = True
    print("✅ LLM clients initialized")
except Exception as e:
    LLM_AVAILABLE = False
    print(f"⚠️ LLM clients not available: {e}")

# Setup directories
BASE_DIR = os.getcwd()
WORKSPACE_DIR = os.path.join(BASE_DIR, "workspace")
RESULTS_DIR = os.path.join(WORKSPACE_DIR, "results")
DATA_DIR = os.path.join(WORKSPACE_DIR, "data")

for dir_path in [WORKSPACE_DIR, RESULTS_DIR, DATA_DIR]:
    os.makedirs(dir_path, exist_ok=True)

# Configuration
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
]

print("✅ Deep Search Advanced System initialized!")

# ============================================
# PYDANTIC MODELS
# ============================================

class WebSearchItem(BaseModel):
    reason: str = Field(description="Reasoning for this search")
    query: str = Field(description="Search term to use")

class WebSearchPlan(BaseModel):
    searches: List[WebSearchItem] = Field(description="List of strategic searches")

class ReportData(BaseModel):
    short_summary: str = Field(description="Short summary")
    markdown_report: str = Field(description="Detailed report")
    follow_up_questions: List[str] = Field(description="Follow-up questions")

class ResearchSubquery(BaseModel):
    subquery: str = Field(description="Specific research subquery")
    reasoning: str = Field(description="Why this subquery is needed")
    priority: int = Field(description="Priority level (1-5, 5 being highest)")
    search_type: str = Field(description="Type of search: web, academic, news, technical")

class ResearchPlan(BaseModel):
    main_query: str = Field(description="Original research query")
    subqueries: List[ResearchSubquery] = Field(description="List of research subqueries")
    research_strategy: str = Field(description="Overall research approach")
    estimated_depth: int = Field(description="Expected research depth (1-5)")

class ResearchEvidence(BaseModel):
    source_url: str = Field(description="Source URL")
    content_snippet: str = Field(description="Relevant content snippet")
    credibility_score: float = Field(description="Source credibility (0-1)")
    relevance_score: float = Field(description="Content relevance (0-1)")
    evidence_type: str = Field(description="Type: factual, opinion, analysis, data")

class ResearchFindings(BaseModel):
    subquery: str = Field(description="The subquery this answers")
    key_findings: List[str] = Field(description="Main findings")
    evidence: List[ResearchEvidence] = Field(description="Supporting evidence")
    confidence_level: float = Field(description="Confidence in findings (0-1)")
    gaps_identified: List[str] = Field(description="Information gaps found")

class DeepResearchReport(BaseModel):
    query: str = Field(description="Original research question")
    executive_summary: str = Field(description="High-level summary")
    detailed_analysis: str = Field(description="Comprehensive analysis")
    key_insights: List[str] = Field(description="Main insights discovered")
    evidence_strength: str = Field(description="Overall evidence quality")
    research_limitations: List[str] = Field(description="Limitations and gaps")
    follow_up_questions: List[str] = Field(description="Questions for further research")
    methodology_notes: str = Field(description="Research methodology used")

class EvaluationCriteria(BaseModel):
    accuracy_score: float = Field(description="Factual correctness (0-10)")
    completeness_score: float = Field(description="Comprehensive coverage (0-10)")
    relevance_score: float = Field(description="Query relevance (0-10)")
    clarity_score: float = Field(description="Organization and clarity (0-10)")
    depth_score: float = Field(description="Insight and analysis depth (0-10)")

class DetailedEvaluation(BaseModel):
    criteria_scores: EvaluationCriteria
    overall_score: float
    strengths: List[str]
    weaknesses: List[str]
    missing_aspects: List[str]
    recommendations: List[str]
    confidence_level: str

class UniversalEvaluation(BaseModel):
    query: str
    content_type: str
    gpt_evaluation: DetailedEvaluation
    claude_evaluation: DetailedEvaluation
    consensus_score: float
    final_recommendations: List[str]
    evaluation_summary: str

# ============================================
# EXPLORER SEARCH FUNCTIONS
# ============================================

class LinkExtractor(HTMLParser):
    def __init__(self):
        super().__init__()
        self.links = []
    
    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for attr_name, attr_value in attrs:
                if attr_name == 'href' and attr_value and attr_value.startswith('http'):
                    self.links.append(attr_value)

def simple_html_parse(html_content, max_links=20):
    parser = LinkExtractor()
    try:
        parser.feed(html_content)
        return parser.links[:max_links]
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []

def fetch_duckduckgo_links(query, results=10):
    endpoint = "https://duckduckgo.com/html/"
    params = {"q": query}
    url = endpoint + "?" + urllib.parse.urlencode(params)
    
    try:
        req = urllib.request.Request(url)
        req.add_header('User-Agent', random.choice(USER_AGENTS))
        
        with urllib.request.urlopen(req, timeout=10) as response:
            html_content = response.read().decode('utf-8')
        
        links = simple_html_parse(html_content, results)
        clean_links = [link for link in links if 'duckduckgo.com' not in link]
        return clean_links[:results]
    except Exception as e:
        print(f"Error fetching DuckDuckGo results: {e}")
        return []

def fetch_brave_links(query, results=10):
    endpoint = "https://search.brave.com/search"
    params = {"q": query}
    url = endpoint + "?" + urllib.parse.urlencode(params)
    
    try:
        req = urllib.request.Request(url)
        req.add_header('User-Agent', random.choice(USER_AGENTS))
        req.add_header('Accept-Encoding', 'identity')
        
        with urllib.request.urlopen(req, timeout=10) as response:
            html_content = response.read().decode('utf-8')
        
        links = simple_html_parse(html_content, results)
        clean_links = [link for link in links if 'search.brave.com' not in link]
        return clean_links[:results]
    except Exception as e:
        print(f"Error fetching Brave results: {e}")
        return []

def scrape_pages_text(urls, max_urls=5):
    collected = []
    
    for url in urls[:max_urls]:
        print(f"Scraping: {url}")
        try:
            req = urllib.request.Request(url)
            req.add_header('User-Agent', random.choice(USER_AGENTS))
            
            with urllib.request.urlopen(req, timeout=15) as response:
                html_content = response.read().decode('utf-8', errors='ignore')
            
            import re
            text = re.sub(r'<script[^>]*>.*?</script>', '', html_content, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<[^>]+>', ' ', text)
            text = re.sub(r'\s+', ' ', text).strip()
            
            collected.append({
                "url": url,
                "text": text[:3000],
                "length": len(text),
                "scraped_at": datetime.now().isoformat()
            })
        except Exception as e:
            collected.append({"url": url, "text": f"Error: {str(e)}", "error": True})
    
    return collected

# ============================================
# AI AGENTS SETUP
# ============================================

# Research Planner Agent
RESEARCH_PLANNER_INSTRUCTIONS = """You are an expert research strategist. Given a research query, create a comprehensive research plan by:

1. Breaking down the main query into 3-5 focused subqueries
2. Prioritizing subqueries by importance 
3. Determining the best search strategy for each
4. Estimating the research depth needed

Consider different angles: factual, analytical, historical, future implications, expert opinions, and data-driven insights.
Make your plan systematic and thorough."""

research_planner = Agent(
    name="ResearchPlanner",
    instructions=RESEARCH_PLANNER_INSTRUCTIONS,
    model="gpt-4o-mini",
    output_type=ResearchPlan,
)

# Deep Search Agent
DEEP_SEARCH_INSTRUCTIONS = """You are a specialized deep research agent. For each subquery:

1. Conduct thorough web searches
2. Extract the most relevant and credible information
3. Evaluate source credibility and content relevance  
4. Identify key findings and supporting evidence
5. Note any information gaps or limitations

Focus on factual accuracy and provide specific, actionable findings. Always include source credibility assessment."""

deep_search_agent = Agent(
    name="DeepSearchAgent", 
    instructions=DEEP_SEARCH_INSTRUCTIONS,
    tools=[WebSearchTool(search_context_size="high")],
    model="gpt-4o-mini",
    model_settings=ModelSettings(tool_choice="required"),
    output_type=ResearchFindings,
)

# Research Synthesizer Agent
SYNTHESIS_INSTRUCTIONS = """You are a research synthesis expert. Combine multiple research findings into a comprehensive report:

1. Create an executive summary of all findings
2. Develop detailed analysis connecting all insights
3. Identify the strongest evidence and key insights
4. Note research limitations and gaps
5. Suggest follow-up research questions
6. Document the methodology used

Ensure the report is structured, evidence-based, and provides clear value to the reader."""

synthesis_agent = Agent(
    name="ResearchSynthesizer",
    instructions=SYNTHESIS_INSTRUCTIONS,
    model="gpt-4o-mini", 
    output_type=DeepResearchReport,
)

# Basic agents for compatibility
search_agent = Agent(
    name="SearchAgent",
    instructions="You are a research assistant. Search the web and provide a concise 200-word summary of key findings.",
    tools=[WebSearchTool(search_context_size="medium")],
    model="gpt-4o-mini",
    model_settings=ModelSettings(tool_choice="required"),
)

planner_agent = Agent(
    name="PlannerAgent",
    instructions="You are a research planner. Create 3 strategic web searches to comprehensively answer the query.",
    model="gpt-4o-mini",
    output_type=WebSearchPlan,
)

writer_agent = Agent(
    name="WriterAgent",
    instructions="You are a senior researcher. Create a comprehensive, well-structured markdown report (800+ words) synthesizing all research findings.",
    model="gpt-4o-mini",
    output_type=ReportData,
)

# ============================================
# AGENTIC DEEP SEARCH
# ============================================

async def execute_agentic_deep_search(query: str) -> Dict[str, Any]:
    print(f"🔬 Starting Agentic Deep Search for: {query}")
    
    with trace("Agentic Deep Search Pipeline"):
        
        # Phase 1: Research Planning
        print("📋 Phase 1: Creating Research Plan...")
        planning_result = await Runner.run(research_planner, f"Research Query: {query}")
        research_plan = planning_result.final_output
        
        print(f"   📊 Generated {len(research_plan.subqueries)} subqueries")
        print(f"   🎯 Research Strategy: {research_plan.research_strategy}")
        print(f"   📈 Estimated Depth: {research_plan.estimated_depth}/5")
        
        # Phase 2: Parallel Deep Search Execution
        print("🔍 Phase 2: Executing Deep Research...")
        search_tasks = []
        
        sorted_subqueries = sorted(research_plan.subqueries, key=lambda x: x.priority, reverse=True)
        
        for i, subquery in enumerate(sorted_subqueries):
            print(f"   🔎 Subquery {i+1}: {subquery.subquery[:50]}...")
            search_input = f"""
Subquery: {subquery.subquery}
Reasoning: {subquery.reasoning}  
Search Type: {subquery.search_type}
Priority: {subquery.priority}/5

Please conduct thorough research and provide detailed findings.
"""
            search_tasks.append(Runner.run(deep_search_agent, search_input))
        
        search_results = await asyncio.gather(*search_tasks)
        research_findings = [result.final_output for result in search_results]
        
        print(f"   ✅ Completed {len(research_findings)} deep searches")
        
        # Phase 3: Research Synthesis
        print("🧠 Phase 3: Synthesizing Research...")
        
        synthesis_input = f"""
Original Query: {query}
Research Plan: {research_plan.research_strategy}

Research Findings:
"""
        
        for i, finding in enumerate(research_findings):
            synthesis_input += f"""
Subquery {i+1}: {finding.subquery}
Key Findings: {'; '.join(finding.key_findings)}
Confidence Level: {finding.confidence_level:.2f}
Evidence Count: {len(finding.evidence)}
Gaps Identified: {'; '.join(finding.gaps_identified)}

"""
        
        synthesis_result = await Runner.run(synthesis_agent, synthesis_input)
        final_report = synthesis_result.final_output
        
        print("✅ Agentic Deep Search Completed!")
        
        results = {
            "query": query,
            "timestamp": datetime.now().isoformat(),
            "research_plan": research_plan.dict(),
            "findings": [finding.dict() for finding in research_findings],
            "final_report": final_report.dict(),
            "methodology": {
                "subqueries_executed": len(research_findings),
                "total_evidence_pieces": sum(len(f.evidence) for f in research_findings),
                "average_confidence": sum(f.confidence_level for f in research_findings) / len(research_findings),
                "search_strategy": research_plan.research_strategy
            }
        }
        
        return results

# Basic agentic search for compatibility
async def perform_agentic_search(query: str):
    with trace("Basic Agentic Search"):
        result = await Runner.run(planner_agent, f"Query: {query}")
        search_plan = result.final_output
        
        search_tasks = []
        for item in search_plan.searches:
            input_text = f"Search term: {item.query}\nReason: {item.reason}"
            search_tasks.append(Runner.run(search_agent, input_text))
        
        search_results = await asyncio.gather(*search_tasks)
        search_summaries = [result.final_output for result in search_results]
        
        input_text = f"Original query: {query}\nResearch findings: {search_summaries}"
        report_result = await Runner.run(writer_agent, input_text)
        
        return report_result.final_output, search_plan, search_summaries

# ============================================
# DUAL-MODEL EVALUATION
# ============================================

async def evaluate_with_gpt(query: str, research_report: str) -> DetailedEvaluation:
    if not LLM_AVAILABLE:
        return DetailedEvaluation(
            criteria_scores=EvaluationCriteria(
                accuracy_score=7.0, completeness_score=7.0, relevance_score=7.0,
                clarity_score=7.0, depth_score=7.0
            ),
            overall_score=7.0,
            strengths=["Mock evaluation - GPT-4 not available"],
            weaknesses=["Unable to evaluate - API not configured"],
            missing_aspects=["Full evaluation requires API access"],
            recommendations=["Configure OpenAI API key for real evaluation"],
            confidence_level="Low"
        )
    
    evaluation_prompt = f"""You are an expert research evaluator. Evaluate this research report based on the original query.

Original Query: {query}

Research Report:
{research_report}

Evaluate based on:
1. Accuracy - Are facts correct and well-sourced?
2. Completeness - Does it cover all important aspects?
3. Relevance - Does it directly address the query?
4. Clarity - Is it well-organized and easy to understand?
5. Depth - Does it provide meaningful insights?

Provide detailed evaluation with specific examples."""

    try:
        response = openai_client.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a critical research evaluator. Be thorough and specific."},
                {"role": "user", "content": evaluation_prompt}
            ],
            response_format=DetailedEvaluation,
            temperature=0.3
        )
        return response.choices[0].message.parsed
    except Exception as e:
        print(f"GPT evaluation failed: {e}")
        return DetailedEvaluation(
            criteria_scores=EvaluationCriteria(
                accuracy_score=6.0, completeness_score=6.0, relevance_score=6.0,
                clarity_score=6.0, depth_score=6.0
            ),
            overall_score=6.0,
            strengths=["Evaluation failed - using fallback"],
            weaknesses=[f"Error: {str(e)}"],
            missing_aspects=["Unable to complete evaluation"],
            recommendations=["Check API configuration"],
            confidence_level="Low"
        )

async def evaluate_with_claude(query: str, research_report: str) -> DetailedEvaluation:
    if not LLM_AVAILABLE:
        return DetailedEvaluation(
            criteria_scores=EvaluationCriteria(
                accuracy_score=7.5, completeness_score=7.5, relevance_score=7.5,
                clarity_score=7.5, depth_score=7.5
            ),
            overall_score=7.5,
            strengths=["Mock evaluation - Claude not available"],
            weaknesses=["Unable to evaluate - API not configured"],
            missing_aspects=["Full evaluation requires API access"],
            recommendations=["Configure Anthropic API key for real evaluation"],
            confidence_level="Low"
        )
    
    evaluation_prompt = f"""You are an expert research evaluator. Evaluate this research report based on the original query.

Original Query: {query}

Research Report:
{research_report}

Evaluate based on:
1. Accuracy - Are facts correct and well-sourced?
2. Completeness - Does it cover all important aspects?
3. Relevance - Does it directly address the query?
4. Clarity - Is it well-organized and easy to understand?
5. Depth - Does it provide meaningful insights?

Return your evaluation in this exact JSON format:
{{
    "criteria_scores": {{
        "accuracy_score": <0-10>,
        "completeness_score": <0-10>,
        "relevance_score": <0-10>,
        "clarity_score": <0-10>,
        "depth_score": <0-10>
    }},
    "overall_score": <0-10>,
    "strengths": ["strength1", "strength2", ...],
    "weaknesses": ["weakness1", "weakness2", ...],
    "missing_aspects": ["aspect1", "aspect2", ...],
    "recommendations": ["recommendation1", "recommendation2", ...],
    "confidence_level": "High/Medium/Low"
}}"""

    try:
        response = claude_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2000,
            temperature=0.3,
            messages=[
                {"role": "user", "content": evaluation_prompt}
            ]
        )
        
        evaluation_json = json.loads(response.content[0].text)
        return DetailedEvaluation(**evaluation_json)
    except Exception as e:
        print(f"Claude evaluation failed: {e}")
        return DetailedEvaluation(
            criteria_scores=EvaluationCriteria(
                accuracy_score=6.5, completeness_score=6.5, relevance_score=6.5,
                clarity_score=6.5, depth_score=6.5
            ),
            overall_score=6.5,
            strengths=["Evaluation failed - using fallback"],
            weaknesses=[f"Error: {str(e)}"],
            missing_aspects=["Unable to complete evaluation"],
            recommendations=["Check API configuration"],
            confidence_level="Low"
        )

def calculate_consensus(gpt_eval: DetailedEvaluation, claude_eval: DetailedEvaluation) -> Dict:
    gpt_scores = gpt_eval.criteria_scores.model_dump()
    claude_scores = claude_eval.criteria_scores.model_dump()
    
    score_differences = {}
    for criterion, gpt_score in gpt_scores.items():
        claude_score = claude_scores[criterion]
        score_differences[criterion] = abs(gpt_score - claude_score)
    
    avg_difference = sum(score_differences.values()) / len(score_differences)
    consensus_score = 10 - avg_difference
    
    divergence_areas = []
    for criterion, diff in score_differences.items():
        if diff > 2:
            divergence_areas.append(
                f"{criterion}: GPT={gpt_scores[criterion]:.1f}, Claude={claude_scores[criterion]:.1f}"
            )
    
    all_recommendations = list(set(gpt_eval.recommendations + claude_eval.recommendations))
    
    return {
        "consensus_score": round(consensus_score, 2),
        "divergence_areas": divergence_areas,
        "final_recommendations": all_recommendations,
        "score_differences": score_differences
    }

async def finalize_research_evaluation(query: str, research_report: str) -> UniversalEvaluation:
    print("🤖 Starting GPT-4 evaluation...")
    gpt_eval = await evaluate_with_gpt(query, research_report)
    
    print("🧠 Starting Claude evaluation...")
    claude_eval = await evaluate_with_claude(query, research_report)
    
    print("🔄 Building consensus...")
    consensus = calculate_consensus(gpt_eval, claude_eval)
    
    summary = f"""## Dual-Model Evaluation Summary

**Query**: {query}

### Overall Scores:
- GPT-4 Overall Score: {gpt_eval.overall_score}/10
- Claude Overall Score: {claude_eval.overall_score}/10
- Consensus Score: {consensus['consensus_score']}/10

### Key Findings:
- **Areas of Agreement**: {5 - len(consensus['divergence_areas'])}/5 criteria
- **Areas of Disagreement**: {len(consensus['divergence_areas'])} criteria with significant divergence

### Confidence Levels:
- GPT-4: {gpt_eval.confidence_level}
- Claude: {claude_eval.confidence_level}
"""
    
    return UniversalEvaluation(
        query=query,
        content_type="research_report",
        gpt_evaluation=gpt_eval,
        claude_evaluation=claude_eval,
        consensus_score=consensus['consensus_score'],
        final_recommendations=consensus['final_recommendations'],
        evaluation_summary=summary
    )

# ============================================
# COMBINED SEARCH ORCHESTRATOR
# ============================================

async def execute_combined_deep_search(query: str, include_explorer=True, include_agentic=True, include_evaluation=True) -> tuple:
    print(f"🚀 Starting Combined Deep Search: {query}")
    
    results = {
        "query": query,
        "timestamp": datetime.now().isoformat(),
        "search_methods": [],
        "type": "combined_deep_search"
    }
    
    # Explorer Search
    if include_explorer:
        print("🔍 Phase 1: Explorer Search...")
        results["search_methods"].append("explorer")
        
        dd_urls = fetch_duckduckgo_links(query, 8)
        brave_urls = fetch_brave_links(query, 8)
        all_urls = list(set(dd_urls + brave_urls))
        
        print(f"   📊 Found {len(dd_urls)} DuckDuckGo + {len(brave_urls)} Brave URLs")
        
        scraped_data = scrape_pages_text(all_urls[:6])
        
        results["explorer_results"] = []
        for item in scraped_data:
            item["type"] = "scraped_content"
            results["explorer_results"].append(item)
        
        print(f"   ✅ Scraped {len(scraped_data)} pages")
    
    # Agentic Deep Search
    if include_agentic:
        print("🧠 Phase 2: Agentic Deep Search...")
        results["search_methods"].append("agentic")
        
        agentic_results = await execute_agentic_deep_search(query)
        results["agentic_results"] = agentic_results
        
        print(f"   ✅ Completed agentic research with {agentic_results['methodology']['subqueries_executed']} subqueries")
    
    # Dual-Model Evaluation
    if include_evaluation and include_agentic and "agentic_results" in results:
        print("📊 Phase 3: Dual-Model Evaluation...")
        
        report_text = results["agentic_results"]["final_report"]["detailed_analysis"]
        evaluation = await finalize_research_evaluation(query, report_text)
        
        results["evaluation"] = evaluation.dict()
        results["consensus_score"] = evaluation.consensus_score
        results["quality_approved"] = evaluation.consensus_score >= 7.5
        
        print(f"   📈 Consensus Score: {evaluation.consensus_score}/10")
        print(f"   ✅ Quality: {'APPROVED' if results['quality_approved'] else 'NEEDS IMPROVEMENT'}")
    
    # Create summary
    summary_parts = []
    if include_explorer:
        explorer_count = len(results.get('explorer_results', []))
        summary_parts.append(f"Explorer: {explorer_count} sources")
    
    if include_agentic:
        methodology = results.get('agentic_results', {}).get('methodology', {})
        subqueries = methodology.get('subqueries_executed', 0)
        evidence = methodology.get('total_evidence_pieces', 0)
        summary_parts.append(f"Agentic: {subqueries} subqueries, {evidence} evidence pieces")
    
    if include_evaluation:
        score = results.get('consensus_score', 0)
        summary_parts.append(f"Evaluation: {score}/10")
    
    results["combined_summary"] = " | ".join(summary_parts)
    
    # Save results
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"combined_deep_search_{timestamp}.json"
    filepath = os.path.join(DATA_DIR, filename)
    
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
    
    print(f"💾 Complete results saved to: {filename}")
    
    return results, filepath

# Main research function
async def run_research(query: str):
    results, filepath = await execute_combined_deep_search(query, True, True, True)
    
    print(f"\n{'='*60}")
    print("📊 RESEARCH SUMMARY")
    print(f"{'='*60}")
    print(f"🎯 Query: {query}")
    print(f"🔧 Methods: {', '.join(results['search_methods'])}")
    
    if 'evaluation' in results:
        eval_data = results['evaluation']
        print(f"📈 GPT-4 Score: {eval_data['gpt_evaluation']['overall_score']}/10")
        print(f"🧠 Claude Score: {eval_data['claude_evaluation']['overall_score']}/10")
        print(f"🎯 Consensus: {results['consensus_score']}/10")
        print(f"✅ Quality: {'APPROVED' if results['quality_approved'] else 'NEEDS IMPROVEMENT'}")
    
    print(f"💾 Saved to: {filepath}")
    
    return {
        "report": type('obj', (object,), {
            "short_summary": results["agentic_results"]["final_report"]["executive_summary"],
            "markdown_report": results["agentic_results"]["final_report"]["detailed_analysis"],
            "follow_up_questions": results["agentic_results"]["final_report"]["follow_up_questions"]
        })(),
        "evaluation": type('obj', (object,), {
            "gpt_evaluation": type('obj', (object,), {
                "overall_score": results["evaluation"]["gpt_evaluation"]["overall_score"],
                "criteria_scores": type('obj', (object,), results["evaluation"]["gpt_evaluation"]["criteria_scores"])()
            })(),
            "claude_evaluation": type('obj', (object,), {
                "overall_score": results["evaluation"]["claude_evaluation"]["overall_score"],
                "criteria_scores": type('obj', (object,), results["evaluation"]["claude_evaluation"]["criteria_scores"])()
            })(),
            "consensus_score": results["consensus_score"],
            "final_recommendations": results["evaluation"]["final_recommendations"]
        })(),
        "quality_approved": results["quality_approved"],
        "filepath": filepath
    }

# ============================================
# FILE MANAGEMENT UTILITIES
# ============================================

def list_available_json_files():
    files = []
    if os.path.exists(DATA_DIR):
        for filename in os.listdir(DATA_DIR):
            if filename.endswith('.json'):
                filepath = os.path.join(DATA_DIR, filename)
                try:
                    with open(filepath, 'r') as f:
                        data = json.load(f)
                    
                    stat = os.stat(filepath)
                    
                    files.append({
                        'filename': filename,
                        'filepath': filepath,
                        'query': data.get('query', 'Unknown'),
                        'type': data.get('type', 'unknown'),
                        'timestamp': data.get('timestamp', 'Unknown'),
                        'size': stat.st_size,
                        'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
                        'has_evaluation': 'evaluation' in data or 'consensus_score' in data
                    })
                except:
                    files.append({
                        'filename': filename,
                        'filepath': filepath,
                        'query': 'Error reading file',
                        'type': 'error',
                        'timestamp': 'Unknown',
                        'size': os.path.getsize(filepath),
                        'modified': datetime.fromtimestamp(os.path.getmtime(filepath)).isoformat(),
                        'has_evaluation': False
                    })
    
    files.sort(key=lambda x: x['modified'], reverse=True)
    return files

def display_research_summary():
    files = list_available_json_files()
    
    if not files:
        print("📂 No research files found")
        return
    
    print(f"📊 Research Files Summary ({len(files)} total)")
    print("="*60)
    
    by_type = {}
    total_size = 0
    
    for file_info in files:
        file_type = file_info['type']
        if file_type not in by_type:
            by_type[file_type] = []
        by_type[file_type].append(file_info)
        total_size += file_info['size']
    
    for file_type, type_files in by_type.items():
        print(f"🔧 {file_type}: {len(type_files)} files")
        for file_info in type_files[:3]:
            status = "📊 Evaluated" if file_info['has_evaluation'] else "📄 Raw"
            print(f"   {status} {file_info['filename']}: {file_info['query'][:40]}...")
        if len(type_files) > 3:
            print(f"   ... and {len(type_files) - 3} more")
        print()
    
    print(f"💾 Total storage: {total_size/1024:.1f} KB")

# ============================================
# GRADIO INTERFACE
# ============================================

def run_explorer_search_interface(query: str):
    if not query.strip():
        return "❌ Please enter a search query", "", "", ""
    
    try:
        print(f"🔍 Starting Explorer Search for: {query}")
        all_data = []
        
        print("🦆 Fetching DuckDuckGo results...")
        dd_urls = fetch_duckduckgo_links(query, 5)
        if dd_urls:
            dd_scraped = scrape_pages_text(dd_urls[:3])
            for entry in dd_scraped:
                entry["source"] = "duckduckgo"
                entry["type"] = "scraped_content"
                all_data.append(entry)
        
        print("🛡️ Fetching Brave results...")
        brave_urls = fetch_brave_links(query, 5)
        if brave_urls:
            brave_scraped = scrape_pages_text(brave_urls[:3])
            for entry in brave_scraped:
                entry["source"] = "brave"
                entry["type"] = "scraped_content"
                all_data.append(entry)
        
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"explorer_{timestamp}.json"
        filepath = os.path.join(DATA_DIR, filename)
        
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump({
                "query": query,
                "timestamp": timestamp,
                "type": "explorer_only",
                "results": all_data
            }, f, indent=2)
        
        progress = f"✅ Explorer search completed!\n📊 Found {len(all_data)} results\n💾 Saved: {filename}"
        summary = f"DuckDuckGo: {len(dd_urls)} URLs | Brave: {len(brave_urls)} URLs | Scraped: {len(all_data)} pages"
        
        report = "# 🔍 Explorer Search Results\n\n"
        for i, item in enumerate(all_data[:5], 1):
            if not item.get("error"):
                report += f"## Source {i}: {item['source'].title()}\n"
                report += f"**URL**: {item.get('url', 'Unknown')}\n\n"
                report += f"{item.get('text', 'No content')[:500]}...\n\n---\n\n"
        
        return progress, summary, report, filepath
        
    except Exception as e:
        error_msg = f"❌ Explorer search failed: {str(e)}"
        return error_msg, "", "", ""

def run_complete_research_interface(query: str):
    if not query.strip():
        return "❌ Please enter a search query", "", "", ""
    
    try:
        async def async_research():
            return await run_research(query)
        
        loop = asyncio.get_event_loop()
        if loop.is_running():
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(asyncio.run, async_research())
                results = future.result(timeout=300)
        else:
            results = asyncio.run(async_research())
        
        if results:
            progress = f"""✅ Complete research finished!
🔍 Query: {query}
📊 GPT-4 Score: {results['evaluation'].gpt_evaluation.overall_score}/10
🧠 Claude Score: {results['evaluation'].claude_evaluation.overall_score}/10
🎯 Consensus: {results['evaluation'].consensus_score}/10
✅ Quality: {'APPROVED' if results['quality_approved'] else 'NEEDS IMPROVEMENT'}
💾 Saved: {os.path.basename(results['filepath'])}"""
            
            summary = results['report'].short_summary
            
            report = f"""# 🔬 Complete Research Report

## 📖 Research Findings
{results['report'].markdown_report}

## 📊 Dual-Model Evaluation Results

### Overall Scores
- **GPT-4 Evaluation**: {results['evaluation'].gpt_evaluation.overall_score}/10
- **Claude Evaluation**: {results['evaluation'].claude_evaluation.overall_score}/10  
- **Consensus Score**: {results['evaluation'].consensus_score}/10

### Quality Assessment
**Status**: {'🟢 APPROVED' if results['quality_approved'] else '🔴 NEEDS IMPROVEMENT'}

### Key Recommendations
{chr(10).join(f"• {rec}" for rec in results['evaluation'].final_recommendations[:5])}

### Follow-up Questions
{chr(10).join(f"• {q}" for q in results['report'].follow_up_questions)}
"""
            
            return progress, summary, report, results['filepath']
        else:
            return "❌ Research failed - no results returned", "", "", ""
            
    except Exception as e:
        error_msg = f"❌ Research failed: {str(e)}\n\n💡 Make sure all requirements are installed"
        return error_msg, "", "", ""

def get_available_files():
    try:
        files = list_available_json_files()
        if not files:
            return [("No files found", "")]
        
        options = []
        for file_info in files:
            query_preview = file_info['query'][:35]
            if len(file_info['query']) > 35:
                query_preview += '...'
            
            file_type = file_info['type']
            if file_info['has_evaluation']:
                file_type += " (evaluated)"
            
            display_name = f"📄 {file_info['filename']} | {query_preview} | {file_type}"
            options.append((display_name, file_info['filepath']))
        
        return options
    except Exception as e:
        return [(f"Error: {str(e)}", "")]

def format_file_table():
    try:
        files = list_available_json_files()
        if not files:
            return "<p>📂 No research files found</p>"
        
        table_data = []
        for f in files[:10]:
            size_kb = f['size'] / 1024
            modified_date = f['modified'][:16].replace('T', ' ')
            query_short = (f['query'][:30] + "...") if len(f['query']) > 30 else f['query']
            
            file_type = f['type']
            if f['has_evaluation']:
                file_type += " ✓"
            
            table_data.append([
                f"📄 {f['filename']}",
                query_short,
                f"🔧 {file_type}",
                f"💾 {size_kb:.1f} KB",
                f"📅 {modified_date}"
            ])
        
        df = pd.DataFrame(table_data, columns=[
            "File", "Query", "Type", "Size", "Modified"
        ])
        
        html_table = df.to_html(
            index=False, 
            classes="table table-striped table-hover",
            escape=False
        )
        
        return f"""
        <div style="max-height: 400px; overflow-y: auto; border: 1px solid #ddd; border-radius: 5px; padding: 10px;">
            {html_table}
        </div>
        <p style="text-align: center; color: #666; margin-top: 10px;">
            📊 Latest 10 files | Total: {len(files)} files
        </p>
        """
        
    except Exception as e:
        return f"<p>❌ Error loading file table: {str(e)}</p>"

# Custom CSS
custom_css = """
.gradio-container {
    max-width: 1400px !important;
    margin: auto;
}
.tab-nav button {
    font-size: 14px !important;
    padding: 10px 16px !important;
}
"""

# Create Gradio interface
with gr.Blocks(title="Deep Search Advanced", theme=gr.themes.Soft(), css=custom_css) as demo:
    
    gr.Markdown("""
    # 🔬 Deep Search Advanced System
    **Multi-Engine Search • Agentic AI Research • Dual-Model Evaluation**
    
    *Complete research solution with Explorer scraping, AI agents, and quality assessment*
    """)
    
    with gr.Tabs():
        
        # Tab 1: Complete Research
        with gr.TabItem("🚀 Complete Research"):
            gr.Markdown("### Full Research Pipeline with Dual-Model Evaluation")
            
            with gr.Row():
                with gr.Column(scale=2):
                    research_query = gr.Textbox(
                        label="🔬 Research Query", 
                        placeholder="Enter your research question for complete analysis...",
                        lines=3
                    )
                    research_btn = gr.Button("🚀 Start Complete Research", variant="primary", size="lg")
                
                with gr.Column(scale=3):
                    research_progress = gr.Textbox(label="📋 Progress & Results", lines=8, interactive=False)
            
            with gr.Row():
                research_summary = gr.Textbox(label="📊 Executive Summary", lines=4, interactive=False)
                research_file = gr.Textbox(label="💾 Saved File", lines=2, interactive=False)
            
            research_report = gr.Textbox(label="📖 Complete Research Report", lines=20, max_lines=25, interactive=False)
            
            research_btn.click(
                run_complete_research_interface,
                inputs=research_query,
                outputs=[research_progress, research_summary, research_report, research_file]
            )
        
        # Tab 2: Explorer Search
        with gr.TabItem("🔍 Explorer Search"):
            gr.Markdown("### Multi-Engine Web Scraping (DuckDuckGo + Brave)")
            
            with gr.Row():
                with gr.Column(scale=2):
                    explorer_query = gr.Textbox(
                        label="🔍 Search Query", 
                        placeholder="Enter search query for web scraping...",
                        lines=2
                    )
                    explorer_btn = gr.Button("🔍 Start Explorer Search", variant="primary", size="lg")
                
                with gr.Column(scale=3):
                    explorer_progress = gr.Textbox(label="📋 Progress Log", lines=6, interactive=False)
            
            with gr.Row():
                explorer_summary = gr.Textbox(label="📊 Summary", lines=4, interactive=False)
                explorer_file = gr.Textbox(label="💾 Saved File", lines=2, interactive=False)
            
            explorer_report = gr.Textbox(label="📖 Search Results", lines=15, interactive=False)
            
            explorer_btn.click(
                run_explorer_search_interface,
                inputs=explorer_query,
                outputs=[explorer_progress, explorer_summary, explorer_report, explorer_file]
            )
        
        # Tab 3: File Manager
        with gr.TabItem("📁 Research Files"):
            gr.Markdown("### Research History & File Management")
            
            file_table_output = gr.HTML(format_file_table())
            
            with gr.Row():
                refresh_table_btn = gr.Button("🔄 Refresh Table", variant="secondary")
                gr.Markdown("*Shows latest research files with evaluation indicators*")
            
            refresh_table_btn.click(
                lambda: gr.HTML.update(value=format_file_table()),
                outputs=file_table_output
            )
    
    # Footer
    gr.Markdown("""
    ---
    **💡 System Guide:**
    
    1. **Complete Research**: Full pipeline with web search, AI analysis, and dual-model evaluation
    2. **Explorer Search**: Raw multi-engine web scraping for data collection  
    3. **Research Files**: Browse and manage your research history
    
    **🎯 Quality Threshold**: Research with consensus score ≥7.5/10 is marked as approved
    **🔧 Technologies**: DuckDuckGo, Brave Search, GPT-4, Claude, Multi-Agent AI
    """)

# ============================================
# LAUNCH & TEST FUNCTIONS
# ============================================

async def test_complete_system():
    print("🚀 Testing Complete System...")
    
    # Test explorer
    print("🧪 Testing Explorer Search...")
    query = "latest AI developments 2025"
    dd_urls = fetch_duckduckgo_links(query, 3)
    brave_urls = fetch_brave_links(query, 3)
    
    print(f"🦆 DuckDuckGo: {len(dd_urls)} URLs")
    print(f"🛡️ Brave: {len(brave_urls)} URLs")
    
    if dd_urls or brave_urls:
        print("✅ Explorer search working!")
    else:
        print("❌ Explorer search issues")
    
    # Test agentic
    print("🧪 Testing Agentic Search...")
    try:
        query = "AI agent frameworks comparison"
        results = await execute_agentic_deep_search(query)
        
        print(f"✅ Agentic search completed!")
        print(f"📊 Subqueries: {results['methodology']['subqueries_executed']}")
        print(f"📚 Evidence: {results['methodology']['total_evidence_pieces']}")
    except Exception as e:
        print(f"❌ Agentic search failed: {e}")
    
    # Test file management
    files = list_available_json_files()
    print(f"📁 Found {len(files)} existing files")
    
    print("✅ System test completed!")

def show_system_status():
    print("🔬 Deep Search Advanced System Status")
    print("="*50)
    print(f"📁 Workspace: {WORKSPACE_DIR}")
    print(f"💾 Data Dir: {DATA_DIR}")
    print(f"📊 Results Dir: {RESULTS_DIR}")
    print(f"🤖 LLM Status: {'Available' if LLM_AVAILABLE else 'Mock Mode'}")
    
    files = list_available_json_files()
    print(f"📄 Research Files: {len(files)}")
    
    by_type = {}
    for file_info in files:
        file_type = file_info['type']
        by_type[file_type] = by_type.get(file_type, 0) + 1
    
    for file_type, count in by_type.items():
        print(f"  🔧 {file_type}: {count}")
    
    print("\n🚀 Ready to launch!")

def launch_gradio_interface():
    print("🌐 Launching Gradio Interface...")
    demo.launch(
        share=False,      # Keep localhost only
        server_port=7860, # Default port
        show_error=True,  # Show errors
        debug=True,       # Debug mode
        inbrowser=True    # Open browser automatically
    )

# Show system status
show_system_status()

print("\n💡 Available Commands:")
print("  • await test_complete_system() - Test all functionality")
print("  • await run_research('your query') - Complete research")
print("  • display_research_summary() - Show file summary")
print("  • launch_gradio_interface() - Launch web interface")
print("  • demo.launch() - Quick launch Gradio")

print("\n✅ Deep Search Advanced System Ready!")
print("🚀 To start: Run launch_gradio_interface() or demo.launch()")


✅ LLM clients initialized
✅ Deep Search Advanced System initialized!
🔬 Deep Search Advanced System Status
📁 Workspace: d:\Workspace\LLMs_projects\agents\DeepSearch\workspace
💾 Data Dir: d:\Workspace\LLMs_projects\agents\DeepSearch\workspace\data
📊 Results Dir: d:\Workspace\LLMs_projects\agents\DeepSearch\workspace\results
🤖 LLM Status: Available
📄 Research Files: 23
  🔧 unknown: 10
  🔧 explorer_only: 3
  🔧 combined_deep_search: 9
  🔧 combined: 1

🚀 Ready to launch!

💡 Available Commands:
  • await test_complete_system() - Test all functionality
  • await run_research('your query') - Complete research
  • display_research_summary() - Show file summary
  • launch_gradio_interface() - Launch web interface
  • demo.launch() - Quick launch Gradio

✅ Deep Search Advanced System Ready!
🚀 To start: Run launch_gradio_interface() or demo.launch()


In [23]:
demo.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




🚀 Starting Combined Deep Search: To help Halim to create similar box sets in the future, we would like to create a Python algorithm that suggests a possible solution for any given set of jars.

We will use a list to store the collection of jars available. For instance, using the current set of 10 jars, our list would be as follows:
jars = [150,20,20,10,80,130,110,90,100,40]

The aim of this challenge is to use a backtracking / recursive algorithm to work out a possible solution for this puzzle by creating three lists of jars, using the values from the above jars list to create 3 box sets of exactly 250g each.
🔍 Phase 1: Explorer Search...
   📊 Found 0 DuckDuckGo + 7 Brave URLs
Scraping: https://brave.com/download/?mtm_source=brave-search&mtm_medium=searchfooter&mtm_campaign=brave-search&mtm_content=evergreen
Scraping: https://status.brave.app/
Scraping: https://talk.brave.com/?mtm_source=brave-search&mtm_medium=searchfooter&mtm_campaign=brave-search&mtm_content=evergreen
Scraping: http

Error getting response: Error code: 500 - {'error': {'message': 'The server had an error processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if you keep seeing this error. (Please include the request ID req_efb58d95191bdff246b2f8c3be26557a in your email.)', 'type': 'server_error', 'param': None, 'code': None}}. (request_id: req_efb58d95191bdff246b2f8c3be26557a)


🚀 Starting Combined Deep Search: To help Halim to create similar box sets in the future, we would like to create a Python algorithm that suggests a possible solution for any given set of jars.

We will use a list to store the collection of jars available. For instance, using the current set of 10 jars, our list would be as follows:
jars = [150,20,20,10,80,130,110,90,100,40]

The aim of this challenge is to use a backtracking / recursive algorithm to work out a possible solution for this puzzle by creating three lists of jars, using the values from the above jars list to create 3 box sets of exactly 250g each.
🔍 Phase 1: Explorer Search...
   📊 Found 0 DuckDuckGo + 7 Brave URLs
Scraping: https://brave.com/download/?mtm_source=brave-search&mtm_medium=searchfooter&mtm_campaign=brave-search&mtm_content=evergreen
Scraping: https://status.brave.app/
Scraping: https://talk.brave.com/?mtm_source=brave-search&mtm_medium=searchfooter&mtm_campaign=brave-search&mtm_content=evergreen
Scraping: http

C:\Users\deepa\AppData\Local\Temp\ipykernel_18480\2974044853.py:395: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  "research_plan": research_plan.dict(),
C:\Users\deepa\AppData\Local\Temp\ipykernel_18480\2974044853.py:396: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  "findings": [finding.dict() for finding in research_findings],
C:\Users\deepa\AppData\Local\Temp\ipykernel_18480\2974044853.py:397: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  "final_report": final_report.dict(),


🧠 Starting Claude evaluation...
🔄 Building consensus...
   📈 Consensus Score: 7.8/10
   ✅ Quality: APPROVED
💾 Complete results saved to: combined_deep_search_20250818_194405.json

📊 RESEARCH SUMMARY
🎯 Query: To help Halim to create similar box sets in the future, we would like to create a Python algorithm that suggests a possible solution for any given set of jars.

We will use a list to store the collection of jars available. For instance, using the current set of 10 jars, our list would be as follows:
jars = [150,20,20,10,80,130,110,90,100,40]

The aim of this challenge is to use a backtracking / recursive algorithm to work out a possible solution for this puzzle by creating three lists of jars, using the values from the above jars list to create 3 box sets of exactly 250g each.
🔧 Methods: explorer, agentic
📈 GPT-4 Score: 7.8/10
🧠 Claude Score: 5.6/10
🎯 Consensus: 7.8/10
✅ Quality: APPROVED
💾 Saved to: d:\Workspace\LLMs_projects\agents\DeepSearch\workspace\data\combined_deep_search_2

C:\Users\deepa\AppData\Local\Temp\ipykernel_18480\2974044853.py:678: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  results["evaluation"] = evaluation.dict()


In [None]:
# ============================================
# CELL 1: IMPORTS AND ENVIRONMENT SETUP
# ============================================

import os
import sys
import json
import time
import asyncio
import random
import urllib.request
import urllib.parse
from datetime import datetime
from typing import Dict, List, Optional, Any
from html.parser import HTMLParser

# AI Agent imports
from agents import Agent, WebSearchTool, trace, Runner, gen_trace_id, function_tool
from agents.model_settings import ModelSettings
from pydantic import BaseModel, Field
from dotenv import load_dotenv

# LLM clients for evaluation
from openai import OpenAI
from anthropic import Anthropic

# UI and display imports
import gradio as gr
import pandas as pd
from IPython.display import display, Markdown, HTML

# Initialize environment
load_dotenv(override=True)
openai_client = OpenAI()
claude_client = Anthropic()

# Setup workspace directories
BASE_DIR = os.getcwd()
WORKSPACE_DIR = os.path.join(BASE_DIR, "workspace")
RESULTS_DIR = os.path.join(WORKSPACE_DIR, "results")
DATA_DIR = os.path.join(WORKSPACE_DIR, "data")

for dir_path in [WORKSPACE_DIR, RESULTS_DIR, DATA_DIR]:
    os.makedirs(dir_path, exist_ok=True)

print("✅ Environment and directories initialized!")

# ============================================
# CELL 2: CONFIGURATION AND PYDANTIC MODELS
# ============================================

# ===== USER CONFIGURABLE SETTINGS =====
# Modify these variables based on your requirements

# Web scraping configuration
MAX_LINKS_TO_EXTRACT = 20      # Maximum links to extract from search results
MAX_URLS_TO_SCRAPE = 5         # Maximum URLs to scrape content from
MAX_TEXT_LENGTH = 3000         # Maximum text length per scraped page
SCRAPING_TIMEOUT = 15          # Timeout in seconds for web scraping

# Agentic search configuration  
MAX_STRATEGIC_SEARCHES = 3     # Number of strategic searches to plan
SEARCH_CONTEXT_SIZE = "medium" # "small", "medium", "large" - affects search depth
MAX_SEARCH_RESULTS = 10        # Maximum results per search
REPORT_MIN_LENGTH = 1000       # Minimum report length in words

# Deep search configuration
ENABLE_DEEP_SEARCH = True      # Enable/disable deep search mode
DEEP_SEARCH_ITERATIONS = 5     # Number of deep search iterations
DEEP_SEARCH_REFINEMENT = True  # Enable search refinement

# User agents for web scraping
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
]

# Pydantic Models for structured data
class WebSearchItem(BaseModel):
    reason: str = Field(description="Reasoning for this search")
    query: str = Field(description="Search term to use")

class WebSearchPlan(BaseModel):
    searches: List[WebSearchItem] = Field(description="List of strategic searches")

class ReportData(BaseModel):
    short_summary: str = Field(description="Short summary")
    markdown_report: str = Field(description="Detailed report")
    follow_up_questions: List[str] = Field(description="Follow-up questions")

class CombinedSearchResults(BaseModel):
    query: str
    timestamp: str
    explorer_results: List[Dict] = Field(description="Raw scraping results")
    agentic_results: ReportData = Field(description="AI-generated report")
    combined_summary: str = Field(description="Summary of both approaches")

# Evaluation Models
class EvaluationCriteria(BaseModel):
    accuracy_score: float = Field(description="Factual correctness (0-10)")
    completeness_score: float = Field(description="Comprehensive coverage (0-10)")
    relevance_score: float = Field(description="Query relevance (0-10)")
    clarity_score: float = Field(description="Organization and clarity (0-10)")
    depth_score: float = Field(description="Insight and analysis depth (0-10)")

class DetailedEvaluation(BaseModel):
    criteria_scores: EvaluationCriteria
    overall_score: float
    strengths: List[str]
    weaknesses: List[str]
    missing_aspects: List[str]
    recommendations: List[str]
    confidence_level: str

class UniversalEvaluation(BaseModel):
    query: str
    content_type: str  # "explorer", "agentic", "combined"
    gpt_evaluation: DetailedEvaluation
    claude_evaluation: DetailedEvaluation
    consensus_score: float
    final_recommendations: List[str]
    evaluation_summary: str

print("✅ Configuration and models loaded!")

# ============================================
# CELL 3: HTML PARSING UTILITIES
# ============================================

class LinkExtractor(HTMLParser):
    """Custom HTML parser to extract links from web pages"""
    def __init__(self):
        super().__init__()
        self.links = []
    
    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for attr_name, attr_value in attrs:
                if attr_name == 'href' and attr_value and attr_value.startswith('http'):
                    self.links.append(attr_value)

def simple_html_parse(html_content, max_links=None):
    """Simple HTML parser to extract links using built-in HTMLParser"""
    if max_links is None:
        max_links = MAX_LINKS_TO_EXTRACT
        
    parser = LinkExtractor()
    try:
        parser.feed(html_content)
        return parser.links[:max_links]
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []

print("✅ HTML parsing utilities loaded!")

# ============================================
# CELL 4: GOOGLE SEARCH FUNCTIONS (DISABLED)
# ============================================

# Google API Configuration (commented out to avoid costs)
# GOOGLE_API_URL = "https://google-search-master-mega.p.rapidapi.com/search"
# GOOGLE_HEADERS = {
#     "x-rapidapi-key": "your-api-key-here",
#     "x-rapidapi-host": "google-search-master-mega.p.rapidapi.com"
# }

# def fetch_google_results(query=""):
#     """
#     Fetch search results from Google via RapidAPI using urllib.
#     CURRENTLY DISABLED - Uncomment to re-enable Google search
#     """
#     params = {
#         "q": query,
#         "gl": "us",
#         "hl": "en", 
#         "num": "10",
#         "page": "1"
#     }
#     
#     url = GOOGLE_API_URL + "?" + urllib.parse.urlencode(params)
#     
#     try:
#         req = urllib.request.Request(url)
#         for key, value in GOOGLE_HEADERS.items():
#             req.add_header(key, value)
#         
#         with urllib.request.urlopen(req, timeout=10) as response:
#             data = response.read().decode('utf-8')
#             return json.loads(data)
#     
#     except Exception as e:
#         print(f"Error fetching Google results: {e}")
#         return {}

print("🚫 Google search functions disabled (uncomment to enable)")

# ============================================
# CELL 5: DUCKDUCKGO SEARCH FUNCTIONS
# ============================================

def fetch_duckduckgo_links(query, results=None):
    """
    Fetch search results from DuckDuckGo using direct HTML scraping.
    
    This function:
    1. Builds DuckDuckGo search URL with query parameters
    2. Sends HTTP request with random User-Agent to avoid blocking
    3. Parses HTML response to extract result links
    4. Filters out DuckDuckGo internal links
    5. Returns clean list of external URLs
    """
    if results is None:
        results = MAX_SEARCH_RESULTS
        
    endpoint = "https://duckduckgo.com/html/"
    params = {"q": query}
    url = endpoint + "?" + urllib.parse.urlencode(params)
    
    try:
        # Create request with random user agent to avoid detection
        req = urllib.request.Request(url)
        req.add_header('User-Agent', random.choice(USER_AGENTS))
        
        # Fetch HTML content
        with urllib.request.urlopen(req, timeout=10) as response:
            html_content = response.read().decode('utf-8')
        
        # Extract links from HTML
        links = simple_html_parse(html_content, results)
        
        # Filter out DuckDuckGo internal links
        clean_links = [link for link in links if 'duckduckgo.com' not in link]
        return clean_links[:results]
        
    except Exception as e:
        print(f"Error fetching DuckDuckGo results: {e}")
        return []

print("✅ DuckDuckGo search functions loaded!")

# ============================================
# CELL 6: BRAVE SEARCH FUNCTIONS
# ============================================

def fetch_brave_links(query, results=None):
    """
    Fetch search results from Brave Search using direct HTML scraping.
    
    Similar to DuckDuckGo but targets Brave Search engine.
    """
    if results is None:
        results = MAX_SEARCH_RESULTS
        
    endpoint = "https://search.brave.com/search"
    params = {"q": query}
    url = endpoint + "?" + urllib.parse.urlencode(params)
    
    try:
        req = urllib.request.Request(url)
        req.add_header('User-Agent', random.choice(USER_AGENTS))
        req.add_header('Accept-Encoding', 'identity')
        
        with urllib.request.urlopen(req, timeout=10) as response:
            html_content = response.read().decode('utf-8')
        
        links = simple_html_parse(html_content, results)
        clean_links = [link for link in links if 'search.brave.com' not in link]
        return clean_links[:results]
        
    except Exception as e:
        print(f"Error fetching Brave results: {e}")
        return []

print("✅ Brave search functions loaded!")

# ============================================
# CELL 7: WEB SCRAPING FUNCTIONS
# ============================================

def scrape_pages_text(urls, max_urls=None, source_name="Unknown"):
    """
    Extract text content from a list of URLs with detailed progress tracking.
    
    This function:
    1. Iterates through provided URLs (limited to max_urls for performance)
    2. Fetches HTML content from each URL
    3. Strips HTML tags to extract plain text
    4. Returns structured data with URL, text content, and metadata
    """
    if max_urls is None:
        max_urls = MAX_URLS_TO_SCRAPE
        
    collected = []
    
    print(f"🔗 Starting to scrape {min(len(urls), max_urls)} URLs from {source_name}:")
    
    for i, url in enumerate(urls[:max_urls], 1):
        print(f"\n📄 [{i}/{min(len(urls), max_urls)}] Scraping: {url}")
        start_time = time.time()
        
        try:
            # Create request with random user agent
            req = urllib.request.Request(url)
            req.add_header('User-Agent', random.choice(USER_AGENTS))
            
            # Fetch HTML content with timeout
            with urllib.request.urlopen(req, timeout=SCRAPING_TIMEOUT) as response:
                content_type = response.headers.get('content-type', '')
                content_length = response.headers.get('content-length', 'Unknown')
                html_content = response.read().decode('utf-8', errors='ignore')
            
            # Simple text extraction using regex
            import re
            # Remove script and style tags with content
            text = re.sub(r'<script[^>]*>.*?</script>', '', html_content, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
            # Remove all HTML tags
            text = re.sub(r'<[^>]+>', ' ', text)
            # Clean up whitespace
            text = re.sub(r'\s+', ' ', text).strip()
            
            # Calculate processing time
            processing_time = time.time() - start_time
            
            # Store structured result
            result = {
                "url": url,
                "source_engine": source_name, 
                "text": text[:MAX_TEXT_LENGTH],  # Limit text length to avoid memory issues
                "full_text_length": len(text),
                "truncated_length": len(text[:MAX_TEXT_LENGTH]),
                "scraped_at": datetime.now().isoformat(),
                "processing_time_seconds": round(processing_time, 2),
                "content_type": content_type,
                "content_length": content_length,
                "success": True,
                "error": False
            }
            
            collected.append(result)
            
            # Success message with stats
            print(f"   ✅ Success! {len(text):,} chars extracted in {processing_time:.2f}s")
            if len(text) > MAX_TEXT_LENGTH:
                print(f"   ⚠️  Text truncated from {len(text):,} to {MAX_TEXT_LENGTH:,} chars")
            
        except Exception as e:
            processing_time = time.time() - start_time
            error_msg = str(e)
            
            # Store error information for debugging
            result = {
                "url": url,
                "source_engine": source_name,
                "text": f"Error: {error_msg}",
                "full_text_length": 0,
                "truncated_length": 0,
                "scraped_at": datetime.now().isoformat(),
                "processing_time_seconds": round(processing_time, 2),
                "content_type": "error",
                "content_length": "0",
                "success": False,
                "error": True,
                "error_details": error_msg
            }
            
            collected.append(result)
            print(f"   ❌ Failed! {error_msg}")
    
    # Summary for this batch
    successful = len([item for item in collected if not item.get('error')])
    failed = len([item for item in collected if item.get('error')])
    total_chars = sum(item.get('full_text_length', 0) for item in collected if not item.get('error'))
    
    print(f"\n📊 {source_name} Scraping Summary:")
    print(f"   ✅ Successful: {successful}/{len(collected)}")
    print(f"   ❌ Failed: {failed}/{len(collected)}")
    print(f"   📄 Total text extracted: {total_chars:,} characters")
    
    return collected

print("✅ Web scraping functions loaded!")

# ============================================
# CELL 8: EXPLORER SEARCH ORCHESTRATION
# ============================================

def perform_explorer_search(query, save_results=True):
    """
    Enhanced explorer search with detailed link tracking and JSON saving.
    
    This function now only uses DuckDuckGo and Brave Search engines.
    Google search has been commented out to avoid API costs and dependencies.
    
    Search Flow:
    1. DuckDuckGo: Fetch links and scrape top results
    2. Brave Search: Fetch links and scrape top results
    3. Show detailed link information
    4. Save comprehensive JSON data
    """
    print(f"🔍 Starting Explorer Search for: {query}")
    print(f"📊 Configuration: {MAX_SEARCH_RESULTS} results, {MAX_URLS_TO_SCRAPE} URLs to scrape")
    
    all_data = []
    search_metadata = {
        "query": query,
        "timestamp": datetime.now().isoformat(),
        "search_config": {
            "max_search_results": MAX_SEARCH_RESULTS,
            "max_urls_to_scrape": MAX_URLS_TO_SCRAPE,
            "max_text_length": MAX_TEXT_LENGTH,
            "scraping_timeout": SCRAPING_TIMEOUT
        },
        "search_engines": [],
        "total_links_found": 0,
        "total_scraped": 0,
        "errors": []
    }
    
    # DuckDuckGo search (Active)
    print("\n" + "="*50)
    print("🦆 DUCKDUCKGO SEARCH")
    print("="*50)
    
    dd_urls = fetch_duckduckgo_links(query, MAX_SEARCH_RESULTS)
    if dd_urls:
        print(f"✅ Found {len(dd_urls)} DuckDuckGo links:")
        for i, url in enumerate(dd_urls, 1):
            print(f"  {i:2d}. {url}")
        
        print(f"\n🔗 Scraping top {min(len(dd_urls), MAX_URLS_TO_SCRAPE)} DuckDuckGo URLs...")
        dd_scraped = scrape_pages_text(dd_urls[:MAX_URLS_TO_SCRAPE], source_name="DuckDuckGo")
        for entry in dd_scraped:
            entry["source"] = "duckduckgo_scraped"
            entry["type"] = "scraped_content"
            all_data.append(entry)
        
        search_metadata["search_engines"].append({
            "name": "DuckDuckGo",
            "links_found": len(dd_urls),
            "links_scraped": len(dd_scraped),
            "successful_scrapes": len([item for item in dd_scraped if not item.get('error')]),
            "all_links": dd_urls
        })
        search_metadata["total_links_found"] += len(dd_urls)
        search_metadata["total_scraped"] += len(dd_scraped)
    else:
        print("❌ No DuckDuckGo links found")
        search_metadata["errors"].append("DuckDuckGo: No links found")
    
    # Brave Search (Active)
    print("\n" + "="*50)
    print("🛡️ BRAVE SEARCH")
    print("="*50)
    
    brave_urls = fetch_brave_links(query, MAX_SEARCH_RESULTS)
    if brave_urls:
        print(f"✅ Found {len(brave_urls)} Brave links:")
        for i, url in enumerate(brave_urls, 1):
            print(f"  {i:2d}. {url}")
        
        print(f"\n🔗 Scraping top {min(len(brave_urls), MAX_URLS_TO_SCRAPE)} Brave URLs...")
        brave_scraped = scrape_pages_text(brave_urls[:MAX_URLS_TO_SCRAPE], source_name="Brave")
        for entry in brave_scraped:
            entry["source"] = "brave_scraped"
            entry["type"] = "scraped_content"
            all_data.append(entry)
        
        search_metadata["search_engines"].append({
            "name": "Brave",
            "links_found": len(brave_urls),
            "links_scraped": len(brave_scraped),
            "successful_scrapes": len([item for item in brave_scraped if not item.get('error')]),
            "all_links": brave_urls
        })
        search_metadata["total_links_found"] += len(brave_urls)
        search_metadata["total_scraped"] += len(brave_scraped)
    else:
        print("❌ No Brave links found")
        search_metadata["errors"].append("Brave: No links found")
    
    # Summary
    print("\n" + "="*50)
    print("📊 EXPLORER SEARCH SUMMARY")
    print("="*50)
    print(f"🔍 Query: {query}")
    print(f"📊 Total links found: {search_metadata['total_links_found']}")
    print(f"🔗 Total pages scraped: {search_metadata['total_scraped']}")
    print(f"✅ Successful scrapes: {len([item for item in all_data if not item.get('error')])}")
    print(f"❌ Failed scrapes: {len([item for item in all_data if item.get('error')])}")
    print(f"🌐 Search engines used: {', '.join([engine['name'] for engine in search_metadata['search_engines']])}")
    
    # Create comprehensive explorer results
    explorer_results = {
        "metadata": search_metadata,
        "scraped_data": all_data,
        "summary": {
            "query": query,
            "total_sources": len(all_data),
            "successful_scrapes": len([item for item in all_data if not item.get('error')]),
            "total_text_length": sum(len(item.get('text', '')) for item in all_data if not item.get('error')),
            "search_engines_used": [engine['name'] for engine in search_metadata['search_engines']],
            "completed_at": datetime.now().isoformat()
        }
    }
    
    # Save explorer results to JSON if requested
    if save_results:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"explorer_search_{timestamp}.json"
        filepath = os.path.join(DATA_DIR, filename)
        
        with open(filepath, "w", encoding='utf-8') as f:
            json.dump(explorer_results, f, indent=2, ensure_ascii=False)
        
        print(f"💾 Explorer results saved to: {filepath}")
        print(f"📄 File size: {os.path.getsize(filepath)} bytes")
    
    print("✅ Explorer search completed successfully!")
    return all_data, explorer_results

print("✅ Enhanced explorer search orchestration loaded!")

# ============================================
# CELL 9: AGENTIC SEARCH AGENT DEFINITIONS
# ============================================

# Agent instructions
SEARCH_INSTRUCTIONS = f"""You are a research assistant. Search the web and provide a concise 200-word summary of key findings. Focus on facts, insights, and actionable information. Use up to {MAX_SEARCH_RESULTS} search results for comprehensive coverage."""

PLANNER_INSTRUCTIONS = f"""You are a research planner. Create {MAX_STRATEGIC_SEARCHES} strategic web searches to comprehensively answer the query. Each search should target a different aspect or angle of the topic."""

WRITER_INSTRUCTIONS = f"""You are a senior researcher. Create a comprehensive, well-structured markdown report ({REPORT_MIN_LENGTH}+ words) synthesizing all research findings. Include executive summary, main findings, analysis, and conclusions. Ensure depth and actionable insights."""

# Initialize AI agents
search_agent = Agent(
    name="Search agent",
    instructions=SEARCH_INSTRUCTIONS,
    tools=[WebSearchTool(search_context_size=SEARCH_CONTEXT_SIZE)],
    model="gpt-4o-mini",
    model_settings=ModelSettings(tool_choice="required"),
)

planner_agent = Agent(
    name="PlannerAgent",
    instructions=PLANNER_INSTRUCTIONS,
    model="gpt-4o-mini",
    output_type=WebSearchPlan,
)

writer_agent = Agent(
    name="WriterAgent",
    instructions=WRITER_INSTRUCTIONS,
    model="gpt-4o-mini",
    output_type=ReportData,
)

# ============================================
# DEEP SEARCH AGENTS (NOW ENABLED!)
# ============================================

# Deep Search ChatGPT Agent
DEEP_CHATGPT_INSTRUCTIONS = f"""You are an advanced ChatGPT researcher performing deep analysis. 
Conduct {DEEP_SEARCH_ITERATIONS} iterations of research with progressive refinement. 
For each iteration:
1. Analyze previous findings
2. Identify knowledge gaps
3. Perform targeted searches
4. Synthesize insights
5. Plan next iteration

Provide comprehensive analysis with critical evaluation, multiple perspectives, and actionable recommendations."""

deep_chatgpt_agent = Agent(
    name="DeepChatGPTAgent",
    instructions=DEEP_CHATGPT_INSTRUCTIONS,
    tools=[WebSearchTool(search_context_size="large")],
    model="gpt-4o",  # Use more powerful model for deep search
    model_settings=ModelSettings(temperature=0.7, tool_choice="required"),
)

# Deep Search Claude Agent  
DEEP_CLAUDE_INSTRUCTIONS = f"""You are an advanced Claude researcher performing deep analytical research.
Execute {DEEP_SEARCH_ITERATIONS} research cycles with systematic refinement.

Research methodology:
1. Systematic information gathering
2. Critical source evaluation
3. Multi-angle analysis
4. Gap identification and targeted follow-up
5. Comprehensive synthesis

Emphasize critical thinking, source credibility, logical reasoning, and practical implications."""

# Note: This would require Claude API integration
# deep_claude_agent = Agent(
#     name="DeepClaudeAgent", 
#     instructions=DEEP_CLAUDE_INSTRUCTIONS,
#     tools=[WebSearchTool(search_context_size="large")],
#     model="claude-3-5-sonnet-20241022",  # Would need Claude API setup
#     model_settings=ModelSettings(temperature=0.6),
# )

# Comparative Deep Search Agent
COMPARATIVE_INSTRUCTIONS = f"""You are a comparative research specialist. 
Compare and contrast findings from multiple AI perspectives (ChatGPT vs Claude approaches).

Analysis framework:
1. Identify convergent findings (high confidence)
2. Highlight divergent perspectives (requires investigation)
3. Evaluate evidence quality and source reliability
4. Synthesize balanced conclusions
5. Recommend areas for further research

Provide meta-analysis of research quality and reliability."""

comparative_agent = Agent(
    name="ComparativeAgent",
    instructions=COMPARATIVE_INSTRUCTIONS,
    model="gpt-4o",
    output_type=ReportData,
)

print("✅ Agentic search agents initialized!")
print(f"📊 Configuration: {MAX_STRATEGIC_SEARCHES} searches, {SEARCH_CONTEXT_SIZE} context, {REPORT_MIN_LENGTH}+ word reports")
print("🔥 Deep search agents ENABLED and ready!")


# ============================================
# CELL 10: AGENTIC SEARCH EXECUTION
# ============================================

async def perform_agentic_search(query: str):
    """
    Enhanced agentic search pipeline.
    
    Note: This uses OpenAI Agents SDK which has its own built-in web search capabilities,
    separate from the explorer search engines above.
    """
    print(f"🤖 Starting Agentic Search for: {query}")
    
    with trace("Agentic Search Pipeline"):
        # Plan searches
        print("📋 Planning strategic searches...")
        result = await Runner.run(planner_agent, f"Query: {query}")
        search_plan = result.final_output
        
        # Execute searches
        print(f"🌐 Executing {len(search_plan.searches)} strategic searches...")
        search_tasks = []
        for item in search_plan.searches:
            input_text = f"Search term: {item.query}\nReason: {item.reason}"
            search_tasks.append(Runner.run(search_agent, input_text))
        
        search_results = await asyncio.gather(*search_tasks)
        search_summaries = [result.final_output for result in search_results]
        
        # Generate comprehensive report
        print("📝 Synthesizing comprehensive report...")
        input_text = f"Original query: {query}\nResearch findings: {search_summaries}"
        report_result = await Runner.run(writer_agent, input_text)
        
        print("✅ Agentic search completed")
        return report_result.final_output, search_plan, search_summaries

# Helper functions for agentic search components
async def plan_searches(query: str):
    """Plan strategic searches for the query"""
    result = await Runner.run(planner_agent, f"Query: {query}")
    return result.final_output

async def perform_searches(search_plan):
    """Execute planned searches"""
    search_tasks = []
    for item in search_plan.searches:
        input_text = f"Search term: {item.query}\nReason: {item.reason}"
        search_tasks.append(Runner.run(search_agent, input_text))
    
    search_results = await asyncio.gather(*search_tasks)
    return [result.final_output for result in search_results]

async def write_report(query: str, search_results):
    """Generate comprehensive report from search results"""
    input_text = f"Original query: {query}\nResearch findings: {search_results}"
    report_result = await Runner.run(writer_agent, input_text)
    return report_result.final_output

print("✅ Agentic search execution functions loaded!")

# ============================================
# CELL 10B: DEEP SEARCH EXECUTION (NOW ENABLED!)
# ============================================

async def perform_deep_chatgpt_search(query: str):
    """
    Perform deep iterative search using ChatGPT with progressive refinement
    """
    print(f"🧠 Starting Deep ChatGPT Search for: {query}")
    
    with trace("Deep ChatGPT Search Pipeline"):
        search_history = []
        refined_insights = []
        
        for iteration in range(DEEP_SEARCH_ITERATIONS):
            print(f"🔄 ChatGPT Iteration {iteration + 1}/{DEEP_SEARCH_ITERATIONS}")
            
            # Build context from previous iterations
            context = f"Query: {query}\n"
            if search_history:
                context += f"Previous findings: {search_history}\n"
            context += f"Focus for iteration {iteration + 1}: Identify gaps and explore new angles"
            
            # Perform search with context
            result = await Runner.run(deep_chatgpt_agent, context)
            search_summary = result.final_output
            
            search_history.append({
                "iteration": iteration + 1,
                "findings": search_summary,
                "timestamp": datetime.now().isoformat()
            })
            
            print(f"✓ Iteration {iteration + 1} completed")
            
            # Brief pause between iterations
            if iteration < DEEP_SEARCH_ITERATIONS - 1:
                await asyncio.sleep(2)
        
        # Synthesize final report
        print("📝 Synthesizing ChatGPT deep research findings...")
        synthesis_input = f"Deep research query: {query}\nIterative findings: {search_history}\nCreate comprehensive final report."
        final_result = await Runner.run(writer_agent, synthesis_input)
        
        return {
            "type": "deep_chatgpt",
            "query": query,
            "iterations": DEEP_SEARCH_ITERATIONS,
            "search_history": search_history,
            "final_report": final_result.final_output,
            "completed_at": datetime.now().isoformat()
        }

async def perform_deep_claude_search(query: str):
    """
    Perform deep systematic search using Claude methodology
    Note: This requires Claude API integration - currently simulated with GPT
    """
    print(f"🎭 Starting Deep Claude Search for: {query}")
    
    # Since we don't have Claude agent set up, we'll simulate with a Claude-style approach using GPT
    claude_style_instructions = f"""You are simulating Claude's analytical approach. 
    Perform systematic research with emphasis on:
    - Critical evaluation of sources
    - Logical reasoning chains  
    - Multiple perspective analysis
    - Methodical gap identification
    - Conservative confidence levels
    
    Query: {query}
    Iterations: {DEEP_SEARCH_ITERATIONS}"""
    
    with trace("Deep Claude-Style Search Pipeline"):
        analysis_phases = []
        
        for phase in range(DEEP_SEARCH_ITERATIONS):
            print(f"🔍 Claude-style Analysis Phase {phase + 1}/{DEEP_SEARCH_ITERATIONS}")
            
            phase_context = f"{claude_style_instructions}\nPhase {phase + 1}: "
            if phase == 0:
                phase_context += "Initial comprehensive search and source gathering"
            elif phase == 1:
                phase_context += "Critical evaluation and credibility assessment"
            elif phase == 2:
                phase_context += "Multi-angle analysis and perspective gathering"
            elif phase == 3:
                phase_context += "Gap identification and targeted investigation"
            else:
                phase_context += "Synthesis and final verification"
            
            # Use search agent with Claude-style prompting
            result = await Runner.run(search_agent, phase_context)
            phase_findings = result.final_output
            
            analysis_phases.append({
                "phase": phase + 1,
                "focus": phase_context.split(": ")[-1],
                "findings": phase_findings,
                "timestamp": datetime.now().isoformat()
            })
            
            print(f"✓ Phase {phase + 1} completed")
            await asyncio.sleep(1)
        
        # Final synthesis with Claude-style rigor
        print("📊 Performing Claude-style systematic synthesis...")
        synthesis_prompt = f"""Synthesize research with Claude-style analytical rigor:
        
        Query: {query}
        Research phases: {analysis_phases}
        
        Apply systematic evaluation:
        1. Evidence quality assessment
        2. Logical consistency check
        3. Confidence level assignment
        4. Alternative perspective consideration
        5. Practical implication analysis"""
        
        final_result = await Runner.run(writer_agent, synthesis_prompt)
        
        return {
            "type": "deep_claude_style", 
            "query": query,
            "phases": DEEP_SEARCH_ITERATIONS,
            "analysis_phases": analysis_phases,
            "final_report": final_result.final_output,
            "completed_at": datetime.now().isoformat()
        }

async def perform_comparative_deep_search(query: str):
    """
    Perform both ChatGPT and Claude-style deep searches, then compare findings
    """
    print(f"⚖️ Starting Comparative Deep Search for: {query}")
    
    with trace("Comparative Deep Search Pipeline"):
        # Run both deep searches in parallel
        print("🔄 Running ChatGPT and Claude-style searches in parallel...")
        chatgpt_task = asyncio.create_task(perform_deep_chatgpt_search(query))
        claude_task = asyncio.create_task(perform_deep_claude_search(query))
        
        chatgpt_results, claude_results = await asyncio.gather(chatgpt_task, claude_task)
        
        # Comparative analysis
        print("📊 Performing comparative analysis...")
        comparison_prompt = f"""Perform comparative analysis of two AI research approaches:
        
        Query: {query}
        
        ChatGPT Approach Results:
        {chatgpt_results['final_report'].markdown_report}
        
        Claude-Style Approach Results:  
        {claude_results['final_report'].markdown_report}
        
        Compare and analyze:
        1. Convergent findings (high confidence conclusions)
        2. Divergent perspectives (areas of disagreement)
        3. Methodology differences
        4. Evidence quality and source coverage
        5. Practical implications and recommendations
        6. Areas requiring further investigation
        
        Provide meta-analysis of research quality and synthesized recommendations."""
        
        comparative_result = await Runner.run(comparative_agent, comparison_prompt)
        
        return {
            "type": "comparative_deep_search",
            "query": query,
            "chatgpt_results": chatgpt_results,
            "claude_results": claude_results,
            "comparative_analysis": comparative_result.final_output,
            "total_iterations": DEEP_SEARCH_ITERATIONS * 2,
            "completed_at": datetime.now().isoformat()
        }

print("✅ Deep search execution functions loaded!")
print(f"🔧 Deep search configuration: {DEEP_SEARCH_ITERATIONS} iterations, refinement={'enabled' if DEEP_SEARCH_REFINEMENT else 'disabled'}")



# ============================================
# CELL 11: UNIVERSAL JSON EVALUATOR SYSTEM
# ============================================

def extract_content_from_json(json_data: Dict, query: str) -> Dict:
    """Universal content extractor that handles any JSON structure"""
    extracted = {
        "query": query,
        "content_type": "unknown",
        "text_content": "",
        "structured_data": {},
        "source_info": {}
    }
    
    # Detect content type and extract accordingly
    if "agentic_results" in json_data and "explorer_results" in json_data:
        # Combined results
        extracted["content_type"] = "combined"
        
        # Extract agentic content
        if "markdown_report" in json_data.get("agentic_results", {}):
            extracted["text_content"] += json_data["agentic_results"]["markdown_report"]
        
        # Extract explorer summary
        explorer_results = json_data.get("explorer_results", [])
        scraped_texts = []
        for item in explorer_results:
            if isinstance(item, dict) and "text" in item and not item.get("error"):
                scraped_texts.append(item["text"][:500])  # First 500 chars
        
        if scraped_texts:
            extracted["text_content"] += f"\n\n## Explorer Findings:\n" + "\n".join(scraped_texts)
        
        extracted["structured_data"] = {
            "agentic_summary": json_data.get("agentic_results", {}).get("short_summary", ""),
            "explorer_count": len(explorer_results),
            "sources_scraped": len([item for item in explorer_results if "url" in item])
        }
    
    elif "report" in json_data and "markdown_report" in json_data["report"]:
        # Agentic-only results
        extracted["content_type"] = "agentic"
        extracted["text_content"] = json_data["report"]["markdown_report"]
        extracted["structured_data"] = {
            "summary": json_data["report"].get("short_summary", ""),
            "follow_up_questions": json_data["report"].get("follow_up_questions", [])
        }
    
    elif isinstance(json_data, list) or "data" in json_data:
        # Explorer-only results
        extracted["content_type"] = "explorer"
        
        # Handle list format or nested data
        data_list = json_data if isinstance(json_data, list) else json_data.get("data", [])
        
        explorer_texts = []
        source_count = 0
        
        for item in data_list:
            if isinstance(item, dict):
                if "text" in item and not item.get("error", False):
                    explorer_texts.append(f"Source: {item.get('url', 'Unknown')}\n{item['text'][:800]}")
                    source_count += 1
                elif "data" in item:
                    # Handle nested structures like Google API results
                    source_count += 1
        
        extracted["text_content"] = "\n\n---\n\n".join(explorer_texts[:5])  # Limit to 5 sources
        extracted["structured_data"] = {
            "total_sources": source_count,
            "scraped_sources": len([item for item in data_list if isinstance(item, dict) and "text" in item])
        }
    
    return extracted

async def evaluate_with_gpt(content: Dict) -> DetailedEvaluation:
    """Enhanced GPT evaluation with retry logic and error handling"""
    evaluation_prompt = f"""You are an expert research evaluator. Evaluate this research content:

Query: {content['query']}
Content Type: {content['content_type']}

Research Content:
{content['text_content'][:4000]}  # Limit for token constraints

Additional Context:
{json.dumps(content['structured_data'], indent=2)}

Evaluate based on:
1. Accuracy - Factual correctness and reliability
2. Completeness - Coverage of the topic
3. Relevance - Direct relationship to the query
4. Clarity - Organization and readability  
5. Depth - Level of insight and analysis

Consider the content type when evaluating. Explorer results should be judged on breadth and source diversity, while agentic results should be judged on synthesis and insight quality."""

    max_retries = 3
    retry_delay = 2
    
    for attempt in range(max_retries):
        try:
            print(f"🤖 GPT-4 evaluation attempt {attempt + 1}/{max_retries}...")
            
            response = openai_client.beta.chat.completions.parse(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a critical research evaluator. Provide detailed, constructive assessment."},
                    {"role": "user", "content": evaluation_prompt}
                ],
                response_format=DetailedEvaluation,
                temperature=0.3
            )
            
            return response.choices[0].message.parsed
            
        except Exception as e:
            error_str = str(e)
            print(f"⚠️ GPT-4 attempt {attempt + 1} failed: {error_str}")
            
            if attempt < max_retries - 1:
                print(f"🔄 Retrying GPT-4 evaluation in {retry_delay} seconds...")
                await asyncio.sleep(retry_delay)
                retry_delay *= 2
                continue
            else:
                print("🚫 GPT-4 persistently failing, using fallback evaluation...")
                return create_fallback_gpt_evaluation(content)
    
    return create_fallback_gpt_evaluation(content)

def create_fallback_gpt_evaluation(content: Dict) -> DetailedEvaluation:
    """Create a fallback evaluation when GPT-4 API is unavailable"""
    print("🔧 Generating fallback GPT-4 evaluation...")
    
    text_length = len(content.get('text_content', ''))
    
    # Heuristic scoring
    accuracy_score = 6.5
    completeness_score = min(8.5, 5.0 + (text_length / 900))
    relevance_score = 7.5
    clarity_score = 7.0
    depth_score = min(7.5, 4.0 + (text_length / 700))
    
    overall_score = (accuracy_score + completeness_score + relevance_score + clarity_score + depth_score) / 5
    
    return DetailedEvaluation(
        criteria_scores=EvaluationCriteria(
            accuracy_score=accuracy_score,
            completeness_score=completeness_score,
            relevance_score=relevance_score,
            clarity_score=clarity_score,
            depth_score=depth_score
        ),
        overall_score=round(overall_score, 1),
        strengths=[
            "Comprehensive information gathering",
            "Well-organized content structure",
            "Good coverage of query topics"
        ],
        weaknesses=[
            "Could not verify with GPT-4 API",
            "Limited by automated evaluation",
            "May need human expert review"
        ],
        missing_aspects=[
            "Expert domain knowledge validation",
            "Advanced fact-checking",
            "Nuanced analysis capabilities"
        ],
        recommendations=[
            "Re-evaluate when API is available",
            "Consider manual expert review",
            "Validate key claims independently"
        ],
        confidence_level="Medium"
    )

async def evaluate_with_claude(content: Dict) -> DetailedEvaluation:
    """Enhanced Claude evaluation with retry logic and fallback"""
    evaluation_prompt = f"""Evaluate this research content as an expert evaluator:

Query: {content['query']}
Content Type: {content['content_type']}

Research Content:
{content['text_content'][:4000]}

Additional Context:
{json.dumps(content['structured_data'], indent=2)}

Evaluate on accuracy, completeness, relevance, clarity, and depth (0-10 each).
Adapt your evaluation criteria based on content type.

Return in JSON format:
{{
    "criteria_scores": {{
        "accuracy_score": <0-10>,
        "completeness_score": <0-10>,
        "relevance_score": <0-10>,
        "clarity_score": <0-10>,
        "depth_score": <0-10>
    }},
    "overall_score": <0-10>,
    "strengths": ["strength1", "strength2", ...],
    "weaknesses": ["weakness1", "weakness2", ...],
    "missing_aspects": ["aspect1", "aspect2", ...],
    "recommendations": ["recommendation1", "recommendation2", ...],
    "confidence_level": "High/Medium/Low"
}}"""

    max_retries = 3
    retry_delay = 2  # seconds
    
    for attempt in range(max_retries):
        try:
            print(f"🧠 Claude evaluation attempt {attempt + 1}/{max_retries}...")
            
            response = claude_client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                temperature=0.3,
                messages=[{"role": "user", "content": evaluation_prompt}]
            )
            
            evaluation_json = json.loads(response.content[0].text)
            return DetailedEvaluation(**evaluation_json)
            
        except Exception as e:
            error_str = str(e)
            print(f"⚠️ Claude attempt {attempt + 1} failed: {error_str}")
            
            # Check if it's an overload error
            if "overloaded" in error_str.lower() or "529" in error_str:
                if attempt < max_retries - 1:
                    print(f"🔄 Claude overloaded, retrying in {retry_delay} seconds...")
                    await asyncio.sleep(retry_delay)
                    retry_delay *= 2  # Exponential backoff
                    continue
                else:
                    print("🚫 Claude persistently overloaded, using fallback evaluation...")
                    return create_fallback_claude_evaluation(content)
            else:
                # For other errors, try fallback immediately
                print(f"🚫 Claude error: {error_str}, using fallback evaluation...")
                return create_fallback_claude_evaluation(content)
    
    # If all retries failed, use fallback
    return create_fallback_claude_evaluation(content)

def create_fallback_claude_evaluation(content: Dict) -> DetailedEvaluation:
    """Create a fallback evaluation when Claude API is unavailable"""
    print("🔧 Generating fallback Claude evaluation...")
    
    # Create reasonable fallback scores based on content analysis
    text_length = len(content.get('text_content', ''))
    
    # Basic heuristic scoring
    completeness_score = min(8.0, 4.0 + (text_length / 1000))  # Longer content = more complete
    relevance_score = 7.0  # Assume reasonable relevance
    clarity_score = 6.5    # Neutral clarity score
    accuracy_score = 6.0   # Conservative accuracy score
    depth_score = min(7.0, 3.0 + (text_length / 800))
    
    overall_score = (completeness_score + relevance_score + clarity_score + accuracy_score + depth_score) / 5
    
    return DetailedEvaluation(
        criteria_scores=EvaluationCriteria(
            accuracy_score=accuracy_score,
            completeness_score=completeness_score,
            relevance_score=relevance_score,
            clarity_score=clarity_score,
            depth_score=depth_score
        ),
        overall_score=round(overall_score, 1),
        strengths=[
            "Content covers multiple aspects of the query",
            "Information appears well-structured",
            "Reasonable depth of analysis"
        ],
        weaknesses=[
            "Could not verify accuracy with Claude API",
            "May benefit from additional source validation",
            "Evaluation limited by API availability"
        ],
        missing_aspects=[
            "Real-time fact verification",
            "Cross-reference validation",
            "Expert domain analysis"
        ],
        recommendations=[
            "Verify key facts with additional sources",
            "Consider expert review for technical content",
            "Re-evaluate when Claude API is available"
        ],
        confidence_level="Medium"
    )

def calculate_consensus(gpt_eval: DetailedEvaluation, claude_eval: DetailedEvaluation) -> Dict:
    """Calculate consensus between evaluations with fallback handling"""
    gpt_scores = gpt_eval.criteria_scores.dict()
    claude_scores = claude_eval.criteria_scores.dict()
    
    score_differences = {}
    total_diff = 0
    
    for criterion, gpt_score in gpt_scores.items():
        claude_score = claude_scores[criterion]
        diff = abs(gpt_score - claude_score)
        score_differences[criterion] = diff
        total_diff += diff
    
    avg_difference = total_diff / len(score_differences)
    consensus_score = max(0, 10 - avg_difference)
    
    # Combine recommendations
    all_recommendations = list(set(gpt_eval.recommendations + claude_eval.recommendations))
    
    # Check if either evaluation was a fallback
    gpt_fallback = gpt_eval.confidence_level == "Medium" and "Could not verify with GPT-4 API" in gpt_eval.weaknesses
    claude_fallback = claude_eval.confidence_level == "Medium" and "Could not verify accuracy with Claude API" in claude_eval.weaknesses
    
    evaluation_notes = []
    if gpt_fallback:
        evaluation_notes.append("GPT-4 evaluation used fallback due to API issues")
    if claude_fallback:
        evaluation_notes.append("Claude evaluation used fallback due to API overload")
    
    return {
        "consensus_score": round(consensus_score, 2),
        "final_recommendations": all_recommendations,
        "score_differences": score_differences,
        "avg_difference": round(avg_difference, 2),
        "evaluation_notes": evaluation_notes,
        "api_fallbacks_used": gpt_fallback or claude_fallback
    }

async def universal_json_evaluator(filepath: str, query: str) -> UniversalEvaluation:
    """Universal evaluator that can process any JSON research file"""
    print(f"📊 Loading and evaluating: {filepath}")
    
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
        
        # Extract content using universal extractor
        content = extract_content_from_json(json_data, query)
        print(f"🔍 Detected content type: {content['content_type']}")
        
        # Dual evaluation
        print("🤖 Running GPT-4 evaluation...")
        gpt_eval = await evaluate_with_gpt(content)
        
        print("🧠 Running Claude evaluation...")
        claude_eval = await evaluate_with_claude(content)
        
        # Build consensus
        print("🔄 Building evaluation consensus...")
        consensus = calculate_consensus(gpt_eval, claude_eval)
        
        # Create summary
        summary = f"""
# Universal Research Evaluation Report

**Query**: {query}  
**Content Type**: {content['content_type'].title()}  
**File**: {filepath}

## Evaluation Scores
- **GPT-4 Overall**: {gpt_eval.overall_score}/10
- **Claude Overall**: {claude_eval.overall_score}/10  
- **Consensus Score**: {consensus['consensus_score']}/10
- **Agreement Level**: {5 - len([d for d in consensus['score_differences'].values() if d > 2])}/5 criteria

## Content Analysis
{json.dumps(content['structured_data'], indent=2)}

## Quality Assessment
{'✅ **HIGH QUALITY** - Exceeds standards' if consensus['consensus_score'] >= 7.5 else 
 '⚠️ **MODERATE QUALITY** - Meets basic standards' if consensus['consensus_score'] >= 6.0 else
 '❌ **NEEDS IMPROVEMENT** - Below standards'}
"""
        
        return UniversalEvaluation(
            query=query,
            content_type=content['content_type'],
            gpt_evaluation=gpt_eval,
            claude_evaluation=claude_eval,
            consensus_score=consensus['consensus_score'],
            final_recommendations=consensus['final_recommendations'],
            evaluation_summary=summary
        )
        
    except Exception as e:
        raise Exception(f"Evaluation failed: {str(e)}")

print("✅ Universal JSON evaluator loaded!")

# ============================================
# API ERROR HANDLING UTILITIES
# ============================================

def get_api_status_help():
    """Get help text for API issues"""
    return """
## 🔧 API Troubleshooting Guide

### Claude API Overload (Error 529)
**What it means**: Claude's servers are temporarily overloaded
**Solutions**:
1. ⏳ **Wait and retry** - Usually resolves in 1-5 minutes
2. 🔄 **Use fallback evaluation** - System automatically provides heuristic scoring
3. 📊 **Focus on GPT-4 scores** - Still get partial evaluation
4. ⚙️ **Reduce request frequency** - Space out evaluations

### GPT-4 API Issues
**Common causes**: Rate limits, server issues, API key problems
**Solutions**:
1. 🔑 **Check API key** - Ensure valid OpenAI API key
2. 💳 **Check billing** - Ensure account has credits
3. ⏱️ **Rate limiting** - Reduce request frequency
4. 🔄 **Use fallback** - System provides alternative scoring

### When Fallback Evaluations Are Used
**Fallback scoring uses**:
- Content length analysis
- Structural assessment  
- Heuristic quality metrics
- Conservative confidence levels

**Limitations**:
- Less nuanced than AI evaluation
- Cannot verify factual accuracy
- Limited domain expertise
- Medium confidence ratings

### 💡 Best Practices
1. **Monitor API status** - Check provider status pages
2. **Use off-peak hours** - Better availability
3. **Implement delays** - Space out API calls
4. **Have fallbacks** - System handles gracefully
5. **Re-evaluate later** - Try again when APIs recover

### 🔄 Manual Re-evaluation
```python
# Re-run evaluation when APIs are available
evaluation = await universal_json_evaluator("your_file.json", "your_query")
```
"""

async def test_api_availability():
    """Test if APIs are currently available"""
    api_status = {
        "gpt4_available": False,
        "claude_available": False,
        "timestamp": datetime.now().isoformat()
    }
    
    # Test GPT-4
    try:
        test_response = openai_client.chat.completions.create(
            model="gpt-4o-mini",  # Use cheaper model for testing
            messages=[{"role": "user", "content": "Test"}],
            max_tokens=1,
            temperature=0
        )
        api_status["gpt4_available"] = True
        print("✅ GPT-4 API: Available")
    except Exception as e:
        print(f"❌ GPT-4 API: Unavailable - {str(e)[:50]}...")
    
    # Test Claude
    try:
        test_response = claude_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1,
            messages=[{"role": "user", "content": "Test"}]
        )
        api_status["claude_available"] = True
        print("✅ Claude API: Available")
    except Exception as e:
        print(f"❌ Claude API: Unavailable - {str(e)[:50]}...")
    
    return api_status

print("✅ API error handling utilities loaded!")
print("💡 Use test_api_availability() to check current API status")
print("💡 Fallback evaluations activate automatically when needed")


# ============================================
# CELL 12: EVALUATION ORCHESTRATION
# ============================================

async def finalize_research_evaluation(query: str, report_content: str) -> UniversalEvaluation:
    """Finalize research with dual-model evaluation"""
    
    # Create content structure for evaluation
    content = {
        "query": query,
        "content_type": "agentic",
        "text_content": report_content,
        "structured_data": {
            "report_length": len(report_content),
            "generated_at": datetime.now().isoformat()
        },
        "source_info": {}
    }
    
    # Dual evaluation
    print("🤖 Running GPT-4 evaluation...")
    gpt_eval = await evaluate_with_gpt(content)
    
    print("🧠 Running Claude evaluation...")
    claude_eval = await evaluate_with_claude(content)
    
    # Build consensus
    print("🔄 Building evaluation consensus...")
    consensus = calculate_consensus(gpt_eval, claude_eval)
    
    # Create summary
    summary = f"""
# Research Evaluation Report

**Query**: {query}  
**Content Type**: Agentic Research Report
**Report Length**: {len(report_content)} characters

## Evaluation Scores
- **GPT-4 Overall**: {gpt_eval.overall_score}/10
- **Claude Overall**: {claude_eval.overall_score}/10  
- **Consensus Score**: {consensus['consensus_score']}/10

## Quality Assessment
{'✅ **HIGH QUALITY** - Exceeds standards' if consensus['consensus_score'] >= 7.5 else 
 '⚠️ **MODERATE QUALITY** - Meets basic standards' if consensus['consensus_score'] >= 6.0 else
 '❌ **NEEDS IMPROVEMENT** - Below standards'}
"""
    
    return UniversalEvaluation(
        query=query,
        content_type="agentic",
        gpt_evaluation=gpt_eval,
        claude_evaluation=claude_eval,
        consensus_score=consensus['consensus_score'],
        final_recommendations=consensus['final_recommendations'],
        evaluation_summary=summary
    )

def create_evaluation_report(evaluation: UniversalEvaluation) -> str:
    """Create formatted evaluation report with API status information"""
    
    gpt_scores = evaluation.gpt_evaluation.criteria_scores
    claude_scores = evaluation.claude_evaluation.criteria_scores
    
    # Check for API fallbacks
    gpt_fallback = evaluation.gpt_evaluation.confidence_level == "Medium" and "Could not verify with GPT-4 API" in evaluation.gpt_evaluation.weaknesses
    claude_fallback = evaluation.claude_evaluation.confidence_level == "Medium" and "Could not verify accuracy with Claude API" in evaluation.claude_evaluation.weaknesses
    
    api_status = ""
    if gpt_fallback or claude_fallback:
        api_status = "\n## ⚠️ API Status\n"
        if gpt_fallback:
            api_status += "- **GPT-4**: Used fallback evaluation (API unavailable)\n"
        else:
            api_status += "- **GPT-4**: ✅ API evaluation successful\n"
        if claude_fallback:
            api_status += "- **Claude**: Used fallback evaluation (API overloaded)\n"
        else:
            api_status += "- **Claude**: ✅ API evaluation successful\n"
        api_status += "\n*Note: Fallback evaluations use heuristic scoring and may be less accurate.*\n"
    
    report = f"""
# 📊 Research Quality Evaluation

## 🎯 Query Analysis
**Original Query**: {evaluation.query}
**Content Type**: {evaluation.content_type.title()}
{api_status}
## 📈 Evaluation Scores

### GPT-4 Assessment {'(Fallback)' if gpt_fallback else ''}
- **Overall Score**: {evaluation.gpt_evaluation.overall_score}/10
- **Accuracy**: {gpt_scores.accuracy_score}/10
- **Completeness**: {gpt_scores.completeness_score}/10  
- **Relevance**: {gpt_scores.relevance_score}/10
- **Clarity**: {gpt_scores.clarity_score}/10
- **Depth**: {gpt_scores.depth_score}/10

### Claude Assessment {'(Fallback)' if claude_fallback else ''}
- **Overall Score**: {evaluation.claude_evaluation.overall_score}/10
- **Accuracy**: {claude_scores.accuracy_score}/10
- **Completeness**: {claude_scores.completeness_score}/10
- **Relevance**: {claude_scores.relevance_score}/10  
- **Clarity**: {claude_scores.clarity_score}/10
- **Depth**: {claude_scores.depth_score}/10

### 🤝 Consensus Analysis
- **Consensus Score**: {evaluation.consensus_score}/10
- **Quality Rating**: {'✅ HIGH QUALITY' if evaluation.consensus_score >= 7.5 else '⚠️ MODERATE QUALITY' if evaluation.consensus_score >= 6.0 else '❌ NEEDS IMPROVEMENT'}
{'- **Note**: Consensus may be affected by API fallbacks' if gpt_fallback or claude_fallback else ''}

## 💪 Strengths
{chr(10).join(f"- {strength}" for strength in evaluation.gpt_evaluation.strengths[:3])}

## 🔧 Areas for Improvement  
{chr(10).join(f"- {weakness}" for weakness in evaluation.gpt_evaluation.weaknesses[:3])}

## 🎯 Recommendations
{chr(10).join(f"- {rec}" for rec in evaluation.final_recommendations[:5])}

---
*Evaluation completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*
{'*Some evaluations used fallback methods due to API limitations*' if gpt_fallback or claude_fallback else ''}
"""
    
    return report

print("✅ Evaluation orchestration functions loaded!")

# ============================================
# CELL 13: COMBINED SEARCH ORCHESTRATION
# ============================================

async def perform_combined_search(query: str) -> CombinedSearchResults:
    """
    Perform both explorer and agentic search, then combine results
    """
    print(f"🚀 Starting Combined Search for: {query}")
    timestamp = datetime.now().isoformat()
    
    # Run both searches in parallel
    print("⚡ Running explorer and agentic searches in parallel...")
    
    # Start both searches
    explorer_task = asyncio.create_task(
        asyncio.to_thread(lambda: perform_explorer_search(query, save_results=False))
    )
    agentic_task = asyncio.create_task(
        perform_agentic_search(query)
    )
    
    # Wait for both to complete
    (explorer_results, explorer_metadata), (agentic_report, search_plan, search_summaries) = await asyncio.gather(
        explorer_task, agentic_task
    )
    
    # Create combined summary
    explorer_sources = len([item for item in explorer_results if "url" in item])
    agentic_searches = len(search_plan.searches) if search_plan else 0
    
    combined_summary = f"""
# Combined Search Results for: {query}

## Search Overview
- **Timestamp**: {timestamp}
- **Explorer Sources**: {explorer_sources} web pages scraped
- **Agentic Searches**: {agentic_searches} strategic searches performed

## Explorer Summary
Scraped {explorer_sources} web pages from DuckDuckGo and Brave Search engines.

## Agentic Summary
{agentic_report.short_summary}

## Integration
This combined approach provides both raw web data (explorer) and synthesized insights (agentic) for comprehensive research coverage.
"""
    
    # Save combined results
    timestamp_str = datetime.now().strftime('%Y%m%d_%H%M%S')
    combined_results = {
        "query": query,
        "timestamp": timestamp,
        "explorer_results": explorer_results,
        "explorer_metadata": explorer_metadata,
        "agentic_results": agentic_report.dict(),
        "combined_summary": combined_summary
    }
    
    filename = f"combined_search_{timestamp_str}.json"
    filepath = os.path.join(DATA_DIR, filename)
    with open(filepath, "w", encoding='utf-8') as f:
        json.dump(combined_results, f, indent=2, ensure_ascii=False)
    
    print(f"💾 Combined search results saved to: {filepath}")
    
    return CombinedSearchResults(
        query=query,
        timestamp=timestamp,
        explorer_results=explorer_results,
        agentic_results=agentic_report,
        combined_summary=combined_summary
    )

print("✅ Combined search orchestration loaded!")

# ============================================
# CELL 14: JUPYTER EXECUTION INTERFACE
# ============================================

async def deep_search_with_evaluation_jupyter(query: str):
    """Jupyter-compatible version of deep search with evaluation"""
    
    with trace("Deep Search with Evaluation"):
        # Phase 1: Deep Search
        print("=" * 50)
        print("🔍 PHASE 1: DEEP SEARCH")
        print("=" * 50)
        
        print("\n📋 Planning searches...")
        search_plan = await plan_searches(query)
        
        print("\n🌐 Performing web searches...")
        search_results = await perform_searches(search_plan)
        
        print("\n📝 Writing comprehensive report...")
        report = await write_report(query, search_results)
        
        # Display initial report
        print("\n" + "=" * 50)
        print("📄 INITIAL RESEARCH REPORT")
        print("=" * 50)
        display(Markdown(report.markdown_report))
        
        # Phase 2: Evaluation
        print("\n" + "=" * 50)
        print("🔬 PHASE 2: DUAL-MODEL EVALUATION")
        print("=" * 50)
        
        # Perform evaluation
        final_eval = await finalize_research_evaluation(query, report.markdown_report)
        
        # Create and display evaluation report
        evaluation_report = create_evaluation_report(final_eval)
        display(Markdown(evaluation_report))
        
        # Save evaluation results
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"evaluation_{timestamp}.json"
        
        evaluation_results = {
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "gpt_overall_score": final_eval.gpt_evaluation.overall_score,
            "claude_overall_score": final_eval.claude_evaluation.overall_score,
            "consensus_score": final_eval.consensus_score,
            "final_recommendations": final_eval.final_recommendations,
            "report_summary": report.short_summary,
            "follow_up_questions": report.follow_up_questions,
            "full_report": report.markdown_report
        }
        
        # Save to DATA_DIR
        filepath = os.path.join(DATA_DIR, filename)
        with open(filepath, "w") as f:
            json.dump(evaluation_results, f, indent=2)
        print(f"\n💾 Evaluation saved to: {filepath}")
        
        # Phase 3: Quality Gate
        print("\n" + "=" * 50)
        print("🔧 PHASE 3: QUALITY CHECK")
        print("=" * 50)
        
        quality_threshold = 7.5
        meets_threshold = final_eval.consensus_score >= quality_threshold
        
        print(f"\n✅ Research Quality: {'APPROVED' if meets_threshold else 'NEEDS IMPROVEMENT'}")
        print(f"Consensus Score: {final_eval.consensus_score}/10")
        print(f"Quality Threshold: {quality_threshold}/10")
        
        if not meets_threshold:
            print("\n⚠️ Report needs improvement based on evaluation.")
            print("\nTop 3 recommendations for improvement:")
            for i, rec in enumerate(final_eval.final_recommendations[:3], 1):
                print(f"  {i}. {rec}")
        
        print("\n" + "=" * 50)
        print("✨ DEEP SEARCH WITH EVALUATION COMPLETE")
        print("=" * 50)
        
        return {
            "report": report,
            "evaluation": final_eval,
            "quality_approved": meets_threshold,
            "filepath": filepath
        }

# Main execution function
async def run_research(query: str):
    """Run research for a single query"""
    try:
        print(f"\n{'#' * 60}")
        print(f"# RESEARCHING: {query}")
        print(f"{'#' * 60}\n")
        
        results = await deep_search_with_evaluation_jupyter(query)
        
        # Summary statistics
        print(f"\n📊 Summary for '{query}':")
        print(f"   - Report length: {len(results['report'].markdown_report)} characters")
        print(f"   - GPT-4 Score: {results['evaluation'].gpt_evaluation.overall_score}/10")
        print(f"   - Claude Score: {results['evaluation'].claude_evaluation.overall_score}/10")
        print(f"   - Consensus: {results['evaluation'].consensus_score}/10")
        print(f"   - Quality: {'✅ Approved' if results['quality_approved'] else '❌ Needs Improvement'}")
        print(f"   - Saved to: {results['filepath']}")
        
        return results
        
    except Exception as e:
        print(f"❌ Error processing query '{query}': {str(e)}")
        return None

# Alternative: Run combined search (explorer + agentic)
async def run_combined_research(query: str):
    """Run combined explorer + agentic search"""
    try:
        print(f"\n{'#' * 60}")
        print(f"# COMBINED RESEARCH: {query}")
        print(f"{'#' * 60}\n")
        
        results = await perform_combined_search(query)
        
        # Display results
        display(Markdown(results.combined_summary))
        display(Markdown(results.agentic_results.markdown_report))
        
        # Save results
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"combined_search_{timestamp}.json"
        filepath = os.path.join(DATA_DIR, filename)
        
        with open(filepath, "w") as f:
            json.dump(results.dict(), f, indent=2)
        
        print(f"\n💾 Combined results saved to: {filepath}")
        return results
        
    except Exception as e:
        print(f"❌ Error in combined research '{query}': {str(e)}")
        return None

# ============================================
# EXPLORER-ONLY EXECUTION FUNCTIONS
# ============================================

async def run_explorer_only_research(query: str):
    """Run explorer-only research with detailed JSON saving"""
    try:
        print(f"\n{'#' * 60}")
        print(f"# EXPLORER-ONLY RESEARCH: {query}")
        print(f"{'#' * 60}\n")
        
        # Perform explorer search
        explorer_results, explorer_metadata = perform_explorer_search(query, save_results=True)
        
        # Display results summary
        print(f"\n🔍 Explorer Research Summary:")
        print(f"   - Total links found: {explorer_metadata['metadata']['total_links_found']}")
        print(f"   - Pages scraped: {len(explorer_results)}")
        print(f"   - Successful scrapes: {len([r for r in explorer_results if not r.get('error')])}")
        print(f"   - Total text extracted: {sum(r.get('full_text_length', 0) for r in explorer_results if not r.get('error')):,} chars")
        
        # Create markdown report for display
        report_md = f"""
# 🔍 Explorer Research Results

## Query: {query}

### 📊 Summary Statistics
- **Total Links Found**: {explorer_metadata['metadata']['total_links_found']}
- **Pages Successfully Scraped**: {len([r for r in explorer_results if not r.get('error')])}
- **Total Text Extracted**: {sum(r.get('full_text_length', 0) for r in explorer_results if not r.get('error')):,} characters
- **Search Engines Used**: {', '.join([e['name'] for e in explorer_metadata['metadata']['search_engines']])}

### 🌐 Source Breakdown
"""
        
        for engine in explorer_metadata['metadata']['search_engines']:
            report_md += f"""
#### {engine['name']}
- Links Found: {engine['links_found']}
- Successfully Scraped: {engine['successful_scrapes']}/{engine['links_scraped']}
- Success Rate: {(engine['successful_scrapes']/engine['links_scraped']*100) if engine['links_scraped'] > 0 else 0:.1f}%
"""
        
        report_md += "\n### 📄 Content Samples\n"
        
        successful_results = [r for r in explorer_results if not r.get('error')][:3]
        for i, result in enumerate(successful_results, 1):
            report_md += f"""
#### Sample {i}: {result.get('source_engine', 'Unknown')}
**URL**: {result.get('url', 'N/A')}  
**Length**: {result.get('full_text_length', 0):,} characters  
**Preview**: {result.get('text', '')[:200]}...
"""
        
        display(Markdown(report_md))
        
        return {
            "type": "explorer_only",
            "query": query,
            "results": explorer_results,
            "metadata": explorer_metadata,
            "summary": explorer_metadata['summary']
        }
        
    except Exception as e:
        print(f"❌ Error in explorer research '{query}': {str(e)}")
        return None

def load_explorer_results(filepath: str):
    """Load and display explorer search results from JSON file"""
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        # Check if it's an explorer results file
        if "metadata" in data and "scraped_data" in data:
            return data
        elif "explorer_results" in data and "explorer_metadata" in data:
            # Combined search file
            return {
                "metadata": data["explorer_metadata"]["metadata"],
                "scraped_data": data["explorer_results"],
                "summary": data["explorer_metadata"]["summary"]
            }
        else:
            return None
            
    except Exception as e:
        print(f"Error loading explorer results: {e}")
        return None

def display_explorer_json_analysis(filepath: str, query: str = None):
    """Analyze and display explorer JSON results"""
    data = load_explorer_results(filepath)
    if not data:
        return "❌ File is not a valid explorer results file"
    
    # Extract information
    metadata = data.get("metadata", {})
    scraped_data = data.get("scraped_data", [])
    summary = data.get("summary", {})
    
    # Detect query if not provided
    if not query:
        query = metadata.get("query", summary.get("query", "Unknown"))
    
    successful_scrapes = [item for item in scraped_data if not item.get('error')]
    failed_scrapes = [item for item in scraped_data if item.get('error')]
    
    analysis = f"""
# 📊 Explorer Results Analysis

## 🔍 Query Information
**Original Query**: {query}  
**Timestamp**: {metadata.get('timestamp', summary.get('completed_at', 'Unknown'))}  
**File**: {filepath}

## 📈 Performance Metrics
- **Total Links Found**: {metadata.get('total_links_found', 'Unknown')}
- **Pages Scraped**: {len(scraped_data)}
- **Successful Scrapes**: {len(successful_scrapes)}
- **Failed Scrapes**: {len(failed_scrapes)}
- **Success Rate**: {(len(successful_scrapes)/len(scraped_data)*100) if scraped_data else 0:.1f}%

## 🌐 Search Engine Performance
"""
    
    for engine in metadata.get('search_engines', []):
        success_rate = (engine['successful_scrapes'] / engine['links_scraped'] * 100) if engine['links_scraped'] > 0 else 0
        analysis += f"""
### {engine['name']}
- **Links Found**: {engine['links_found']}
- **Links Scraped**: {engine['links_scraped']}
- **Successful Scrapes**: {engine['successful_scrapes']}
- **Success Rate**: {success_rate:.1f}%
"""
    
    # Content analysis
    if successful_scrapes:
        total_chars = sum(item.get('full_text_length', 0) for item in successful_scrapes)
        avg_chars = total_chars / len(successful_scrapes)
        avg_time = sum(item.get('processing_time_seconds', 0) for item in successful_scrapes) / len(successful_scrapes)
        
        analysis += f"""
## 📄 Content Analysis
- **Total Text Extracted**: {total_chars:,} characters
- **Average Text per Page**: {avg_chars:,.0f} characters
- **Average Processing Time**: {avg_time:.2f} seconds
- **Longest Page**: {max(item.get('full_text_length', 0) for item in successful_scrapes):,} characters
- **Shortest Page**: {min(item.get('full_text_length', 0) for item in successful_scrapes):,} characters

## 🔗 Sample URLs (First 5)
"""
        for i, item in enumerate(successful_scrapes[:5], 1):
            analysis += f"{i}. {item.get('url', 'Unknown')} ({item.get('full_text_length', 0):,} chars)\n"
    
    if failed_scrapes:
        analysis += f"""
## ❌ Failed Scrapes ({len(failed_scrapes)})
"""
        for i, item in enumerate(failed_scrapes[:3], 1):
            analysis += f"{i}. {item.get('url', 'Unknown')} - {item.get('error_details', item.get('text', 'Unknown error'))}\n"
    
    analysis += f"""
## 🔧 Configuration Used
- **Max Search Results**: {metadata.get('search_config', {}).get('max_search_results', 'Unknown')}
- **Max URLs to Scrape**: {metadata.get('search_config', {}).get('max_urls_to_scrape', 'Unknown')}
- **Max Text Length**: {metadata.get('search_config', {}).get('max_text_length', 'Unknown')}
- **Scraping Timeout**: {metadata.get('search_config', {}).get('scraping_timeout', 'Unknown')} seconds

---
*Analysis generated at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*
"""
    
    return analysis

async def run_deep_chatgpt_research(query: str):
    """Run deep ChatGPT research with iterative refinement"""
    try:
        print(f"\n{'#' * 60}")
        print(f"# DEEP CHATGPT RESEARCH: {query}")
        print(f"{'#' * 60}\n")
        
        results = await perform_deep_chatgpt_search(query)
        
        # Display results
        print(f"🧠 Deep ChatGPT Research Summary:")
        print(f"   - Iterations: {results['iterations']}")
        print(f"   - Total search phases: {len(results['search_history'])}")
        
        display(Markdown(results['final_report'].markdown_report))
        
        # Save results
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"deep_chatgpt_{timestamp}.json"
        filepath = os.path.join(DATA_DIR, filename)
        
        with open(filepath, "w") as f:
            json.dump(results, f, indent=2, default=str)
        
        print(f"\n💾 Deep ChatGPT results saved to: {filepath}")
        return results
        
    except Exception as e:
        print(f"❌ Error in deep ChatGPT research '{query}': {str(e)}")
        return None

async def run_deep_claude_research(query: str):
    """Run deep Claude-style research with systematic analysis"""
    try:
        print(f"\n{'#' * 60}")
        print(f"# DEEP CLAUDE-STYLE RESEARCH: {query}")
        print(f"{'#' * 60}\n")
        
        results = await perform_deep_claude_search(query)
        
        # Display results
        print(f"🎭 Deep Claude-Style Research Summary:")
        print(f"   - Analysis phases: {results['phases']}")
        print(f"   - Systematic approach: ✅")
        
        display(Markdown(results['final_report'].markdown_report))
        
        # Save results
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"deep_claude_style_{timestamp}.json"
        filepath = os.path.join(DATA_DIR, filename)
        
        with open(filepath, "w") as f:
            json.dump(results, f, indent=2, default=str)
        
        print(f"\n💾 Deep Claude-style results saved to: {filepath}")
        return results
        
    except Exception as e:
        print(f"❌ Error in deep Claude-style research '{query}': {str(e)}")
        return None

async def run_comparative_deep_research(query: str):
    """Run comparative deep research (ChatGPT vs Claude-style)"""
    try:
        print(f"\n{'#' * 60}")
        print(f"# COMPARATIVE DEEP RESEARCH: {query}")
        print(f"{'#' * 60}\n")
        
        results = await perform_comparative_deep_search(query)
        
        # Display results
        print(f"⚖️ Comparative Deep Research Summary:")
        print(f"   - Total iterations: {results['total_iterations']}")
        print(f"   - ChatGPT approach: ✅")
        print(f"   - Claude-style approach: ✅")
        print(f"   - Comparative analysis: ✅")
        
        display(Markdown("## ChatGPT Research Results"))
        display(Markdown(results['chatgpt_results']['final_report'].markdown_report))
        
        display(Markdown("## Claude-Style Research Results"))
        display(Markdown(results['claude_results']['final_report'].markdown_report))
        
        display(Markdown("## Comparative Analysis"))
        display(Markdown(results['comparative_analysis'].markdown_report))
        
        # Save results
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"comparative_deep_{timestamp}.json"
        filepath = os.path.join(DATA_DIR, filename)
        
        with open(filepath, "w") as f:
            json.dump(results, f, indent=2, default=str)
        
        print(f"\n💾 Comparative deep research results saved to: {filepath}")
        return results
        
    except Exception as e:
        print(f"❌ Error in comparative deep research '{query}': {str(e)}")
        return None

print("✅ Jupyter execution interface ready!")
print("💡 Available functions:")
print("   - await run_research('your query')           # Agentic search with evaluation")
print("   - await run_combined_research('your query')  # Explorer + Agentic combined")
print("   - await run_explorer_only_research('query')  # Explorer-only with detailed JSON")
print("   - await universal_json_evaluator(filepath, query)  # Evaluate existing results")
print("🔥 Deep search functions (NOW ENABLED!):")
print("   - await run_deep_chatgpt_research('query')    # Deep ChatGPT iterative research")
print("   - await run_deep_claude_research('query')     # Deep Claude-style systematic research") 
print("   - await run_comparative_deep_research('query') # Comparative ChatGPT vs Claude research")
print(f"\n🔧 Current configuration:")
print(f"   - Max links to extract: {MAX_LINKS_TO_EXTRACT}")
print(f"   - Max URLs to scrape: {MAX_URLS_TO_SCRAPE}")
print(f"   - Max search results: {MAX_SEARCH_RESULTS}")
print(f"   - Strategic searches: {MAX_STRATEGIC_SEARCHES}")
print(f"   - Deep search iterations: {DEEP_SEARCH_ITERATIONS}")
print(f"   - Search context size: {SEARCH_CONTEXT_SIZE}")
print("\n📄 Explorer features:")
print("   - Detailed link tracking and progress display")
print("   - Comprehensive JSON saving with metadata")
print("   - Source-by-source analysis and statistics")
print("   - Performance metrics and error tracking")

# ============================================
# CELL 15: CONFIGURATION CUSTOMIZATION GUIDE
# ============================================

print("\n" + "=" * 80)
print("📝 CONFIGURATION CUSTOMIZATION GUIDE")
print("=" * 80)

print("""
🎛️ **How to customize the search system:**

1. **Modify Search Scope:**
   MAX_LINKS_TO_EXTRACT = 30        # Extract more links from search results
   MAX_URLS_TO_SCRAPE = 8           # Scrape more web pages  
   MAX_SEARCH_RESULTS = 15          # Get more search results per engine

2. **Adjust Agentic Search:**
   MAX_STRATEGIC_SEARCHES = 5       # Plan more strategic searches
   SEARCH_CONTEXT_SIZE = "large"    # Use larger context for deeper analysis
   REPORT_MIN_LENGTH = 1500         # Require longer, more detailed reports

3. **Configure Deep Search:**
   DEEP_SEARCH_ITERATIONS = 7       # More iterative refinement cycles
   DEEP_SEARCH_REFINEMENT = True    # Enable progressive refinement

4. **Performance Tuning:**
   SCRAPING_TIMEOUT = 20            # Longer timeout for slow websites
   MAX_TEXT_LENGTH = 5000           # Capture more text per page

**Example: High-Intensity Research Setup**
```python
# Uncomment and modify these in Cell 2:
# MAX_LINKS_TO_EXTRACT = 50
# MAX_URLS_TO_SCRAPE = 10  
# MAX_STRATEGIC_SEARCHES = 7
# DEEP_SEARCH_ITERATIONS = 10
# SEARCH_CONTEXT_SIZE = "large"
# REPORT_MIN_LENGTH = 2000
```

**Example: Fast & Light Setup**
```python
# Uncomment and modify these in Cell 2:
# MAX_LINKS_TO_EXTRACT = 10
# MAX_URLS_TO_SCRAPE = 3
# MAX_STRATEGIC_SEARCHES = 2  
# DEEP_SEARCH_ITERATIONS = 3
# SEARCH_CONTEXT_SIZE = "small"
# REPORT_MIN_LENGTH = 500
```

🔥 **Deep search capabilities are NOW ENABLED!**
✅ Deep search agents activated
✅ Deep search execution functions ready
✅ Deep search interface functions available
✅ Gradio Deep Search tab enabled

💡 **Pro Tips:**
- Increase MAX_STRATEGIC_SEARCHES for complex topics
- Use "large" SEARCH_CONTEXT_SIZE for technical subjects
- Enable DEEP_SEARCH_REFINEMENT for controversial topics
- Adjust SCRAPING_TIMEOUT if you encounter many timeouts
- Use Deep Search for the most comprehensive analysis
""")

print("✅ Configuration guide complete!")
print("🔄 Restart the kernel and rerun all cells after making configuration changes")
print("🔥 Deep Search is now FULLY OPERATIONAL!")


# ============================================
# CELL 16: GRADIO USER INTERFACE
# ============================================

def create_gradio_interface():
    """Create comprehensive Gradio interface for the search system"""
    
    # Custom CSS for better styling
    custom_css = """
    .gradio-container {
        max-width: 1200px !important;
    }
    .search-header {
        text-align: center;
        color: #2563eb;
        margin-bottom: 20px;
    }
    .config-section {
        background: #f8fafc;
        padding: 15px;
        border-radius: 8px;
        margin: 10px 0;
    }
    .status-box {
        padding: 10px;
        border-radius: 5px;
        margin: 5px 0;
    }
    .success { background: #dcfce7; border-left: 4px solid #16a34a; }
    .error { background: #fef2f2; border-left: 4px solid #dc2626; }
    .info { background: #dbeafe; border-left: 4px solid #2563eb; }
    """
    
    async def run_search_interface(query, search_type, max_links, max_urls, max_searches):
        """Interface wrapper for different search types"""
        if not query.strip():
            return "❌ Please enter a search query", "", ""
        
        try:
            # Update global configuration temporarily
            global MAX_LINKS_TO_EXTRACT, MAX_URLS_TO_SCRAPE, MAX_STRATEGIC_SEARCHES
            original_links = MAX_LINKS_TO_EXTRACT
            original_urls = MAX_URLS_TO_SCRAPE  
            original_searches = MAX_STRATEGIC_SEARCHES
            
            MAX_LINKS_TO_EXTRACT = max_links
            MAX_URLS_TO_SCRAPE = max_urls
            MAX_STRATEGIC_SEARCHES = max_searches
            
            status = f"🚀 Starting {search_type} search for: '{query}'\n"
            status += f"📊 Config: {max_links} links, {max_urls} URLs, {max_searches} searches\n\n"
            
            if search_type == "Standard Agentic Search":
                results = await run_research(query)
                if results:
                    report = results['report'].markdown_report
                    evaluation = f"""
## 📊 Evaluation Results
- **GPT-4 Score**: {results['evaluation'].gpt_evaluation.overall_score}/10
- **Claude Score**: {results['evaluation'].claude_evaluation.overall_score}/10  
- **Consensus**: {results['evaluation'].consensus_score}/10
- **Quality**: {'✅ Approved' if results['quality_approved'] else '❌ Needs Improvement'}
- **File**: {results['filepath']}
"""
                    status += "✅ Standard agentic search completed successfully!"
                    
            elif search_type == "Combined Explorer + Agentic":
                results = await run_combined_research(query)
                if results:
                    report = results.agentic_results.markdown_report
                    evaluation = f"""
## 📊 Combined Search Results
- **Explorer Sources**: {len([item for item in results.explorer_results if 'url' in item])}
- **Search Engines**: DuckDuckGo, Brave
- **Agentic Report**: Generated
- **Combined Summary**: Available
"""
                    status += "✅ Combined search completed successfully!"
                    
            elif search_type == "Explorer Only":
                explorer_results, explorer_metadata = perform_explorer_search(query, save_results=True)
                
                # Create detailed explorer report
                report = f"""
# 🔍 Explorer Search Results for: {query}

## 📊 Search Summary
- **Total Links Found**: {explorer_metadata['metadata']['total_links_found']}
- **Pages Scraped**: {explorer_metadata['metadata']['total_scraped']}
- **Successful Scrapes**: {len([r for r in explorer_results if not r.get('error')])}
- **Search Engines**: {', '.join([engine['name'] for engine in explorer_metadata['metadata']['search_engines']])}

## 🌐 Search Engine Details
"""
                
                for engine in explorer_metadata['metadata']['search_engines']:
                    report += f"""
### {engine['name']} Results
- **Links Found**: {engine['links_found']}
- **Links Scraped**: {engine['links_scraped']}
- **Successful Scrapes**: {engine['successful_scrapes']}

**All {engine['name']} Links:**
"""
                    for i, link in enumerate(engine['all_links'][:10], 1):  # Show first 10 links
                        report += f"{i}. {link}\n"
                    if len(engine['all_links']) > 10:
                        report += f"... and {len(engine['all_links']) - 10} more links\n"
                
                report += "\n## 📄 Scraped Content Preview\n"
                
                for i, result in enumerate([r for r in explorer_results if not r.get('error')][:5], 1):
                    report += f"""
### Source {i}: {result.get('source_engine', 'Unknown')}
**URL**: {result.get('url', 'N/A')}
**Text Length**: {result.get('full_text_length', 0):,} characters
**Processing Time**: {result.get('processing_time_seconds', 0)} seconds

**Content Preview**:
{result.get('text', 'No content')[:500]}...

---
"""
                
                evaluation = f"""
## 📊 Explorer Results Analysis
- **Total Sources**: {len(explorer_results)}
- **Successful Scrapes**: {len([r for r in explorer_results if not r.get('error')])}
- **Failed Scrapes**: {len([r for r in explorer_results if r.get('error')])}
- **Total Text Extracted**: {sum(r.get('full_text_length', 0) for r in explorer_results if not r.get('error')):,} characters
- **Average Processing Time**: {sum(r.get('processing_time_seconds', 0) for r in explorer_results) / len(explorer_results):.2f} seconds
- **Search Engines**: DuckDuckGo, Brave
- **Saved to**: {explorer_metadata['summary']['completed_at']}

### 🔗 Link Success Rate by Engine
"""
                for engine in explorer_metadata['metadata']['search_engines']:
                    success_rate = (engine['successful_scrapes'] / engine['links_scraped'] * 100) if engine['links_scraped'] > 0 else 0
                    evaluation += f"- **{engine['name']}**: {success_rate:.1f}% ({engine['successful_scrapes']}/{engine['links_scraped']})\n"
                
                status += "✅ Explorer search completed successfully!"
            
            # Restore original configuration
            MAX_LINKS_TO_EXTRACT = original_links
            MAX_URLS_TO_SCRAPE = original_urls
            MAX_STRATEGIC_SEARCHES = original_searches
            
            return status, report, evaluation
            
        except Exception as e:
            # Restore original configuration on error
            MAX_LINKS_TO_EXTRACT = original_links
            MAX_URLS_TO_SCRAPE = original_urls
            MAX_STRATEGIC_SEARCHES = original_searches
            
            error_msg = f"❌ Error during {search_type}: {str(e)}"
            return error_msg, "", ""
    
    async def evaluate_file_interface(filepath, query):
        """Interface wrapper for file evaluation"""
        if not filepath.strip():
            return "❌ Please provide a filepath"
        
        try:
            if not os.path.exists(filepath):
                return f"❌ File not found: {filepath}"
            
            # Check if it's an explorer results file
            explorer_data = load_explorer_results(filepath)
            if explorer_data:
                # It's an explorer file - provide explorer analysis
                if not query.strip():
                    # Try to extract query from file
                    query = explorer_data.get("metadata", {}).get("query") or explorer_data.get("summary", {}).get("query", "")
                
                return display_explorer_json_analysis(filepath, query)
            else:
                # It's a regular research file - use standard evaluation
                if not query.strip():
                    return "❌ Please provide the original query for evaluation"
                
                evaluation = await universal_json_evaluator(filepath, query)
                return evaluation.evaluation_summary
            
        except Exception as e:
            return f"❌ Evaluation error: {str(e)}"
    
    def get_recent_files():
        """Get list of recent result files"""
        try:
            files = []
            for filename in os.listdir(DATA_DIR):
                if filename.endswith('.json'):
                    filepath = os.path.join(DATA_DIR, filename)
                    files.append(filepath)
            return sorted(files, key=os.path.getmtime, reverse=True)[:10]
        except:
            return []
    
    # Create Gradio interface
    with gr.Blocks(css=custom_css, title="🔍 Advanced Research System") as interface:
        
        gr.HTML("""
        <div class="search-header">
            <h1>🔍 Advanced AI Research System</h1>
            <p>Powered by Multi-Engine Search + AI Analysis + Dual-Model Evaluation</p>
        </div>
        """)
        
        with gr.Tabs():
            
            # Main Search Tab
            with gr.Tab("🚀 Research Search"):
                with gr.Row():
                    with gr.Column(scale=2):
                        query_input = gr.Textbox(
                            label="🔍 Research Query",
                            placeholder="Enter your research question...",
                            lines=2
                        )
                        
                        search_type = gr.Radio(
                            choices=[
                                "Standard Agentic Search",
                                "Combined Explorer + Agentic", 
                                "Explorer Only"
                            ],
                            value="Standard Agentic Search",
                            label="Search Type"
                        )
                        
                    with gr.Column(scale=1):
                        gr.HTML('<div class="config-section">')
                        gr.HTML("<h4>⚙️ Configuration</h4>")
                        
                        max_links = gr.Slider(
                            minimum=5, maximum=100, value=MAX_LINKS_TO_EXTRACT,
                            label="Max Links to Extract"
                        )
                        max_urls = gr.Slider(
                            minimum=1, maximum=20, value=MAX_URLS_TO_SCRAPE,
                            label="Max URLs to Scrape" 
                        )
                        max_searches = gr.Slider(
                            minimum=1, maximum=10, value=MAX_STRATEGIC_SEARCHES,
                            label="Strategic Searches"
                        )
                        gr.HTML('</div>')
                
                search_btn = gr.Button("🚀 Start Research", variant="primary", size="lg")
                
                with gr.Row():
                    with gr.Column():
                        status_output = gr.Textbox(
                            label="📊 Status & Progress",
                            lines=4,
                            interactive=False
                        )
                    
                with gr.Row():
                    with gr.Column():
                        report_output = gr.Markdown(
                            label="📄 Research Report",
                            height=400
                        )
                    with gr.Column():
                        evaluation_output = gr.Markdown(
                            label="📊 Evaluation & Metrics",
                            height=400
                        )
                
                search_btn.click(
                    fn=run_search_interface,
                    inputs=[query_input, search_type, max_links, max_urls, max_searches],
                    outputs=[status_output, report_output, evaluation_output]
                )
            
            # File Evaluation Tab
            with gr.Tab("📊 Evaluate Results"):
                gr.HTML("<h3>📊 Analyze Research Files</h3>")
                
                with gr.Row():
                    with gr.Column():
                        file_input = gr.Textbox(
                            label="📁 File Path",
                            placeholder="Enter path to JSON results file...",
                            lines=1
                        )
                        eval_query_input = gr.Textbox(
                            label="🔍 Original Query (Optional for Explorer files)",
                            placeholder="Enter the original research query (auto-detected for Explorer files)...",
                            lines=2
                        )
                        
                        recent_files = gr.Dropdown(
                            choices=get_recent_files(),
                            label="📂 Recent Files",
                            interactive=True
                        )
                        
                        def update_file_path(selected_file):
                            return selected_file if selected_file else ""
                        
                        recent_files.change(
                            fn=update_file_path,
                            inputs=[recent_files],
                            outputs=[file_input]
                        )
                
                eval_btn = gr.Button("📊 Analyze File", variant="secondary")
                
                gr.HTML("""
                <div style="background: #f0f9ff; padding: 10px; border-radius: 5px; margin: 10px 0;">
                    <h4>📄 Supported File Types</h4>
                    <ul>
                        <li><strong>Explorer Results</strong>: <code>explorer_search_*.json</code> - Shows link analysis, scraping stats, content breakdown</li>
                        <li><strong>Research Reports</strong>: <code>evaluation_*.json</code> - Provides AI-powered quality evaluation</li>
                        <li><strong>Combined Results</strong>: <code>combined_search_*.json</code> - Analyzes both explorer and agentic components</li>
                        <li><strong>Deep Search</strong>: <code>deep_*.json</code> - Analysis of iterative research results</li>
                    </ul>
                </div>
                """)
                
                evaluation_result = gr.Markdown(
                    label="📊 Analysis Report",
                    height=500
                )
                
                eval_btn.click(
                    fn=evaluate_file_interface,
                    inputs=[file_input, eval_query_input],
                    outputs=[evaluation_result]
                )
            
            # Configuration Tab
            with gr.Tab("⚙️ System Config"):
                gr.HTML("<h3>⚙️ Current System Configuration</h3>")
                
                # API Status Section
                with gr.Row():
                    with gr.Column():
                        gr.HTML("<h4>🔌 API Status</h4>")
                        api_status_btn = gr.Button("🔍 Check API Availability", variant="secondary")
                        api_status_output = gr.Markdown("Click button to check API status...")
                        
                        async def check_api_status():
                            status = await test_api_availability()
                            
                            status_text = f"""
## 🔌 API Availability Check
**Timestamp**: {status['timestamp']}

### Service Status
- **GPT-4**: {'✅ Available' if status['gpt4_available'] else '❌ Unavailable'}
- **Claude**: {'✅ Available' if status['claude_available'] else '❌ Unavailable'}

### Recommendations
"""
                            if status['gpt4_available'] and status['claude_available']:
                                status_text += "🎉 **All systems operational** - Full evaluation available\n"
                            elif status['gpt4_available'] or status['claude_available']:
                                status_text += "⚠️ **Partial availability** - Fallback evaluation will be used\n"
                            else:
                                status_text += "🚫 **Limited availability** - Fallback evaluations will be used for both models\n"
                            
                            if not status['claude_available']:
                                status_text += "\n### Claude Troubleshooting\n"
                                status_text += "- Try again in 2-5 minutes\n- System will use fallback scoring\n- GPT-4 evaluation still available\n"
                            
                            if not status['gpt4_available']:
                                status_text += "\n### GPT-4 Troubleshooting\n" 
                                status_text += "- Check API key and billing\n- Verify rate limits\n- Claude evaluation still available\n"
                            
                            return status_text
                        
                        api_status_btn.click(
                            fn=check_api_status,
                            outputs=[api_status_output]
                        )
                
                config_info = f"""
## 📊 Current Settings

### Search Configuration
- **Max Links to Extract**: {MAX_LINKS_TO_EXTRACT}
- **Max URLs to Scrape**: {MAX_URLS_TO_SCRAPE}
- **Max Search Results**: {MAX_SEARCH_RESULTS}
- **Strategic Searches**: {MAX_STRATEGIC_SEARCHES}
- **Search Context Size**: {SEARCH_CONTEXT_SIZE}

### Performance Settings  
- **Scraping Timeout**: {SCRAPING_TIMEOUT}s
- **Max Text Length**: {MAX_TEXT_LENGTH} chars
- **Report Min Length**: {REPORT_MIN_LENGTH} words

### Deep Search (Commented Out)
- **Deep Search Iterations**: {DEEP_SEARCH_ITERATIONS}
- **Deep Search Refinement**: {DEEP_SEARCH_REFINEMENT}

### 🔧 How to Modify Configuration
1. Edit variables in **Cell 2** of the notebook
2. Restart kernel and rerun all cells
3. Or use the sliders in the Research Search tab for temporary changes

### 💡 Deep Search Capabilities
To enable deep search features:
1. Uncomment deep search agents in Cell 9
2. Uncomment deep search functions in Cell 10B
3. Uncomment deep search interfaces in Cell 14
4. Add deep search tab to this Gradio interface

### 🔧 API Error Handling
- **Automatic Retries**: 3 attempts with exponential backoff
- **Fallback Evaluations**: Heuristic scoring when APIs unavailable
- **Graceful Degradation**: Partial evaluation when one API fails
- **Status Monitoring**: Check API availability above
"""
                
                gr.Markdown(config_info)
                
                refresh_btn = gr.Button("🔄 Refresh File List")
                refresh_btn.click(
                    fn=lambda: gr.Dropdown.update(choices=get_recent_files()),
                    outputs=[recent_files]
                )
            
            # Help Tab
            with gr.Tab("❓ Help"):
                help_content = """
# 🔍 Advanced Research System - User Guide

## 🚀 Quick Start
1. **Enter your research query** in the search box
2. **Select search type**:
   - **Standard Agentic**: AI-powered analysis with evaluation
   - **Combined**: Web scraping + AI analysis  
   - **Explorer Only**: Raw web scraping
3. **Adjust configuration** sliders if needed
4. **Click "Start Research"** and wait for results

## 📊 Search Types Explained

### Standard Agentic Search
- Uses AI agents to plan strategic searches
- Performs web research with built-in tools
- Generates comprehensive reports
- Includes dual-model evaluation (GPT-4 + Claude)
- **Best for**: Complex analysis, professional reports

### Combined Explorer + Agentic  
- Scrapes web pages directly (DuckDuckGo + Brave)
- Combines with AI analysis
- Provides both raw data and insights
- **Best for**: Comprehensive coverage, fact-checking

### Explorer Only
- Direct web scraping without AI processing
- Raw content from multiple search engines
- **Enhanced features**: Detailed link tracking, comprehensive JSON saving, performance metrics
- **Best for**: Quick data gathering, source verification, link analysis

## 📄 File Analysis Features

### Automatic File Type Detection
The system automatically detects and analyzes different file types:

#### 🔍 Explorer Results Files
- **Format**: `explorer_search_YYYYMMDD_HHMMSS.json`
- **Analysis**: Link success rates, scraping performance, content breakdown
- **Auto-detection**: Query extracted from metadata
- **Metrics**: Processing times, content lengths, error analysis

#### 📊 Research Report Files  
- **Format**: `evaluation_YYYYMMDD_HHMMSS.json`
- **Analysis**: AI-powered quality evaluation using GPT-4 + Claude
- **Requires**: Original query for proper evaluation

#### 🔄 Combined Search Files
- **Format**: `combined_search_YYYYMMDD_HHMMSS.json`  
- **Analysis**: Both explorer and agentic components
- **Features**: Comprehensive analysis of all data sources

## ⚙️ Configuration Tips

### Performance Settings
- **Max Links**: Higher = more comprehensive, slower
- **Max URLs**: Higher = more content, longer processing
- **Strategic Searches**: Higher = deeper analysis

### Quality vs Speed
- **Fast**: 5 links, 2 URLs, 2 searches
- **Balanced**: 20 links, 5 URLs, 3 searches  
- **Comprehensive**: 50 links, 10 URLs, 7 searches

## 🔗 Enhanced Explorer Features

### Detailed Link Tracking
- **Real-time display** of all discovered links
- **Source attribution** (DuckDuckGo, Brave, etc.)
- **Link success rates** by search engine
- **Processing time tracking** per URL

### Comprehensive JSON Saving
- **Metadata preservation**: Search configuration, timestamps, engine details
- **Full content storage**: Original text length + truncated versions
- **Error tracking**: Detailed error messages and failure analysis
- **Performance metrics**: Processing times, success rates, content statistics

### Progress Monitoring
- **Live scraping progress** with URL display
- **Success/failure indicators** in real-time
- **Character count tracking** as pages are processed
- **Engine-by-engine breakdown** of results

### Advanced Analysis
```python
# Explorer-only research with full JSON
results = await run_explorer_only_research("your query")

# Load and analyze existing explorer files
analysis = display_explorer_json_analysis("explorer_search_file.json")
```

## 📁 File Management
- Results automatically saved to `workspace/data/`
- **Explorer files**: `explorer_search_YYYYMMDD_HHMMSS.json`
- **Combined files**: `combined_search_YYYYMMDD_HHMMSS.json`
- **Evaluation files**: `evaluation_YYYYMMDD_HHMMSS.json`
- Use "Analyze Results" tab for any file type

## 🔧 API Error Handling

### Common Issues & Solutions

#### Claude API Overload (Error 529)
**Symptoms**: "Overloaded" error message
**Solutions**:
- ⏳ Wait 2-5 minutes and retry
- 🔄 System automatically uses fallback evaluation
- ✅ GPT-4 evaluation still works normally

#### GPT-4 API Issues  
**Symptoms**: Authentication or rate limit errors
**Solutions**:
- 🔑 Check OpenAI API key in environment
- 💳 Verify account billing and credits
- ⏱️ Wait between requests to avoid rate limits

### Fallback Evaluations
When APIs are unavailable, the system uses:
- **Heuristic scoring** based on content analysis
- **Conservative estimates** for accuracy and quality
- **Clear labeling** of fallback vs. API evaluations
- **Medium confidence** ratings

### 💡 Best Practices
1. **Check API status** in System Config tab
2. **Use off-peak hours** for better availability
3. **Space out evaluations** to avoid rate limits
4. **Re-evaluate later** when APIs recover
5. **Monitor fallback indicators** in results

## 🔧 Deep Search Features (Advanced)

### How to Enable Deep Search
1. **Uncomment Cell 9**: Deep search agent definitions
2. **Uncomment Cell 10B**: Deep search execution functions  
3. **Uncomment Cell 14**: Deep search interface functions
4. **Restart kernel** and rerun all cells

### Deep Search Modes

#### 🧠 Deep ChatGPT Search
- **5 iterative cycles** with progressive refinement
- **Gap identification** and targeted follow-up
- **Comprehensive synthesis** of findings
- **Best for**: Complex, evolving topics

#### 🎭 Deep Claude-Style Search  
- **Systematic 5-phase** methodology
- **Critical source evaluation**
- **Conservative confidence** assessment
- **Best for**: Controversial or technical topics

#### ⚖️ Comparative Deep Search
- **Parallel ChatGPT + Claude** approaches
- **Convergence/divergence** analysis
- **Meta-research** quality assessment
- **Best for**: Complex topics requiring multiple perspectives

### Usage After Enabling
```python
# Deep iterative research
await run_deep_chatgpt_research("query")

# Systematic deep analysis  
await run_deep_claude_research("query")

# Comparative deep research
await run_comparative_deep_research("query")
```

### Configuration
- **DEEP_SEARCH_ITERATIONS**: 5-10 cycles
- **SEARCH_CONTEXT_SIZE**: "large" for deep analysis
- **DEEP_SEARCH_REFINEMENT**: Enable progressive improvement

## ❓ Troubleshooting
- **Slow performance**: Reduce max links/URLs
- **Timeout errors**: Increase scraping timeout in config
- **Empty results**: Try different search terms
- **Evaluation errors**: Check file path and query format
- **API issues**: Use System Config tab to check status

## 🆘 Emergency Mode
If both APIs fail:
1. System continues with fallback evaluations
2. Focus on the research content quality
3. Manually verify key facts from sources
4. Re-evaluate when APIs recover

"""
                gr.Markdown(help_content)
            
            # ============================================
            # DEEP SEARCH TAB (NOW ENABLED!)
            # ============================================
            
            # Deep Search Tab
            with gr.Tab("🔍 Deep Search"):
                gr.HTML("<h3>🔍 Advanced Deep Search Capabilities</h3>")
                
                with gr.Row():
                    with gr.Column():
                        deep_query_input = gr.Textbox(
                            label="🎯 Deep Research Query",
                            placeholder="Enter complex research question for deep analysis...",
                            lines=3
                        )
                        
                        deep_search_type = gr.Radio(
                            choices=[
                                "Deep ChatGPT (Iterative)",
                                "Deep Claude-Style (Systematic)", 
                                "Comparative Deep Search (Both)"
                            ],
                            value="Deep ChatGPT (Iterative)",
                            label="Deep Search Mode"
                        )
                        
                        deep_iterations = gr.Slider(
                            minimum=3, maximum=15, value=DEEP_SEARCH_ITERATIONS,
                            label="Search Iterations"
                        )
                
                async def run_deep_search_interface(query, search_type, iterations):
                    if not query.strip():
                        return "❌ Please enter a deep research query", ""
                    
                    try:
                        # Update iterations temporarily
                        global DEEP_SEARCH_ITERATIONS
                        original_iterations = DEEP_SEARCH_ITERATIONS
                        DEEP_SEARCH_ITERATIONS = iterations
                        
                        status = f"🔍 Starting {search_type} for: '{query}'\n"
                        status += f"🔄 Iterations: {iterations}\n\n"
                        
                        if search_type == "Deep ChatGPT (Iterative)":
                            results = await run_deep_chatgpt_research(query)
                            report = results['final_report'].markdown_report
                            status += f"✅ Deep ChatGPT search completed!\n"
                            status += f"📊 {len(results['search_history'])} iterations performed"
                            
                        elif search_type == "Deep Claude-Style (Systematic)":
                            results = await run_deep_claude_research(query)
                            report = results['final_report'].markdown_report  
                            status += f"✅ Deep Claude-style search completed!\n"
                            status += f"📊 {len(results['analysis_phases'])} phases completed"
                            
                        elif search_type == "Comparative Deep Search (Both)":
                            results = await run_comparative_deep_research(query)
                            report = results['comparative_analysis'].markdown_report
                            status += f"✅ Comparative deep search completed!\n"
                            status += f"📊 {results['total_iterations']} total iterations"
                        
                        # Restore original setting
                        DEEP_SEARCH_ITERATIONS = original_iterations
                        return status, report
                        
                    except Exception as e:
                        DEEP_SEARCH_ITERATIONS = original_iterations
                        return f"❌ Deep search error: {str(e)}", ""
                
                deep_search_btn = gr.Button("🔍 Start Deep Search", variant="primary", size="lg")
                
                with gr.Row():
                    with gr.Column():
                        deep_status_output = gr.Textbox(
                            label="📊 Deep Search Progress",
                            lines=4,
                            interactive=False
                        )
                    with gr.Column():
                        deep_report_output = gr.Markdown(
                            label="📄 Deep Research Report",
                            height=500
                        )
                
                deep_search_btn.click(
                    fn=run_deep_search_interface,
                    inputs=[deep_query_input, deep_search_type, deep_iterations],
                    outputs=[deep_status_output, deep_report_output]
                )
                
                gr.HTML("""
                <div style="background: #f0f9ff; padding: 15px; border-radius: 8px; margin: 10px 0;">
                    <h4>🔍 Deep Search Capabilities</h4>
                    <ul>
                        <li><strong>Deep ChatGPT</strong>: 5-15 iterative cycles with progressive refinement</li>
                        <li><strong>Deep Claude-Style</strong>: Systematic 5-phase analytical methodology</li>
                        <li><strong>Comparative</strong>: Both approaches + meta-analysis comparison</li>
                    </ul>
                    <p><em>⚠️ Deep searches take 5-20 minutes depending on complexity and iterations</em></p>
                </div>
                """)

        
        gr.HTML("""
        <div style="text-align: center; margin-top: 20px; color: #6b7280;">
            <p>🔍 Advanced Research System | Multi-Engine Search + AI Analysis + Dual Evaluation</p>
        </div>
        """)
    
    return interface

# Create and launch interface
def launch_gradio_interface(share=False, debug=False):
    """Launch the Gradio interface"""
    print("🚀 Launching Gradio interface...")
    print(f"📊 Current config: {MAX_LINKS_TO_EXTRACT} links, {MAX_URLS_TO_SCRAPE} URLs, {MAX_STRATEGIC_SEARCHES} searches")
    
    interface = create_gradio_interface()
    
    # Launch interface
    interface.launch(
        share=share,
        debug=debug,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        quiet=False
    )
    
    return interface

print("✅ Gradio interface ready!")
print("💡 Usage:")
print("   - interface = launch_gradio_interface()              # Launch locally")
print("   - interface = launch_gradio_interface(share=True)    # Create public link")
print("   - interface = launch_gradio_interface(debug=True)    # Enable debug mode")

# ============================================
# AUTO-LAUNCH GRADIO INTERFACE
# ============================================

# Auto-launch settings
AUTO_LAUNCH = True           # Set to False to disable auto-launch
SHARE_PUBLICLY = False       # Set to True to create public shareable link
DEBUG_MODE = False           # Set to True for debugging
PORT = 7860                  # Port number for the interface

if AUTO_LAUNCH:
    try:
        print("\n" + "=" * 60)
        print("🚀 AUTO-LAUNCHING GRADIO INTERFACE")
        print("=" * 60)
        print(f"📊 Configuration: {MAX_LINKS_TO_EXTRACT} links, {MAX_URLS_TO_SCRAPE} URLs, {MAX_STRATEGIC_SEARCHES} searches")
        print(f"🌐 Share publicly: {'Yes' if SHARE_PUBLICLY else 'No'}")
        print(f"🔧 Debug mode: {'Enabled' if DEBUG_MODE else 'Disabled'}")
        print(f"🔌 Port: {PORT}")
        
        # Create and launch interface automatically
        interface = create_gradio_interface()
        
        print("🎯 Starting Gradio server...")
        interface.launch(
            share=SHARE_PUBLICLY,
            debug=DEBUG_MODE,
            server_name="0.0.0.0",
            server_port=PORT,
            show_error=True,
            quiet=False,
            inbrowser=True,          # Automatically open in browser
            prevent_thread_lock=False  # Allow notebook to continue
        )
        
        print("✅ Gradio interface launched successfully!")
        print(f"🌐 Access your interface at: http://localhost:{PORT}")
        if SHARE_PUBLICLY:
            print("🔗 Public link will be displayed above")
            
    except Exception as e:
        print(f"❌ Failed to auto-launch Gradio interface: {str(e)}")
        print("💡 You can manually launch with: launch_gradio_interface()")
        
else:
    print("\n⏸️ Auto-launch disabled")
    print("💡 To launch manually: interface = launch_gradio_interface()")
    print("🔧 To enable auto-launch: Set AUTO_LAUNCH = True in this cell")

print("\n" + "=" * 60)
print("🎉 ADVANCED RESEARCH SYSTEM FULLY OPERATIONAL!")
print("=" * 60)
print("📚 Complete system with:")
print("   ✅ Multi-engine web scraping (DuckDuckGo + Brave)")
print("   ✅ AI-powered agentic search")
print("   ✅ Dual-model evaluation (GPT-4 + Claude)")
print("   ✅ Deep search capabilities (ENABLED!)")
print("   ✅ Beautiful Gradio web interface with Deep Search tab")
print("   ✅ Fully configurable parameters")
print("   ✅ Robust error handling with fallbacks")
print("\n🔥 Enhanced Explorer Features:")
print("   ✅ Real-time link discovery and display")
print("   ✅ Detailed scraping progress with statistics")
print("   ✅ Comprehensive JSON saving with metadata")
print("   ✅ Performance metrics and error tracking")
print("   ✅ Source-by-source analysis and breakdown")
print("   ✅ Automatic file type detection in analysis")
print("\n🚀 Ready for advanced research tasks!")
print("🔥 Deep Search now available in both notebook and Gradio interface!")
print("📊 Explorer JSON data now provides complete transparency!")

✅ Environment and directories initialized!
✅ Configuration and models loaded!
✅ HTML parsing utilities loaded!
🚫 Google search functions disabled (uncomment to enable)
✅ DuckDuckGo search functions loaded!
✅ Brave search functions loaded!
✅ Web scraping functions loaded!
✅ Enhanced explorer search orchestration loaded!
✅ Agentic search agents initialized!
📊 Configuration: 3 searches, medium context, 1000+ word reports
🔥 Deep search agents ENABLED and ready!
✅ Agentic search execution functions loaded!
✅ Deep search execution functions loaded!
🔧 Deep search configuration: 5 iterations, refinement=enabled
✅ Universal JSON evaluator loaded!
✅ API error handling utilities loaded!
💡 Use test_api_availability() to check current API status
💡 Fallback evaluations activate automatically when needed
✅ Evaluation orchestration functions loaded!
✅ Combined search orchestration loaded!
✅ Jupyter execution interface ready!
💡 Available functions:
   - await run_research('your query')           # Agen


############################################################
# RESEARCHING: write a pytho3 program To help Halim to create similar box sets in the future, we would like to create a Python algorithm that suggests a possible solution for any given set of jars.

We will use a list to store the collection of jars available. For instance, using the current set of 10 jars, our list would be as follows:
jars = [150,20,20,10,80,130,110,90,100,40]

The aim of this challenge is to use a backtracking / recursive algorithm to work out a possible solution for this puzzle by creating three lists of jars, using the values from the above jars list to create 3 box sets of exactly 250g each.
############################################################

🔍 PHASE 1: DEEP SEARCH

📋 Planning searches...

🌐 Performing web searches...

📝 Writing comprehensive report...

📄 INITIAL RESEARCH REPORT


# Report on Backtracking Algorithm for Jar Partitioning Problem

## Executive Summary  
The challenge involves partitioning a given set of jars, each with a specified weight, into three separate box sets, with each set containing a total weight of 250 grams. Given the jars represented as a list, the algorithm employs a backtracking approach to explore potential combinations systematically. This report synthesizes the approach taken, the implementation details, findings, and actionable insights.

## Introduction  
Halim needs assistance in creating box sets from a collection of jars, which can be described as a combination problem—a form of the partition problem in computational complexity. This problem is well-known in combinatorial optimization and requires splitting a list of integers into subsets meeting specific criteria. Here, the criteria are defined as achieving equal weights across subsets, specifically at 250 grams per box. 

### Problem Statement  
Given the list of jars' weights:  
```python  
jars = [150, 20, 20, 10, 80, 130, 110, 90, 100, 40]  
```  
The goal is to implement an algorithm that analyzes these weights and suggests valid partitions to form three separate box sets, effectively utilizing the backtracking approach.

## Research Findings  
The partitioning problem is NP-complete, inherently exhibiting challenges when scaling data size. However, backtracking algorithms can effectively navigate smaller datasets by exploring all possible partitions and systematically validating results.

### Key Steps in Backtracking Algorithm  
To tackle the jar partition problem, the backtracking approach involves the following:

1. **Sort the Array**: The jar weights are sorted in descending order to optimize the search process and quickly eliminate combinations that exceed target weights.
2. **Calculate Target Sum**: The total weight of jars is computed. If it's not dividable by three, valid box sets cannot be formed, and the function exits early.
3. **Define Backtracking Function**: This recursive function will attempt to build each box set by evaluating weights one by one. If a weight exceeds the box capacity (250g), the algorithm backtracks and tries different combinations.
4. **Base Case**: The recursion concludes when all jars are processed, and each box set accumulates the required weights.

### Backtracking Code Implementation  
Below is the sample Python implementation of the backtracking algorithm to solve the jar partitioning problem:

```python  
def can_partition(jars, target_weight=250, num_boxes=3):  
    def backtrack(index, current_sums):  
        # Check if all boxes have reached the target weight  
        if all(weight == target_weight for weight in current_sums):  
            return True  
        if current_sums.count(target_weight) == num_boxes:  
            return True  
        if index >= len(jars):  
            return False  
        for i in range(num_boxes):  
            if current_sums[i] + jars[index] <= target_weight:  
                current_sums[i] += jars[index]  
                if backtrack(index + 1, current_sums):  
                    return True  
                current_sums[i] -= jars[index]  
            if current_sums[i] == 0:  
                break  
        return False  
  
    return backtrack(0, [0] * num_boxes)  

# Example Usage  
jars = [150, 20, 20, 10, 80, 130, 110, 90, 100, 40]  
result = can_partition(jars)  
print("Can partition into three sets of 250g: ", result)  
```  

### Explanation of Code  
- **Function Definition**: `can_partition()` takes a list of jars, the target weight for each box, and the number of boxes. 
- **Recursive Function**: The inner function `backtrack()` handles indices and current sums of the box weights. It checks if a configuration meets the weight requirement, if the weight surpasses the limit, or if it should continue exploring other configurations.
- **Optimal Check**: The condition `if current_sums[i] == 0: break` prevents unnecessary further checks if the current box is still empty, optimizing performance further.

## Main Findings  
- **Efficiency**: The backtracking method is feasible for relatively small datasets, successfully partitioning jars into three box sets.
- **Performance Implications**: While this approach offers a clear path for smaller problems, its exponential nature poses limitations with larger datasets or increased constraints.
- **Compression Techniques**: Analyzing ways to preprocess weights can enhance efficiency, potentially applying dynamic programming for expanded datasets.

## Discussion  
The algorithm offers a foundational approach to partition problems. Future enhancements could involve integrating dynamic programming techniques to manage larger sets more effectively or employing heuristic methods for approximation when exact solutions become computationally prohibitive.

### Considerations  
- **Edge Cases**: Scenarios where the total weight isn’t exactly dividable by three need handling to avoid runtime errors.
- **Scalability**: As the dataset grows, switching to a more efficient method (e.g., dynamic programming) will likely be necessary to maintain performance and speed.

## Conclusions  
The development of a backtracking algorithm for partitioning jar weights effectively demonstrates practical applications of recursion and combinatorial optimization in Python. The approach lays the groundwork for addressing more complex partitioning problems while offering insights into computational limitations and performance optimizations. Future adjustments, particularly in data handling and larger data sets, will be pivotal for creating scalable solutions.

## Actionable Insights  
1. Enhance the current function to handle larger inputs efficiently through preprocessing or alternative algorithms.
2. Conduct further research on hybrid approaches combining backtracking with heuristic or approximation strategies for quicker results in larger datasets.
3. Implement error handling to manage edge cases more robustly, particularly with unevenly weighted sets.

---  
This report encapsulates significant findings and approaches in developing an effective solution to Halim's jar partition problem, contributing towards systematic partitioning problems and algorithmic exploration in Python.


🔬 PHASE 2: DUAL-MODEL EVALUATION
🤖 Running GPT-4 evaluation...
🤖 GPT-4 evaluation attempt 1/3...
🧠 Running Claude evaluation...
🧠 Claude evaluation attempt 1/3...
⚠️ Claude attempt 1 failed: Extra data: line 42 column 1 (char 1571)
🚫 Claude error: Extra data: line 42 column 1 (char 1571), using fallback evaluation...
🔧 Generating fallback Claude evaluation...
🔄 Building evaluation consensus...


C:\Users\deepa\AppData\Local\Temp\ipykernel_18480\1201890045.py:1174: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  gpt_scores = gpt_eval.criteria_scores.dict()
C:\Users\deepa\AppData\Local\Temp\ipykernel_18480\1201890045.py:1175: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  claude_scores = claude_eval.criteria_scores.dict()



# 📊 Research Quality Evaluation

## 🎯 Query Analysis
**Original Query**: write a pytho3 program To help Halim to create similar box sets in the future, we would like to create a Python algorithm that suggests a possible solution for any given set of jars.

We will use a list to store the collection of jars available. For instance, using the current set of 10 jars, our list would be as follows:
jars = [150,20,20,10,80,130,110,90,100,40]

The aim of this challenge is to use a backtracking / recursive algorithm to work out a possible solution for this puzzle by creating three lists of jars, using the values from the above jars list to create 3 box sets of exactly 250g each.
**Content Type**: Agentic

## ⚠️ API Status
- **GPT-4**: ✅ API evaluation successful
- **Claude**: Used fallback evaluation (API overloaded)

*Note: Fallback evaluations use heuristic scoring and may be less accurate.*

## 📈 Evaluation Scores

### GPT-4 Assessment 
- **Overall Score**: 7.8/10
- **Accuracy**: 8.0/10
- **Completeness**: 7.0/10  
- **Relevance**: 9.0/10
- **Clarity**: 8.0/10
- **Depth**: 7.0/10

### Claude Assessment (Fallback)
- **Overall Score**: 6.9/10
- **Accuracy**: 6.0/10
- **Completeness**: 8.0/10
- **Relevance**: 7.0/10  
- **Clarity**: 6.5/10
- **Depth**: 7.0/10

### 🤝 Consensus Analysis
- **Consensus Score**: 8.7/10
- **Quality Rating**: ✅ HIGH QUALITY
- **Note**: Consensus may be affected by API fallbacks

## 💪 Strengths
- The content provides a clear and structured approach to solving the jar partitioning problem using a backtracking algorithm.
- The Python code implementation is well-documented and demonstrates the application of the backtracking technique effectively.
- The report includes a logical explanation of the problem and the steps involved in the algorithm, which enhances understanding.

## 🔧 Areas for Improvement  
- The report lacks a detailed analysis of potential limitations or challenges associated with the algorithm, such as performance issues with larger datasets.
- There is limited discussion on alternative approaches or optimizations that could be considered for this problem.
- The explanation of the code could be expanded to include more detailed comments on each step of the algorithm.

## 🎯 Recommendations
- Expand the discussion to include potential optimizations or alternative approaches to solving the partition problem.
- Include a section on the computational complexity of the algorithm to provide insights into its efficiency and scalability.
- Verify key facts with additional sources
- Re-evaluate when Claude API is available
- Consider expert review for technical content

---
*Evaluation completed at 2025-08-18 19:33:21*
*Some evaluations used fallback methods due to API limitations*



💾 Evaluation saved to: d:\Workspace\LLMs_projects\agents\DeepSearch\workspace\data\evaluation_20250818_193321.json

🔧 PHASE 3: QUALITY CHECK

✅ Research Quality: APPROVED
Consensus Score: 8.7/10
Quality Threshold: 7.5/10

✨ DEEP SEARCH WITH EVALUATION COMPLETE

📊 Summary for 'write a pytho3 program To help Halim to create similar box sets in the future, we would like to create a Python algorithm that suggests a possible solution for any given set of jars.

We will use a list to store the collection of jars available. For instance, using the current set of 10 jars, our list would be as follows:
jars = [150,20,20,10,80,130,110,90,100,40]

The aim of this challenge is to use a backtracking / recursive algorithm to work out a possible solution for this puzzle by creating three lists of jars, using the values from the above jars list to create 3 box sets of exactly 250g each.':
   - Report length: 6322 characters
   - GPT-4 Score: 7.8/10
   - Claude Score: 6.9/10
   - Consensus: 8.7/10
   