In [1]:
# SECTION 1: System Initialization & Diagnostics
# ===============================================

import sys
import os
from pathlib import Path
import importlib
from datetime import datetime

print("🔧 PyNucleus Developer Environment - Starting Initialization...")
print(f"📅 Session started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Add src to Python path
src_path = str(Path().resolve() / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import all PyNucleus components
try:
    from pynucleus.pipeline import RAGPipeline, DWSIMPipeline, ResultsExporter, PipelineUtils
    from pynucleus.integration.config_manager import ConfigManager
    from pynucleus.integration.dwsim_rag_integrator import DWSIMRAGIntegrator
    from pynucleus.integration.llm_output_generator import LLMOutputGenerator
    from pynucleus.llm import LLMRunner
    from pynucleus.llm.query_llm import LLMQueryManager
    
    print("✅ All PyNucleus modules imported successfully")
    
    # Initialize components
    pipeline = PipelineUtils(results_dir="data/05_output/results")
    config_manager = ConfigManager(config_dir="configs")
    
    # Fixed: DWSIMRAGIntegrator only accepts results_dir parameter
    dwsim_rag_integrator = DWSIMRAGIntegrator(results_dir="data/05_output/results")
    
    llm_generator = LLMOutputGenerator(results_dir="data/05_output/llm_reports")
    
    print("✅ Core components initialized")
    print("🎯 Ready for development and testing!")
    
except Exception as e:
    print(f"❌ Initialization error: {e}")
    import traceback
    traceback.print_exc()

🔧 PyNucleus Developer Environment - Starting Initialization...
📅 Session started: 2025-06-18 14:51:38
✅ All PyNucleus modules imported successfully


  from .autonotebook import tqdm as notebook_tqdm
  embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')


✅ Core components initialized
🎯 Ready for development and testing!


In [2]:
# SECTION 1.2: Comprehensive System Diagnostic
# ============================================

print("🔍 Running Comprehensive System Diagnostic...")

try:
    # Run system diagnostic
    import subprocess
    result = subprocess.run([
        sys.executable, "scripts/comprehensive_system_diagnostic.py", "--quiet"
    ], capture_output=True, text=True, cwd=".")
    
    if result.returncode == 0:
        print("✅ System diagnostic completed successfully")
        # Extract key metrics from output
        lines = result.stdout.strip().split('\n')
        for line in lines[-10:]:  # Show last 10 lines for summary
            if any(keyword in line for keyword in ['Health:', 'Status:', 'EXCELLENT', 'GOOD', 'passed']):
                print(f"   {line}")
    else:
        print("⚠️ System diagnostic issues detected:")
        print(result.stderr)
        
except Exception as e:
    print(f"❌ Could not run system diagnostic: {e}")
    print("💡 Continuing with manual checks...")

# Manual system checks
print("\n🔍 Manual System Checks:")

# Check data directories
data_dirs = ['data/01_raw', 'data/02_processed', 'data/03_intermediate', 'data/04_models', 'data/05_output']
for dir_path in data_dirs:
    exists = Path(dir_path).exists()
    print(f"   {'✅' if exists else '❌'} {dir_path}")

# Check src structure
src_dirs = ['src/pynucleus/pipeline', 'src/pynucleus/rag', 'src/pynucleus/integration', 'src/pynucleus/llm']
for dir_path in src_dirs:
    exists = Path(dir_path).exists()
    print(f"   {'✅' if exists else '❌'} {dir_path}")

print("\n🎯 System diagnostic complete!")


🔍 Running Comprehensive System Diagnostic...
⚠️ System diagnostic issues detected:
  main()


🔍 Manual System Checks:
   ✅ data/01_raw
   ✅ data/02_processed
   ✅ data/03_intermediate
   ✅ data/04_models
   ✅ data/05_output
   ✅ src/pynucleus/pipeline
   ✅ src/pynucleus/rag
   ✅ src/pynucleus/integration
   ✅ src/pynucleus/llm

🎯 System diagnostic complete!


In [3]:
# SECTION 1.3: Pipeline Status and Health Check
# =============================================

print("📊 Detailed Pipeline Status Check...")

try:
    # Quick test with validation (this method exists)
    test_results = pipeline.quick_test()
    
    print("\n" + "="*50)
    
    # Validate test_results
    if test_results is None:
        print("❌ Quick test returned None - pipeline may not be properly initialized")
        test_results = {
            'results_dir': 'data/05_output/results',
            'csv_files_count': 0,
            'csv_files': []
        }
    
    # Safely access results with fallbacks
    results_dir = test_results.get('results_dir', 'data/05_output/results')
    csv_files_count = test_results.get('csv_files_count', 0)
    csv_files = test_results.get('csv_files', [])
    
    print(f"📁 Results Directory: {results_dir}")
    print(f"📄 CSV Files: {csv_files_count}")
    
    if csv_files_count > 0:
        print("\n📋 Existing Files:")
        for file_info in csv_files:
            if isinstance(file_info, dict):
                name = file_info.get('name', 'Unknown')
                size = file_info.get('size', 0)
                print(f"   • {name} ({size} bytes)")
            else:
                print(f"   • {file_info}")
    
    # Component health check
    print(f"\n🔧 Component Health:")
    print(f"   • Pipeline Utils: {'✅' if pipeline else '❌'}")
    print(f"   • RAG Pipeline: {'✅' if hasattr(pipeline, 'rag_pipeline') and pipeline.rag_pipeline else '❌'}")
    print(f"   • DWSIM Pipeline: {'✅' if hasattr(pipeline, 'dwsim_pipeline') and pipeline.dwsim_pipeline else '❌'}")
    print(f"   • Config Manager: {'✅' if 'config_manager' in locals() and config_manager else '❌'}")
    print(f"   • DWSIM-RAG Integrator: {'✅' if 'dwsim_rag_integrator' in locals() and dwsim_rag_integrator else '❌'}")
    print(f"   • LLM Generator: {'✅' if 'llm_generator' in locals() and llm_generator else '❌'}")
    
    # Additional pipeline status info
    component_status = test_results.get('component_status', {})
    print(f"\n🔍 Pipeline Component Status:")
    print(f"   • RAG Pipeline: {'✅' if component_status.get('rag_pipeline', False) else '❌'}")
    print(f"   • DWSIM Pipeline: {'✅' if component_status.get('dwsim_pipeline', False) else '❌'}")
    print(f"   • Results Exporter: {'✅' if component_status.get('exporter', False) else '❌'}")
    
    # Integration status
    integration_enabled = test_results.get('integration_enabled', False)
    rag_chunks = test_results.get('rag_chunks', 0)
    simulation_chunks = test_results.get('simulation_chunks', 0)
    
    print(f"\n🔗 Integration Status:")
    print(f"   • Total RAG Chunks: {rag_chunks:,}")
    print(f"   • Simulation Chunks: {simulation_chunks:,}")
    print(f"   • Integration Active: {'✅' if integration_enabled else '⚪'}")
    
    print("\n✅ Status check complete!")
    
except Exception as e:
    print(f"❌ Status check error: {e}")
    import traceback
    traceback.print_exc()
    
    # Provide troubleshooting tips
    print("\n🔧 Troubleshooting Tips:")
    print("   1. Try restarting the notebook kernel")
    print("   2. Re-run Cell 1 to reinitialize components")
    print("   3. Check if all required directories exist")
    print("   4. Verify PyNucleus installation is complete")

📊 Detailed Pipeline Status Check...

📁 Results Directory: data/05_output/results
📄 CSV Files: 2

📋 Existing Files:
   • dev_simulation_config.csv (171 bytes)
   • bulk_modular_plants_template.csv (1372 bytes)

🔧 Component Health:
   • Pipeline Utils: ✅
   • RAG Pipeline: ✅
   • DWSIM Pipeline: ✅
   • Config Manager: ✅
   • DWSIM-RAG Integrator: ✅
   • LLM Generator: ✅

🔍 Pipeline Component Status:
   • RAG Pipeline: ❌
   • DWSIM Pipeline: ❌
   • Results Exporter: ❌

🔗 Integration Status:
   • Total RAG Chunks: 0
   • Simulation Chunks: 0
   • Integration Active: ⚪

✅ Status check complete!


In [4]:
# SECTION 2.1: Configuration Templates and Management
# ===================================================

print("🔧 Advanced Configuration Management...")

# Create configuration templates
try:
    # List existing configuration files
    existing_configs = config_manager.list_configs()
    print(f"📋 Existing Configuration Files: {len(existing_configs)}")
    for config_file in existing_configs:
        print(f"   • {config_file}")
    
    # Create a sample configuration template
    sample_config = {
        "simulations": [
            {
                "case_name": "dev_test_case_1",
                "temperature": 350.0,
                "pressure": 2.5,
                "feed_rate": 100.0,
                "catalyst_type": "Pt/Al2O3",
                "process_type": "distillation"
            },
            {
                "case_name": "dev_test_case_2", 
                "temperature": 375.0,
                "pressure": 3.0,
                "feed_rate": 120.0,
                "catalyst_type": "Pd/C",
                "process_type": "reaction"
            }
        ],
        "metadata": {
            "created_by": "Developer_Notebook",
            "version": "1.0",
            "description": "Development configuration template"
        }
    }
    
    # Save JSON template
    json_template_path = config_manager.save(sample_config, "dev_simulation_config.json")
    print(f"✅ JSON template created: {json_template_path}")
    
    # Save CSV template (will extract simulations data)
    csv_template_path = config_manager.save(sample_config, "dev_simulation_config.csv")
    print(f"✅ CSV template created: {csv_template_path}")
    
    # Show template contents (first few lines)
    if json_template_path.exists():
        with open(json_template_path, 'r') as f:
            content = f.read()[:300]
            print(f"\n📋 JSON Template Preview:")
            print(content + "..." if len(content) >= 300 else content)
    
    # Test loading the configuration back
    loaded_config = config_manager.load("dev_simulation_config.json")
    print(f"\n🔄 Template validation: {len(loaded_config['simulations'])} simulations loaded")
    
except Exception as e:
    print(f"❌ Configuration error: {e}")
    import traceback
    traceback.print_exc()

print("\n✅ Configuration management ready!")

🔧 Advanced Configuration Management...
📋 Existing Configuration Files: 5
   • bulk_modular_plants_template.csv
   • bulk_modular_plants_template.json
   • dev_simulation_config.csv
   • dev_simulation_config.json
   • perf_test.json
✅ JSON template created: configs/dev_simulation_config.json
✅ CSV template created: configs/dev_simulation_config.csv

📋 JSON Template Preview:
{
  "simulations": [
    {
      "case_name": "dev_test_case_1",
      "temperature": 350.0,
      "pressure": 2.5,
      "feed_rate": 100.0,
      "catalyst_type": "Pt/Al2O3",
      "process_type": "distillation"
    },
    {
      "case_name": "dev_test_case_2",
      "temperature": 375.0,
      "...

🔄 Template validation: 2 simulations loaded

✅ Configuration management ready!


In [5]:
# SECTION 2.2: Run Enhanced Pipeline with Full Analysis
# =====================================================

print("🚀 Running Enhanced Pipeline for Development Testing...")

# Run complete pipeline with detailed logging
try:
    start_time = datetime.now()
    
    # Execute pipeline
    results = pipeline.run_complete_pipeline()
    
    if results:
        duration = (datetime.now() - start_time).total_seconds()
        print(f"\n🎉 Pipeline completed in {duration:.1f} seconds!")
        
        # Detailed results analysis
        print(f"\n📊 Detailed Results:")
        print(f"   • RAG Queries: {len(results.get('rag_data', []))}")
        print(f"   • DWSIM Simulations: {len(results.get('dwsim_data', []))}")
        print(f"   • Export Files: {len(results.get('exported_files', []))}")
        
        # Set up integrator with pipeline data
        if hasattr(pipeline, 'rag_pipeline'):
            dwsim_rag_integrator.rag_pipeline = pipeline.rag_pipeline
        
        print("✅ Pipeline data ready for enhanced analysis")
        
    else:
        print("❌ Pipeline execution failed")
        
except Exception as e:
    print(f"❌ Enhanced pipeline error: {e}")
    import traceback
    traceback.print_exc()

print("\n✅ Enhanced pipeline testing complete!")


🚀 Running Enhanced Pipeline for Development Testing...


Failed to export RAG results: Object of type float32 is not JSON serializable



🎉 Pipeline completed in 14.4 seconds!

📊 Detailed Results:
   • RAG Queries: 3
   • DWSIM Simulations: 3
   • Export Files: 1
✅ Pipeline data ready for enhanced analysis

✅ Enhanced pipeline testing complete!


In [6]:
# SECTION 3.1: DWSIM-RAG Integration and Enhanced Analysis
# ========================================================

print("🔬 Advanced DWSIM-RAG Integration Analysis...")

try:
    # Get DWSIM results
    dwsim_results = pipeline.dwsim_pipeline.get_results()
    
    if dwsim_results:
        print(f"📊 Processing {len(dwsim_results)} DWSIM simulations...")
        
        # Perform enhanced integration
        integrated_results = dwsim_rag_integrator.integrate_simulation_results(
            dwsim_results, perform_rag_analysis=True
        )
        
        # Export integrated results
        integrated_export_file = dwsim_rag_integrator.export_integrated_results()
        
        print(f"✅ Enhanced integration complete:")
        print(f"   • Integrated simulations: {len(integrated_results)}")
        print(f"   • Export file: {integrated_export_file}")
        
        # Show detailed analysis for first simulation
        if integrated_results:
            sample = integrated_results[0]
            print(f"\n📋 Sample Analysis (First Simulation):")
            
            # Safely access original simulation data
            original_sim = sample.get('original_simulation', {})
            print(f"   • Case: {original_sim.get('case_name', 'Unknown')}")
            
            # Performance metrics
            perf_metrics = sample.get('performance_metrics', {})
            if perf_metrics:
                print(f"   📊 Performance Metrics:")
                for key, value in perf_metrics.items():
                    display_key = key.replace('_', ' ').title()
                    if isinstance(value, (int, float)):
                        if 'rate' in key.lower() or 'percentage' in key.lower():
                            print(f"      • {display_key}: {value:.1f}%")
                        elif isinstance(value, float):
                            print(f"      • {display_key}: {value:.3f}")
                        else:
                            print(f"      • {display_key}: {value}")
                    else:
                        print(f"      • {display_key}: {value}")
            else:
                print(f"   ⚠️ No performance metrics available")
        
    else:
        print("⚠️ No DWSIM results available. Run Section 2.2 first.")
        
except Exception as e:
    print(f"❌ Integration error: {e}")
    import traceback
    traceback.print_exc()

print("\n✅ Advanced integration analysis complete!")


🔬 Advanced DWSIM-RAG Integration Analysis...
📊 Processing 3 DWSIM simulations...
✅ Enhanced integration complete:
   • Integrated simulations: 3
   • Export file: data/05_output/results/integrated_results_20250618_145159.json

📋 Sample Analysis (First Simulation):
   • Case: distillation_ethanol_water
   📊 Performance Metrics:
      • Conversion: 0.850
      • Selectivity: 0.920
      • Yield: 0.782
      • Overall Performance: Good
      • Efficiency Rating: High
      • Temperature Rating: Suboptimal
      • Pressure Rating: Optimal

✅ Advanced integration analysis complete!


In [7]:
# SECTION 3.2: LLM Report Generation and Financial Analysis
# =========================================================

print("💰 Advanced Financial Analysis and LLM Report Generation...")

try:
    if 'integrated_results' in locals() and integrated_results:
        
        # Generate LLM reports for all simulations
        print(f"📄 Generating LLM reports for {len(integrated_results)} simulations...")
        
        llm_report_files = []
        for i, result in enumerate(integrated_results):
            try:
                report_file = llm_generator.export_llm_ready_text(result)
                llm_report_files.append(report_file)
                print(f"   ✅ Report {i+1}: {Path(report_file).name}")
            except Exception as e:
                print(f"   ❌ Report {i+1} failed: {e}")
        
        # Generate comprehensive financial analysis
        financial_file = llm_generator.export_financial_analysis(integrated_results)
        metrics = llm_generator._calculate_key_metrics(integrated_results)
        
        print(f"\n💰 Comprehensive Financial Metrics:")
        print(f"   • Average Recovery Rate: {metrics['avg_recovery']:.1f}%")
        print(f"   • Estimated Daily Revenue: ${metrics['estimated_revenue']:,.2f}")
        print(f"   • Estimated Daily Profit: ${metrics['net_profit']:,.2f}")
        print(f"   • Return on Investment: {metrics['roi']:.1f}%")
        print(f"   • Financial Analysis File: {financial_file}")
        
        print(f"\n📄 Generated Files:")
        print(f"   • LLM Reports: {len(llm_report_files)} files")
        print(f"   • Financial Analysis: 1 file")
        
    else:
        print("⚠️ No integrated results available. Run Section 3.1 first.")
        
except Exception as e:
    print(f"❌ LLM/Financial analysis error: {e}")
    import traceback
    traceback.print_exc()

print("\n✅ Advanced analysis and reporting complete!")


💰 Advanced Financial Analysis and LLM Report Generation...
📄 Generating LLM reports for 3 simulations...
   ✅ Report 1: llm_analysis_distillation_ethanol_water_20250618_145159.md
   ✅ Report 2: llm_analysis_methanol_synthesis_20250618_145159.md
   ✅ Report 3: llm_analysis_heat_exchanger_optimization_20250618_145159.md

💰 Comprehensive Financial Metrics:
   • Average Recovery Rate: 85.0%
   • Estimated Daily Revenue: $997,050.00
   • Estimated Daily Profit: $-2,950.00
   • Return on Investment: -43.2%
   • Financial Analysis File: data/05_output/llm_reports/financial_analysis_20250618_145159.md

📄 Generated Files:
   • LLM Reports: 3 files
   • Financial Analysis: 1 file

✅ Advanced analysis and reporting complete!


In [8]:
# SECTION 4.1: LLM Model Testing and Initialization
# =================================================

print("🤖 LLM Development Environment Initialization...")

try:
    # Initialize LLM components
    llm_runner = LLMRunner()
    llm_query_manager = LLMQueryManager()  # Fixed: removed max_tokens parameter
    
    # Test LLM functionality
    print(f"✅ LLM Runner initialized")
    print(f"✅ LLM Query Manager initialized")
    
    # Get model information
    model_info = llm_runner.get_model_info()
    print(f"\n🔧 Model Information:")
    print(f"   • Model ID: {model_info['model_id']}")
    
    # Handle the actual keys returned by get_model_info()
    if 'num_parameters' in model_info:
        print(f"   • Parameters: {model_info['num_parameters']:,}")
    if 'device' in model_info:
        print(f"   • Device: {model_info['device']}")
    if 'dtype' in model_info:
        print(f"   • Data Type: {model_info['dtype']}")
    if 'memory_footprint' in model_info:
        memory = model_info['memory_footprint']
        if isinstance(memory, int):
            print(f"   • Memory: {memory / 1024 / 1024:.1f} MB")
        else:
            print(f"   • Memory: {memory}")
    
    # Check for errors in model info
    if 'error' in model_info:
        print(f"   ⚠️ Model Info Error: {model_info['error']}")
    
    # Test basic prompt rendering
    test_prompt = llm_query_manager._create_general_query_prompt(
        question="Test chemical process optimization query",
        context="You are a chemical engineering expert."
    )
    
    print(f"\n📋 Prompt System Test:")
    print(f"   • Template rendering: ✅ Success")
    print(f"   • Prompt length: {len(test_prompt)} characters")
    
    # Test tokenizer access if available
    if hasattr(llm_runner, 'tokenizer') and llm_runner.tokenizer:
        vocab_size = len(llm_runner.tokenizer)
        print(f"   • Vocabulary Size: {vocab_size:,}")
    
except Exception as e:
    print(f"❌ LLM initialization error: {e}")
    import traceback
    traceback.print_exc()

print("\n✅ LLM development environment ready!")

🤖 LLM Development Environment Initialization...
✅ LLM Runner initialized
✅ LLM Query Manager initialized

🔧 Model Information:
   • Model ID: microsoft/DialoGPT-medium
   • Parameters: 354,823,168
   • Device: cpu
   • Data Type: torch.float32
   • Memory: 1377.5 MB

📋 Prompt System Test:
   • Template rendering: ✅ Success
   • Prompt length: 108 characters
   • Vocabulary Size: 50,257

✅ LLM development environment ready!


In [9]:
# SECTION 4.2: Advanced Prompt Engineering and Testing
# ====================================================

print("🎯 Advanced Prompt Engineering and Testing...")

try:
    # Test different prompt scenarios
    test_scenarios = [
        {
            "name": "Process Optimization",
            "query": "How can we optimize the distillation column efficiency?",
            "system": "You are a process optimization expert specializing in distillation systems."
        },
        {
            "name": "Safety Analysis", 
            "query": "What safety considerations are important for modular chemical plants?",
            "system": "You are a chemical safety engineer with expertise in process hazard analysis."
        },
        {
            "name": "Economic Assessment",
            "query": "Analyze the economic benefits of modular plant design.",
            "system": "You are a chemical engineering economist specializing in plant design economics."
        }
    ]
    
    print(f"🧪 Testing {len(test_scenarios)} prompt scenarios...")
    
    for i, scenario in enumerate(test_scenarios):
        print(f"\n📋 Scenario {i+1}: {scenario['name']}")
        
        try:
            # Render prompt
            prompt = llm_query_manager._create_general_query_prompt(
                question=scenario['query'],
                context=scenario['system']
            )
            
            print(f"   ✅ Prompt rendered successfully ({len(prompt)} chars)")
            
            # Quick test with LLM (generate a short response) - FIXED: use generate_response
            response_result = llm_runner.generate_response(
                prompt=scenario['query'],
                max_length=100,
                temperature=0.7,
                do_sample=True
            )
            
            # Extract the actual response text
            response_text = response_result.get('response', 'No response generated')
            print(f"   ✅ LLM response generated ({len(response_text)} chars)")
            print(f"   📝 Preview: {response_text[:100]}...")
            
        except Exception as e:
            print(f"   ❌ Scenario {i+1} failed: {e}")
    
except Exception as e:
    print(f"❌ Prompt engineering error: {e}")
    import traceback
    traceback.print_exc()

print("\n✅ Prompt engineering and testing complete!")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🎯 Advanced Prompt Engineering and Testing...
🧪 Testing 3 prompt scenarios...

📋 Scenario 1: Process Optimization
   ✅ Prompt rendered successfully (160 chars)
   ✅ LLM response generated (0 chars)
   📝 Preview: ...

📋 Scenario 2: Safety Analysis
   ✅ Prompt rendered successfully (176 chars)
   ✅ LLM response generated (0 chars)
   📝 Preview: ...

📋 Scenario 3: Economic Assessment
   ✅ Prompt rendered successfully (164 chars)
   ✅ LLM response generated (0 chars)
   📝 Preview: ...

✅ Prompt engineering and testing complete!


In [10]:
# SECTION 5.1: Performance Analysis and Benchmarking
# ==================================================

print("📈 Performance Analysis and System Benchmarking...")

import time
import psutil
import gc

def measure_performance(func, name, *args, **kwargs):
    """Measure function performance"""
    gc.collect()  # Clean memory before measurement
    
    start_time = time.time()
    start_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB
    
    try:
        result = func(*args, **kwargs)
        success = True
        error = None
    except Exception as e:
        result = None
        success = False
        error = str(e)
    
    end_time = time.time()
    end_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB
    
    return {
        'name': name,
        'duration': end_time - start_time,
        'memory_used': end_memory - start_memory,
        'success': success,
        'error': error,
        'result': result
    }

# Performance benchmarks
benchmarks = []

print("🧪 Running Performance Benchmarks...")

# Benchmark 1: Pipeline initialization
bench1 = measure_performance(
    lambda: PipelineUtils(results_dir="data/05_output/results"),
    "Pipeline Initialization"
)
benchmarks.append(bench1)

# Benchmark 2: Configuration template creation
bench2 = measure_performance(
    lambda: config_manager.create_template_json("perf_test.json"),
    "Configuration Template Creation"
)
benchmarks.append(bench2)

# Benchmark 3: Quick status check
bench3 = measure_performance(
    pipeline.quick_test,
    "Quick Status Check"
)
benchmarks.append(bench3)

# Display results
print(f"\n📊 Performance Benchmark Results:")
print("-" * 60)
for bench in benchmarks:
    status = "✅" if bench['success'] else "❌"
    print(f"{status} {bench['name']:<30} {bench['duration']:>8.3f}s {bench['memory_used']:>8.1f}MB")
    if not bench['success']:
        print(f"   Error: {bench['error']}")

# System resource usage
print(f"\n💻 Current System Resources:")
print(f"   • CPU Usage: {psutil.cpu_percent():.1f}%")
print(f"   • Memory Usage: {psutil.virtual_memory().percent:.1f}%")
print(f"   • Available Memory: {psutil.virtual_memory().available / 1024 / 1024 / 1024:.1f} GB")

print("\n✅ Performance analysis complete!")


📈 Performance Analysis and System Benchmarking...
🧪 Running Performance Benchmarks...

📊 Performance Benchmark Results:
------------------------------------------------------------
✅ Pipeline Initialization           1.949s   -977.9MB
✅ Configuration Template Creation    0.000s      0.0MB
✅ Quick Status Check                0.001s      0.9MB

💻 Current System Resources:
   • CPU Usage: 29.3%
   • Memory Usage: 84.2%
   • Available Memory: 2.5 GB

✅ Performance analysis complete!


In [11]:
# SECTION 5.2: Debug Tools and System Cleanup
# ============================================

print("🔧 Debug Tools and System Maintenance...")

# System cleanup functions
def cleanup_temp_files():
    """Remove temporary files"""
    temp_patterns = ["perf_test.json", "dev_simulation_config.*"]
    cleaned = 0
    
    for pattern in temp_patterns:
        if "*" in pattern:
            import glob
            files = glob.glob(pattern)
            for file in files:
                try:
                    Path(file).unlink()
                    cleaned += 1
                except:
                    pass
        else:
            try:
                if Path(pattern).exists():
                    Path(pattern).unlink()
                    cleaned += 1
            except:
                pass
    
    return cleaned

def check_log_files():
    """Check system log files"""
    log_dirs = ["logs", "data/05_output/logs"]
    log_files = []
    
    for log_dir in log_dirs:
        if Path(log_dir).exists():
            for log_file in Path(log_dir).glob("*.log"):
                size = log_file.stat().st_size
                log_files.append({
                    'file': str(log_file),
                    'size': size,
                    'age': time.time() - log_file.stat().st_mtime
                })
    
    return log_files

# Run debug tools
print("🗑️ Running System Cleanup...")
cleaned_files = cleanup_temp_files()
print(f"   • Cleaned {cleaned_files} temporary files")

print("\n📋 Checking Log Files...")
log_files = check_log_files()
if log_files:
    print(f"   • Found {len(log_files)} log files:")
    for log in log_files[-5:]:  # Show last 5
        age_hours = log['age'] / 3600
        print(f"     - {Path(log['file']).name} ({log['size']} bytes, {age_hours:.1f}h old)")
else:
    print("   • No log files found")

# Memory cleanup
print("\n💾 Memory Cleanup...")
gc.collect()
print("   • Garbage collection completed")

# Final status
print(f"\n📊 Development Session Summary:")
if 'start_time' in locals():
    print(f"   • Session Duration: {(datetime.now() - start_time).total_seconds():.1f} seconds")
print(f"   • Benchmarks Run: {len(benchmarks)}")
print(f"   • Components Tested: {'✅' if all(b['success'] for b in benchmarks) else '⚠️'}")

print("\n✅ Debug tools and cleanup complete!")


🔧 Debug Tools and System Maintenance...
🗑️ Running System Cleanup...
   • Cleaned 0 temporary files

📋 Checking Log Files...
   • Found 61 log files:
     - system_diagnostic_20250617_220903.log (13408 bytes, 16.7h old)
     - system_diagnostic_20250618_135859.log (16805 bytes, 0.9h old)
     - system_diagnostic_20250618_135654.log (17202 bytes, 0.9h old)
     - system_validation_20250618_010327.log (10283 bytes, 13.8h old)
     - system_diagnostic_20250617_215819.log (13404 bytes, 16.9h old)

💾 Memory Cleanup...
   • Garbage collection completed

📊 Development Session Summary:
   • Session Duration: 19.6 seconds
   • Benchmarks Run: 3
   • Components Tested: ✅

✅ Debug tools and cleanup complete!
