In [18]:
# SECTION 1: System Initialization & Diagnostics
# ===============================================

import sys
import os
from pathlib import Path
import importlib
from datetime import datetime

print("🔧 PyNucleus Developer Environment - Starting Initialization...")
print(f"📅 Session started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Add src to Python path
src_path = str(Path().resolve() / "src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import all PyNucleus components
try:
    from pynucleus.pipeline import RAGPipeline, DWSIMPipeline, ResultsExporter, PipelineUtils
    from pynucleus.integration.config_manager import ConfigManager
    from pynucleus.integration.dwsim_rag_integrator import DWSIMRAGIntegrator
    from pynucleus.integration.llm_output_generator import LLMOutputGenerator
    from pynucleus.llm import LLMRunner
    from pynucleus.llm.query_llm import LLMQueryManager, quick_ask_llm
    
    print("✅ All PyNucleus modules imported successfully")
    
    # Initialize components
    pipeline = PipelineUtils(results_dir="data/05_output/results")
    config_manager = ConfigManager(config_dir="configs")
    dwsim_rag_integrator = DWSIMRAGIntegrator(
        rag_pipeline=None,  # Will be set after pipeline initialization
        results_dir="data/05_output/results"
    )
    llm_generator = LLMOutputGenerator(results_dir="data/05_output/llm_reports")
    
    print("✅ Core components initialized")
    print("🎯 Ready for development and testing!")
    
except Exception as e:
    print(f"❌ Initialization error: {e}")
    import traceback
    traceback.print_exc()


🔧 PyNucleus Developer Environment - Starting Initialization...
📅 Session started: 2025-06-18 00:06:42
✅ All PyNucleus modules imported successfully
🔧 Setting up RAG imports...
✅ RAG imports ready!
📂 Loaded 5 existing DWSIM results from disk
🔧 Setting up DWSIM imports...
✅ DWSIM modules imported successfully
📁 Results directory: data/05_output/results
🔧 Pipeline Utils initialized with results dir: data/05_output/results
🔗 DWSIM-RAG integration enabled
✅ Core components initialized
🎯 Ready for development and testing!


In [19]:
# SECTION 1.2: Comprehensive System Diagnostic
# ============================================

print("🔍 Running Comprehensive System Diagnostic...")

try:
    # Run system diagnostic
    import subprocess
    result = subprocess.run([
        sys.executable, "scripts/comprehensive_system_diagnostic.py", "--quiet"
    ], capture_output=True, text=True, cwd=".")
    
    if result.returncode == 0:
        print("✅ System diagnostic completed successfully")
        # Extract key metrics from output
        lines = result.stdout.strip().split('\n')
        for line in lines[-10:]:  # Show last 10 lines for summary
            if any(keyword in line for keyword in ['Health:', 'Status:', 'EXCELLENT', 'GOOD', 'passed']):
                print(f"   {line}")
    else:
        print("⚠️ System diagnostic issues detected:")
        print(result.stderr)
        
except Exception as e:
    print(f"❌ Could not run system diagnostic: {e}")
    print("💡 Continuing with manual checks...")

# Manual system checks
print("\n🔍 Manual System Checks:")

# Check data directories
data_dirs = ['data/01_raw', 'data/02_processed', 'data/03_intermediate', 'data/04_models', 'data/05_output']
for dir_path in data_dirs:
    exists = Path(dir_path).exists()
    print(f"   {'✅' if exists else '❌'} {dir_path}")

# Check src structure
src_dirs = ['src/pynucleus/pipeline', 'src/pynucleus/rag', 'src/pynucleus/integration', 'src/pynucleus/llm']
for dir_path in src_dirs:
    exists = Path(dir_path).exists()
    print(f"   {'✅' if exists else '❌'} {dir_path}")

print("\n🎯 System diagnostic complete!")


🔍 Running Comprehensive System Diagnostic...


Python(76265) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


✅ System diagnostic completed successfully
   🟢 SYSTEM HEALTH: 100.0% - EXCELLENT

🔍 Manual System Checks:
   ✅ data/01_raw
   ✅ data/02_processed
   ✅ data/03_intermediate
   ✅ data/04_models
   ✅ data/05_output
   ✅ src/pynucleus/pipeline
   ✅ src/pynucleus/rag
   ✅ src/pynucleus/integration
   ✅ src/pynucleus/llm

🎯 System diagnostic complete!


In [20]:
# SECTION 1.3: Pipeline Status and Health Check
# =============================================

print("📊 Detailed Pipeline Status Check...")

try:
    # Pipeline component status
    pipeline.print_pipeline_status()
    
    print("\n" + "="*50)
    
    # Quick test with validation
    test_results = pipeline.quick_test()
    
    # Validate test_results
    if test_results is None:
        print("❌ Quick test returned None - pipeline may not be properly initialized")
        test_results = {
            'results_dir': 'data/05_output/results',
            'csv_files_count': 0,
            'csv_files': []
        }
    
    # Safely access results with fallbacks
    results_dir = test_results.get('results_dir', 'data/05_output/results')
    csv_files_count = test_results.get('csv_files_count', 0)
    csv_files = test_results.get('csv_files', [])
    
    print(f"📁 Results Directory: {results_dir}")
    print(f"📄 CSV Files: {csv_files_count}")
    
    if csv_files_count > 0:
        print("\n📋 Existing Files:")
        for file_info in csv_files:
            if isinstance(file_info, dict):
                name = file_info.get('name', 'Unknown')
                size = file_info.get('size', 0)
                print(f"   • {name} ({size} bytes)")
            else:
                print(f"   • {file_info}")
    
    # Component health
    print(f"\n🔧 Component Health:")
    print(f"   • Pipeline Utils: {'✅' if hasattr(pipeline, 'rag_pipeline') else '⚠️'}")
    print(f"   • Config Manager: {'✅' if config_manager else '❌'}")
    print(f"   • DWSIM-RAG Integrator: {'✅' if dwsim_rag_integrator else '❌'}")
    print(f"   • LLM Generator: {'✅' if llm_generator else '❌'}")
    
    # Additional diagnostics
    component_status = test_results.get('component_status', {})
    print(f"\n🔍 Pipeline Component Status:")
    print(f"   • RAG Pipeline: {'✅' if component_status.get('rag_pipeline', False) else '❌'}")
    print(f"   • DWSIM Pipeline: {'✅' if component_status.get('dwsim_pipeline', False) else '❌'}")
    print(f"   • Results Exporter: {'✅' if component_status.get('exporter', False) else '❌'}")
    
    # Integration status
    integration_enabled = test_results.get('integration_enabled', False)
    rag_chunks = test_results.get('rag_chunks', 0)
    simulation_chunks = test_results.get('simulation_chunks', 0)
    
    print(f"\n🔗 Integration Status:")
    print(f"   • Total RAG Chunks: {rag_chunks:,}")
    print(f"   • Simulation Chunks: {simulation_chunks:,}")
    print(f"   • Integration Active: {'✅' if integration_enabled else '⚪'}")
    
except Exception as e:
    print(f"❌ Status check error: {e}")
    import traceback
    traceback.print_exc()
    
    # Provide troubleshooting tips
    print("\n🔧 Troubleshooting Tips:")
    print("   1. Try restarting the notebook kernel")
    print("   2. Re-run Cell 1 to reinitialize components")
    print("   3. Check if all required directories exist")
    print("   4. Verify PyNucleus installation is complete")

print("\n✅ Status check complete!")


📊 Detailed Pipeline Status Check...

🔧 PyNucleus Pipeline Status
⚙️ Configuration:
   Results Directory: data/05_output/results
   LLM Output Directory: data/05_output/llm_reports
   DWSIM Integration: ✅ Enabled

📦 Components:
   RAG Pipeline: ✅ Ready
   DWSIM Pipeline: ✅ Ready
   Results Exporter: ✅ Ready

📊 RAG Pipeline Status:
📚 Total Chunks: 7,141
📄 Document Sources: 19
🔗 Integration Status: Documents only
📏 Average Chunk Size: 787.7 characters
🔍 Vector Store: Not built

🔗 Integration Capabilities:
   📄 Document Processing: ✅ Available
   🔬 Simulation Integration: ✅ Available
   🤖 LLM Querying: ✅ Enhanced (docs + sims)

🧪 Quick Pipeline Test
------------------------------
📚 RAG: 7141 chunks available
🔗 Integration: ⚪ Documents only
📊 DWSIM Statistics:
   • Total Simulations: 5
   • Success Rate: 100.0%
   • Average Duration: 0.00s
🔬 DWSIM: 5 simulations
📁 Output: 2 CSV files
📁 Results Directory: data/05_output/results
📄 CSV Files: 2

📋 Existing Files:
   • dwsim_summary.csv (360 by

In [21]:
# SECTION 2.1: Configuration Templates and Management
# ===================================================

print("🔧 Advanced Configuration Management...")

# Create configuration templates
try:
    # Generate JSON and CSV templates
    json_template = config_manager.create_template_json("dev_simulation_config.json", verbose=True)
    csv_template = config_manager.create_template_csv("dev_simulation_config.csv", verbose=True)
    
    print(f"✅ Configuration templates created:")
    print(f"   • JSON: {json_template}")
    print(f"   • CSV: {csv_template}")
    
    # Show template contents (first few lines)
    if Path(json_template).exists():
        with open(json_template, 'r') as f:
            content = f.read()[:300]
            print(f"\n📋 JSON Template Preview:")
            print(content + "..." if len(content) >= 300 else content)
    
except Exception as e:
    print(f"❌ Configuration error: {e}")

print("\n✅ Configuration management ready!")


🔧 Advanced Configuration Management...
✅ Pydantic template created: configs/dev_simulation_config.json
❌ Configuration error: dict contains fields not in fieldnames: 'catalyst_loading'

✅ Configuration management ready!


In [22]:
# SECTION 2.2: Run Enhanced Pipeline with Full Analysis
# =====================================================

print("🚀 Running Enhanced Pipeline for Development Testing...")

# Run complete pipeline with detailed logging
try:
    start_time = datetime.now()
    
    # Execute pipeline
    results = pipeline.run_complete_pipeline()
    
    if results:
        duration = (datetime.now() - start_time).total_seconds()
        print(f"\n🎉 Pipeline completed in {duration:.1f} seconds!")
        
        # Detailed results analysis
        print(f"\n📊 Detailed Results:")
        print(f"   • RAG Queries: {len(results.get('rag_data', []))}")
        print(f"   • DWSIM Simulations: {len(results.get('dwsim_data', []))}")
        print(f"   • Export Files: {len(results.get('exported_files', []))}")
        
        # Set up integrator with pipeline data
        if hasattr(pipeline, 'rag_pipeline'):
            dwsim_rag_integrator.rag_pipeline = pipeline.rag_pipeline
        
        print("✅ Pipeline data ready for enhanced analysis")
        
    else:
        print("❌ Pipeline execution failed")
        
except Exception as e:
    print(f"❌ Enhanced pipeline error: {e}")
    import traceback
    traceback.print_exc()

print("\n✅ Enhanced pipeline testing complete!")


🚀 Running Enhanced Pipeline for Development Testing...
🚀 Running complete PyNucleus pipeline...
🗑️ RAG results cleared.
🗑️ RAG results cleared.
🗑️ DWSIM results cleared from memory and disk.
🗑️ DWSIM results cleared.
🔬 Step 1: Running DWSIM simulations...
🔬 Starting DWSIM Simulations...
📋 Running 5 simulation cases...

🧪 Case 1/5: distillation_ethanol_water
   ✅ Success - Duration: 0.00s

🧪 Case 2/5: reactor_methane_combustion
   ✅ Success - Duration: 0.00s

🧪 Case 3/5: heat_exchanger_steam
   ✅ Success - Duration: 0.00s

🧪 Case 4/5: absorber_co2_capture
   ✅ Success - Duration: 0.00s

🧪 Case 5/5: crystallizer_salt
   ✅ Success - Duration: 0.00s

📊 Simulation Summary:
   • Successful simulations: 5/5
   • Failed simulations: 0/5
💾 Saved 5 DWSIM results to disk
📊 DWSIM Statistics:
   • Total Simulations: 5
   • Success Rate: 100.0%
   • Average Duration: 0.00s
✅ DWSIM: 5 simulations completed

📚 Step 2: Running RAG pipeline with DWSIM integration...
📚 Starting RAG Pipeline...
Step 1: Pr

In [23]:
# SECTION 3.1: DWSIM-RAG Integration and Enhanced Analysis
# ========================================================

print("🔬 Advanced DWSIM-RAG Integration Analysis...")

try:
    # Get DWSIM results
    dwsim_results = pipeline.dwsim_pipeline.get_results()
    
    if dwsim_results:
        print(f"📊 Processing {len(dwsim_results)} DWSIM simulations...")
        
        # Perform enhanced integration
        integrated_results = dwsim_rag_integrator.integrate_simulation_results(
            dwsim_results, perform_rag_analysis=True
        )
        
        # Export integrated results
        integrated_export_file = dwsim_rag_integrator.export_integrated_results()
        
        print(f"✅ Enhanced integration complete:")
        print(f"   • Integrated simulations: {len(integrated_results)}")
        print(f"   • Export file: {integrated_export_file}")
        
        # Show detailed analysis for first simulation
        if integrated_results:
            sample = integrated_results[0]
            print(f"\\n📋 Sample Analysis (First Simulation):")
            
            # Safely access original simulation data
            original_sim = sample.get('original_simulation', {})
            print(f"   • Case: {original_sim.get('case_name', 'Unknown')}")
            
            # Dynamically display all available performance metrics
            perf_metrics = sample.get('performance_metrics', {})
            if perf_metrics:
                print(f"   📊 Performance Metrics:")
                for key, value in perf_metrics.items():
                    # Format the key for display (convert snake_case to Title Case)
                    display_key = key.replace('_', ' ').title()
                    
                    # Handle different value types appropriately
                    if isinstance(value, (int, float)):
                        if 'rate' in key.lower() or 'percentage' in key.lower():
                            print(f"      • {display_key}: {value:.1f}%")
                        elif isinstance(value, float):
                            print(f"      • {display_key}: {value:.3f}")
                        else:
                            print(f"      • {display_key}: {value}")
                    elif isinstance(value, dict):
                        print(f"      • {display_key}: {len(value)} items")
                        # Show nested dict items if not too many
                        if len(value) <= 5:
                            for sub_key, sub_value in value.items():
                                sub_display_key = sub_key.replace('_', ' ').title()
                                print(f"        - {sub_display_key}: {sub_value}")
                    elif isinstance(value, list):
                        print(f"      • {display_key}: {len(value)} items")
                    else:
                        print(f"      • {display_key}: {value}")
            else:
                print(f"   ⚠️ No performance metrics available")
            
            # Show other analysis results dynamically
            analysis_sections = [
                ('potential_issues', 'Potential Issues'),
                ('recommendations', 'Recommendations'), 
                ('optimization_opportunities', 'Optimization Opportunities'),
                ('rag_insights', 'RAG Insights')
            ]
            
            for section_key, section_title in analysis_sections:
                section_data = sample.get(section_key, [])
                if section_data:
                    print(f"   📋 {section_title}: {len(section_data)} items")
                    # Show first few items as examples
                    for i, item in enumerate(section_data[:3]):
                        if isinstance(item, dict):
                            # For RAG insights or complex items
                            item_summary = item.get('content', item.get('query', str(item)))[:100]
                            print(f"      {i+1}. {item_summary}...")
                        else:
                            # For simple string items
                            print(f"      {i+1}. {item}")
                    if len(section_data) > 3:
                        print(f"      ... and {len(section_data) - 3} more")
        
    else:
        print("⚠️ No DWSIM results available. Run Section 2.2 first.")
        
except Exception as e:
    print(f"❌ Integration error: {e}")
    import traceback
    traceback.print_exc()

print("\\n✅ Advanced integration analysis complete!")


🔬 Advanced DWSIM-RAG Integration Analysis...
📊 Processing 5 DWSIM simulations...
✅ Enhanced 5 simulations with RAG insights
✅ Enhanced integration complete:
   • Integrated simulations: 5
   • Export file: data/05_output/results/integrated_dwsim_rag_results_20250618_000819.json
\n📋 Sample Analysis (First Simulation):
   • Case: distillation_ethanol_water
   📊 Performance Metrics:
      • Overall Performance: Good
      • Efficiency Rating: High
      • Reliability Score: High
      • Performance Indicators: 4 items
        - Process Type: distillation
        - Success Status: True
        - Duration Seconds: 0.0003
        - Timestamp: 2025-06-18 00:06:48
      • Conversion: 88.900
      • Selectivity: 97.600
      • Yield: 89.800
      • Recovery Rate: 86.8%
   📋 Recommendations: 1 items
      1. Simulation completed successfully - results are ready for analysis
   📋 Optimization Opportunities: 1 items
      1. Consider heat integration for energy efficiency
\n✅ Advanced integration 

In [24]:
# SECTION 3.2: LLM Report Generation and Financial Analysis
# =========================================================

print("💰 Advanced Financial Analysis and LLM Report Generation...")

try:
    if 'integrated_results' in locals() and integrated_results:
        
        # Generate LLM reports for all simulations
        print(f"📄 Generating LLM reports for {len(integrated_results)} simulations...")
        
        llm_report_files = []
        for i, result in enumerate(integrated_results):
            try:
                report_file = llm_generator.export_llm_ready_text(result)
                llm_report_files.append(report_file)
                print(f"   ✅ Report {i+1}: {Path(report_file).name}")
            except Exception as e:
                print(f"   ❌ Report {i+1} failed: {e}")
        
        # Generate comprehensive financial analysis
        financial_file = llm_generator.export_financial_analysis(integrated_results)
        metrics = llm_generator._calculate_key_metrics(integrated_results)
        
        print(f"\\n💰 Comprehensive Financial Metrics:")
        print(f"   • Average Recovery Rate: {metrics['avg_recovery']:.1f}%")
        print(f"   • Estimated Daily Revenue: ${metrics['estimated_revenue']:,.2f}")
        print(f"   • Estimated Daily Profit: ${metrics['net_profit']:,.2f}")
        print(f"   • Return on Investment: {metrics['roi']:.1f}%")
        print(f"   • Financial Analysis File: {financial_file}")
        
        # Performance summary
        print(f"\\n📊 Performance Summary:")
        good_performance = sum(1 for r in integrated_results 
                             if r['performance_metrics']['overall_performance'] == 'Good')
        print(f"   • High Performance Simulations: {good_performance}/{len(integrated_results)}")
        
        avg_efficiency = sum(1 for r in integrated_results 
                           if r['performance_metrics']['efficiency_rating'] == 'High') / len(integrated_results)
        print(f"   • High Efficiency Rate: {avg_efficiency:.1%}")
        
        print(f"\\n📄 Generated Files:")
        print(f"   • LLM Reports: {len(llm_report_files)} files")
        print(f"   • Financial Analysis: 1 file")
        
    else:
        print("⚠️ No integrated results available. Run Section 3.1 first.")
        
except Exception as e:
    print(f"❌ LLM/Financial analysis error: {e}")
    import traceback
    traceback.print_exc()

print("\\n✅ Advanced analysis and reporting complete!")


💰 Advanced Financial Analysis and LLM Report Generation...
📄 Generating LLM reports for 5 simulations...
   ✅ Report 1: distillation_ethanol_water_summary.md
   ✅ Report 2: reactor_methane_combustion_summary.md
   ✅ Report 3: heat_exchanger_steam_summary.md
   ✅ Report 4: absorber_co2_capture_summary.md
   ✅ Report 5: crystallizer_salt_summary.md
\n💰 Comprehensive Financial Metrics:
   • Average Recovery Rate: 83.9%
   • Estimated Daily Revenue: $151,056.00
   • Estimated Daily Profit: $61,056.00
   • Return on Investment: 6.8%
   • Financial Analysis File: data/05_output/llm_reports/financial_analysis_20250618_000819.csv
\n📊 Performance Summary:
   • High Performance Simulations: 2/5
   • High Efficiency Rate: 40.0%
\n📄 Generated Files:
   • LLM Reports: 5 files
   • Financial Analysis: 1 file
\n✅ Advanced analysis and reporting complete!


In [25]:
# SECTION 4.1: LLM Model Testing and Initialization
# =================================================

print("🤖 LLM Development Environment Initialization...")

try:
    # Initialize LLM components
    llm_runner = LLMRunner()
    llm_query_manager = LLMQueryManager(max_tokens=2048)
    
    # Test LLM functionality
    print(f"✅ LLM Runner initialized")
    print(f"✅ LLM Query Manager initialized")
    print(f"   • Template directory: {llm_query_manager.template_dir}")
    print(f"   • Template exists: {llm_query_manager.template_dir.exists()}")
    
    # Get model information
    model_info = llm_runner.get_model_info()
    print(f"\\n🔧 Model Information:")
    print(f"   • Model ID: {model_info['model_id']}")
    print(f"   • Vocabulary Size: {model_info['vocab_size']:,}")
    print(f"   • Device: {model_info['device']}")
    
    # Test basic prompt rendering
    test_prompt = llm_query_manager.render_prompt(
        user_query="Test chemical process optimization query",
        system_message="You are a chemical engineering expert."
    )
    
    print(f"\\n📋 Prompt System Test:")
    print(f"   • Template rendering: ✅ Success")
    print(f"   • Prompt length: {len(test_prompt)} characters")
    
except Exception as e:
    print(f"❌ LLM initialization error: {e}")
    import traceback
    traceback.print_exc()

print("\\n✅ LLM development environment ready!")


🤖 LLM Development Environment Initialization...
Loading tokenizer for gpt2...
Loading model gpt2 on cpu...
Model loaded successfully on cpu
Loading tokenizer for gpt2...
Loading model gpt2 on cpu...
Model loaded successfully on cpu
✅ LLM Runner initialized
✅ LLM Query Manager initialized
   • Template directory: /Users/mohammadalmusaiteer/PyNucleus-Model/prompts
   • Template exists: True
\n🔧 Model Information:
   • Model ID: gpt2
   • Vocabulary Size: 50,257
   • Device: cpu
\n📋 Prompt System Test:
   • Template rendering: ✅ Success
   • Prompt length: 289 characters
\n✅ LLM development environment ready!


In [26]:
# SECTION 4.2: Advanced Prompt Engineering and Testing
# ====================================================

print("🎯 Advanced Prompt Engineering and Testing...")

try:
    # Test different prompt scenarios
    test_scenarios = [
        {
            "name": "Process Optimization",
            "query": "How can we optimize the distillation column efficiency?",
            "system": "You are a process optimization expert specializing in distillation systems."
        },
        {
            "name": "Safety Analysis", 
            "query": "What safety considerations are important for modular chemical plants?",
            "system": "You are a chemical safety engineer with expertise in process hazard analysis."
        },
        {
            "name": "Economic Assessment",
            "query": "Analyze the economic benefits of modular plant design.",
            "system": "You are a chemical engineering economist specializing in plant design economics."
        }
    ]
    
    print(f"🧪 Testing {len(test_scenarios)} prompt scenarios...")
    
    for i, scenario in enumerate(test_scenarios):
        print(f"\\n📋 Scenario {i+1}: {scenario['name']}")
        
        try:
            # Render prompt
            prompt = llm_query_manager.render_prompt(
                user_query=scenario['query'],
                system_message=scenario['system']
            )
            
            print(f"   ✅ Prompt rendered successfully ({len(prompt)} chars)")
            
            # Quick test with LLM (generate a short response)
            response = llm_runner.ask(
                scenario['query'],
                max_length=50,
                temperature=0.7,
                do_sample=True
            )
            
            print(f"   ✅ LLM response generated ({len(response)} chars)")
            print(f"   📝 Preview: {response[:100]}...")
            
        except Exception as e:
            print(f"   ❌ Scenario {i+1} failed: {e}")
    
    # Test prompt system validation
    print(f"\\n🔍 Running Prompt System Validation...")
    try:
        # Import prompt system if available
        from src.pynucleus.llm.prompt_system import PyNucleusPromptSystem
        
        prompt_system = PyNucleusPromptSystem(template_dir="prompts")
        validation_result = prompt_system.validate_prompts()
        
        print("✅ Prompt system validation completed")
        
    except ImportError:
        print("⚠️ Prompt system module not available for validation")
    except Exception as e:
        print(f"⚠️ Prompt validation error: {e}")
    
except Exception as e:
    print(f"❌ Prompt engineering error: {e}")
    import traceback
    traceback.print_exc()

print("\\n✅ Prompt engineering and testing complete!")


🎯 Advanced Prompt Engineering and Testing...
🧪 Testing 3 prompt scenarios...
\n📋 Scenario 1: Process Optimization
   ✅ Prompt rendered successfully (341 chars)
   ✅ LLM response generated (153 chars)
   📝 Preview: In the Distillation column, there are two ways to do it:

Use a distillation column for the same amo...
\n📋 Scenario 2: Safety Analysis
   ✅ Prompt rendered successfully (357 chars)
   ✅ LLM response generated (194 chars)
   📝 Preview: A: Safety concerns are the most important. The safety of a chemical plant is the most important thin...
\n📋 Scenario 3: Economic Assessment
   ✅ Prompt rendered successfully (345 chars)
   ✅ LLM response generated (222 chars)
   📝 Preview: The modular plant design provides a high-efficiency, high-volume manufacturing capability that enabl...
\n🔍 Running Prompt System Validation...
⚠️ Prompt system module not available for validation
\n✅ Prompt engineering and testing complete!


In [27]:
# SECTION 5.1: Performance Analysis and Benchmarking
# ==================================================

print("📈 Performance Analysis and System Benchmarking...")

import time
import psutil
import gc

def measure_performance(func, name, *args, **kwargs):
    """Measure function performance"""
    gc.collect()  # Clean memory before measurement
    
    start_time = time.time()
    start_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB
    
    try:
        result = func(*args, **kwargs)
        success = True
        error = None
    except Exception as e:
        result = None
        success = False
        error = str(e)
    
    end_time = time.time()
    end_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB
    
    return {
        'name': name,
        'duration': end_time - start_time,
        'memory_used': end_memory - start_memory,
        'success': success,
        'error': error,
        'result': result
    }

# Performance benchmarks
benchmarks = []

print("🧪 Running Performance Benchmarks...")

# Benchmark 1: Pipeline initialization
bench1 = measure_performance(
    lambda: PipelineUtils(results_dir="data/05_output/results"),
    "Pipeline Initialization"
)
benchmarks.append(bench1)

# Benchmark 2: Configuration template creation
bench2 = measure_performance(
    lambda: config_manager.create_template_json("perf_test.json"),
    "Configuration Template Creation"
)
benchmarks.append(bench2)

# Benchmark 3: Quick status check
bench3 = measure_performance(
    pipeline.quick_test,
    "Quick Status Check"
)
benchmarks.append(bench3)

# Display results
print(f"\n📊 Performance Benchmark Results:")
print("-" * 60)
for bench in benchmarks:
    status = "✅" if bench['success'] else "❌"
    print(f"{status} {bench['name']:<30} {bench['duration']:>8.3f}s {bench['memory_used']:>8.1f}MB")
    if not bench['success']:
        print(f"   Error: {bench['error']}")

# System resource usage
print(f"\n💻 Current System Resources:")
print(f"   • CPU Usage: {psutil.cpu_percent():.1f}%")
print(f"   • Memory Usage: {psutil.virtual_memory().percent:.1f}%")
print(f"   • Available Memory: {psutil.virtual_memory().available / 1024 / 1024 / 1024:.1f} GB")

print("\n✅ Performance analysis complete!")


📈 Performance Analysis and System Benchmarking...
🧪 Running Performance Benchmarks...
🔧 Setting up RAG imports...
✅ RAG imports ready!
📂 Loaded 5 existing DWSIM results from disk
🔧 Setting up DWSIM imports...
✅ DWSIM modules imported successfully
📁 Results directory: data/05_output/results
🔧 Pipeline Utils initialized with results dir: data/05_output/results
🔗 DWSIM-RAG integration enabled
🧪 Quick Pipeline Test
------------------------------
📚 RAG: 7141 chunks available
🔗 Integration: ⚪ Documents only
📊 DWSIM Statistics:
   • Total Simulations: 5
   • Success Rate: 100.0%
   • Average Duration: 0.00s
🔬 DWSIM: 5 simulations
📁 Output: 2 CSV files

📊 Performance Benchmark Results:
------------------------------------------------------------
✅ Pipeline Initialization           0.016s      4.1MB
✅ Configuration Template Creation    0.000s      0.0MB
✅ Quick Status Check                0.001s      0.0MB

💻 Current System Resources:
   • CPU Usage: 34.2%
   • Memory Usage: 83.3%
   • Availabl

In [28]:
# SECTION 5.2: Debug Tools and System Cleanup
# ============================================

print("🔧 Debug Tools and System Maintenance...")

# System cleanup functions
def cleanup_temp_files():
    """Remove temporary files"""
    temp_patterns = ["perf_test.json", "dev_simulation_config.*"]
    cleaned = 0
    
    for pattern in temp_patterns:
        if "*" in pattern:
            import glob
            files = glob.glob(pattern)
            for file in files:
                try:
                    Path(file).unlink()
                    cleaned += 1
                except:
                    pass
        else:
            try:
                if Path(pattern).exists():
                    Path(pattern).unlink()
                    cleaned += 1
            except:
                pass
    
    return cleaned

def check_log_files():
    """Check system log files"""
    log_dirs = ["logs", "data/05_output/logs"]
    log_files = []
    
    for log_dir in log_dirs:
        if Path(log_dir).exists():
            for log_file in Path(log_dir).glob("*.log"):
                size = log_file.stat().st_size
                log_files.append({
                    'file': str(log_file),
                    'size': size,
                    'age': time.time() - log_file.stat().st_mtime
                })
    
    return log_files

# Run debug tools
print("🗑️ Running System Cleanup...")
cleaned_files = cleanup_temp_files()
print(f"   • Cleaned {cleaned_files} temporary files")

print("\n📋 Checking Log Files...")
log_files = check_log_files()
if log_files:
    print(f"   • Found {len(log_files)} log files:")
    for log in log_files[-5:]:  # Show last 5
        age_hours = log['age'] / 3600
        print(f"     - {Path(log['file']).name} ({log['size']} bytes, {age_hours:.1f}h old)")
else:
    print("   • No log files found")

# Memory cleanup
print("\n💾 Memory Cleanup...")
gc.collect()
print("   • Garbage collection completed")

# Final status
print(f"\n📊 Development Session Summary:")
print(f"   • Session Duration: {(datetime.now() - start_time).total_seconds():.1f} seconds")
print(f"   • Benchmarks Run: {len(benchmarks)}")
print(f"   • Components Tested: {'✅' if all(b['success'] for b in benchmarks) else '⚠️'}")

print("\n✅ Debug tools and cleanup complete!")


🔧 Debug Tools and System Maintenance...
🗑️ Running System Cleanup...
   • Cleaned 0 temporary files

📋 Checking Log Files...
   • Found 14 log files:
     - ingestion.log (51304 bytes, 0.0h old)
     - system_diagnostic_20250617_220244.log (13404 bytes, 2.1h old)
     - system_diagnostic_20250617_232907.log (16884 bytes, 0.7h old)
     - system_diagnostic_20250617_220903.log (13408 bytes, 2.0h old)
     - system_diagnostic_20250617_215819.log (13404 bytes, 2.2h old)

💾 Memory Cleanup...
   • Garbage collection completed

📊 Development Session Summary:
   • Session Duration: 95.2 seconds
   • Benchmarks Run: 3
   • Components Tested: ✅

✅ Debug tools and cleanup complete!


In [29]:
# SECTION 6.1: Version Control and Documentation
# ===============================================
# Uncomment and run these cells for version control operations

# from datetime import datetime
# import subprocess

# def update_github():
#     """Update GitHub repository with changes"""
#     print("📦 Starting GitHub update...")
#     
#     try:
#         # Add all changes
#         subprocess.run(["git", "add", "."], check=True)
#         print("   ✅ Files added to staging")
#         
#         # Commit with timestamp
#         commit_msg = f"Developer update: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
#         subprocess.run(["git", "commit", "-m", commit_msg], check=True)
#         print("   ✅ Changes committed")
#         
#         # Push to origin
#         subprocess.run(["git", "push", "origin", "main"], check=True)
#         print("   ✅ Changes pushed to GitHub")
#         
#         return True
#         
#     except subprocess.CalledProcessError as e:
#         print(f"   ❌ Git operation failed: {e}")
#         return False

# def log_development_session():
#     """Log this development session"""
#     log_file = "update_log.txt"
#     
#     with open(log_file, "a") as f:
#         timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
#         f.write(f"\n{timestamp}: Developer session - System testing and validation\n")
#     
#     print(f"✅ Development session logged to {log_file}")

# # Uncomment to run version control operations:
# # log_development_session()
# # update_github()

print("🔧 Version control tools ready (uncomment to use)")
print("💡 Manual operations available:")


🔧 Version control tools ready (uncomment to use)
💡 Manual operations available:
