In [1]:
# SECTION 1: System Initialization & Health Checks
# ================================================

import sys
import os
from pathlib import Path
import importlib
from datetime import datetime

# Announce the session; the timestamp records when this cell was executed.
print("üîß PyNucleus Developer Environment - Advanced Initialization...")
print(f"üìÖ Session started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Add src to Python path
# Path() is the current working directory, so 'src' is only found when the
# kernel was started from the directory that contains it. `src_path` is read
# again by the environment report in Section 1.2.
src_path = str(Path().resolve() / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import PyNucleus components with enhanced error handling
# The names bound inside this try block (classes and component instances) are
# notebook globals that later sections use directly; if this block fails they
# stay undefined.
try:
    # Core pipeline components (aligned with Capstone Project)
    from pynucleus.pipeline import PipelineUtils
    from pynucleus.integration.llm_output_generator import LLMOutputGenerator

    # Advanced developer components
    from pynucleus.integration.config_manager import ConfigManager
    from pynucleus.integration.dwsim_rag_integrator import DWSIMRAGIntegrator
    # LLMRunner / LLMQueryManager are only imported here; they are instantiated in Section 4.1.
    from pynucleus.llm import LLMRunner
    from pynucleus.llm.query_llm import LLMQueryManager

    print("‚úÖ All PyNucleus modules imported successfully")

    # Initialize core components (matching Capstone Project pattern)
    pipeline = PipelineUtils(results_dir="data/05_output/results")
    llm_generator = LLMOutputGenerator(results_dir="data/05_output/reports")

    # Initialize advanced developer components
    config_manager = ConfigManager(config_dir="configs")
    dwsim_rag_integrator = DWSIMRAGIntegrator(results_dir="data/05_output/results")

    print("‚úÖ Core components initialized")
    print("‚úÖ Advanced components initialized")
    print("üéØ Developer environment ready!")

    # Quick system health check
    # Kept in its own try so a failing status probe is reported as a warning
    # and does not fall through to the initialization-error handler below.
    try:
        quick_status = pipeline.quick_test()
        print(f"üìä System Status: {quick_status['csv_files_count']} files in results directory")
    except Exception as status_error:
        print(f"‚ö†Ô∏è Status check warning: {status_error}")

except ImportError as e:
    # A PyNucleus module (or one of its dependencies) could not be imported.
    print(f"‚ùå Import Error: {e}")
    print("üí° Please ensure you're in the PyNucleus-Model directory and all dependencies are installed")
except Exception as e:
    # Any other failure, e.g. raised by one of the component constructors above.
    print(f"‚ùå Initialization error: {e}")
    import traceback
    traceback.print_exc()
    print("üí° Check system setup and try restarting the kernel")

üîß PyNucleus Developer Environment - Advanced Initialization...
üìÖ Session started: 2025-06-18 23:27:34
‚úÖ All PyNucleus modules imported successfully


  from .autonotebook import tqdm as notebook_tqdm
  embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')


‚úÖ Core components initialized
‚úÖ Advanced components initialized
üéØ Developer environment ready!
üìä System Status: 1 files in results directory


In [2]:
# SECTION 1.2: Advanced System Diagnostic & Health Check
# ======================================================

print("üîç Running Advanced System Diagnostic...")

# Enhanced system diagnostic with better error handling
def run_system_diagnostic(script_path="scripts/comprehensive_system_diagnostic.py", timeout=30):
    """Run the external diagnostic script and print a short health summary.

    The script is executed with the current interpreter and the ``--quiet``
    flag. On success, the last five output lines that contain a health-related
    keyword are printed.

    Args:
        script_path: Path to the diagnostic script, resolved against the
            current working directory when relative.
        timeout: Maximum number of seconds to wait for the script.

    Returns:
        True when the script ran and exited with status 0; False when the
        script is missing, times out, exits non-zero, or cannot be launched.
    """
    # Imported ahead of the try block so the except clauses can always resolve
    # subprocess.TimeoutExpired.
    import subprocess

    # subprocess.run() does not raise FileNotFoundError for a missing script:
    # the interpreter starts fine and merely exits non-zero. Check the file
    # explicitly so the "not found" message is actually reachable.
    if not Path(script_path).is_file():
        print("‚ö†Ô∏è System diagnostic script not found")
        return False

    try:
        result = subprocess.run(
            [sys.executable, str(script_path), "--quiet"],
            capture_output=True, text=True, cwd=".", timeout=timeout
        )
    except subprocess.TimeoutExpired:
        print("‚ö†Ô∏è System diagnostic timed out (taking longer than expected)")
        return False
    except FileNotFoundError:
        # Raised when the interpreter executable itself cannot be started.
        print("‚ö†Ô∏è System diagnostic script not found")
        return False
    except Exception as e:
        print(f"‚ö†Ô∏è System diagnostic error: {e}")
        return False

    if result.returncode != 0:
        print("‚ö†Ô∏è System diagnostic detected issues:")
        if result.stderr:
            print(f"   Error: {result.stderr.strip()}")
        return False

    print("‚úÖ System diagnostic completed successfully")

    # Extract and display key health metrics
    keywords = ('Health:', 'Status:', 'EXCELLENT', 'GOOD', 'passed', 'Score:')
    health_metrics = [
        line.strip()
        for line in result.stdout.strip().split('\n')
        if any(keyword in line for keyword in keywords)
    ]

    if health_metrics:
        print("üìä System Health Metrics:")
        for metric in health_metrics[-5:]:  # Show last 5 relevant metrics
            print(f"   {metric}")

    return True

diagnostic_success = run_system_diagnostic()

# Enhanced manual system checks
print(f"\nüîç Manual System Health Checks:")

# Directories found missing by the manual checks below; they feed the final
# verdict so the summary cannot claim success after reporting a missing path.
missing_paths = []

# Check critical data directories
data_dirs = {
    'data/01_raw': 'Raw data storage',
    'data/02_processed': 'Processed data cache',
    'data/03_intermediate': 'Intermediate results',
    'data/04_models': 'Model artifacts',
    'data/05_output': 'Pipeline outputs'
}

for dir_path, description in data_dirs.items():
    if Path(dir_path).exists():
        # rglob("*") yields files and sub-directories at every depth.
        file_count = len(list(Path(dir_path).rglob("*")))
        print(f"   ‚úÖ {dir_path} ({file_count} items) - {description}")
    else:
        missing_paths.append(dir_path)
        print(f"   ‚ùå {dir_path} - Missing! {description}")

# Check source code structure
src_dirs = {
    'src/pynucleus/pipeline': 'Pipeline components',
    'src/pynucleus/rag': 'RAG system',
    'src/pynucleus/integration': 'Integration modules',
    'src/pynucleus/llm': 'LLM components'
}

print(f"\nüèóÔ∏è Source Code Structure:")
for dir_path, description in src_dirs.items():
    if Path(dir_path).exists():
        # Only .py files directly inside the package directory are counted.
        py_files = len(list(Path(dir_path).glob("*.py")))
        print(f"   ‚úÖ {dir_path} ({py_files} Python files) - {description}")
    else:
        missing_paths.append(dir_path)
        print(f"   ‚ùå {dir_path} - Missing! {description}")

# System environment check
pynucleus_on_path = src_path in sys.path
print(f"\nüêç Python Environment:")
print(f"   ‚Ä¢ Python Version: {sys.version.split()[0]}")
print(f"   ‚Ä¢ Working Directory: {Path.cwd()}")
print(f"   ‚Ä¢ PyNucleus Path: {'‚úÖ Found' if pynucleus_on_path else '‚ùå Not in path'}")

# The verdict covers the external diagnostic AND the manual checks above.
all_checks_passed = bool(diagnostic_success) and not missing_paths and pynucleus_on_path
print(f"\nüéØ {'‚úÖ System diagnostic complete - All checks passed!' if all_checks_passed else '‚ö†Ô∏è System diagnostic complete - Some issues detected'}")


üîç Running Advanced System Diagnostic...
‚úÖ System diagnostic completed successfully

üîç Manual System Health Checks:
   ‚úÖ data/01_raw (16 items) - Raw data storage
   ‚úÖ data/02_processed (20 items) - Processed data cache
   ‚úÖ data/03_intermediate (4 items) - Intermediate results
   ‚úÖ data/04_models (26 items) - Model artifacts
   ‚úÖ data/05_output (150 items) - Pipeline outputs

üèóÔ∏è Source Code Structure:
   ‚úÖ src/pynucleus/pipeline (5 Python files) - Pipeline components
   ‚úÖ src/pynucleus/rag (6 Python files) - RAG system
   ‚úÖ src/pynucleus/integration (4 Python files) - Integration modules
   ‚úÖ src/pynucleus/llm (10 Python files) - LLM components

üêç Python Environment:
   ‚Ä¢ Python Version: 3.13.1
   ‚Ä¢ Working Directory: /Users/mohammadalmusaiteer/PyNucleus-Model
   ‚Ä¢ PyNucleus Path: ‚úÖ Found

üéØ ‚úÖ System diagnostic complete - All checks passed!


In [3]:
# SECTION 1.3: Pipeline Status and Health Check
# =============================================

print("üìä Detailed Pipeline Status Check...")

try:
    # Ask the pipeline for its lightweight self-report.
    test_results = pipeline.quick_test()

    print("\n" + "="*50)

    # A missing report is replaced by an empty one so the lookups below still work.
    if test_results is None:
        print("‚ùå Quick test returned None - pipeline may not be properly initialized")
        test_results = dict(
            results_dir='data/05_output/results',
            csv_files_count=0,
            csv_files=[],
        )

    # Every field is read with a fallback in case the report omits it.
    results_dir = test_results.get('results_dir', 'data/05_output/results')
    csv_files_count = test_results.get('csv_files_count', 0)
    csv_files = test_results.get('csv_files', [])

    print(f"üìÅ Results Directory: {results_dir}")
    print(f"üìÑ CSV Files: {csv_files_count}")

    if csv_files_count > 0:
        print("\nüìã Existing Files:")
        for entry in csv_files:
            if not isinstance(entry, dict):
                # Entries that are not dicts are printed as-is.
                print(f"   ‚Ä¢ {entry}")
                continue
            entry_name = entry.get('name', 'Unknown')
            entry_size = entry.get('size', 0)
            print(f"   ‚Ä¢ {entry_name} ({entry_size} bytes)")

    # Component health: truthiness of the objects held by this notebook session.
    print(f"\nüîß Component Health:")
    health_rows = (
        ("Pipeline Utils", pipeline),
        ("RAG Pipeline", hasattr(pipeline, 'rag_pipeline') and pipeline.rag_pipeline),
        ("DWSIM Pipeline", hasattr(pipeline, 'dwsim_pipeline') and pipeline.dwsim_pipeline),
        ("Config Manager", 'config_manager' in locals() and config_manager),
        ("DWSIM-RAG Integrator", 'dwsim_rag_integrator' in locals() and dwsim_rag_integrator),
        ("LLM Generator", 'llm_generator' in locals() and llm_generator),
    )
    for label, component in health_rows:
        print(f"   ‚Ä¢ {label}: {'‚úÖ' if component else '‚ùå'}")

    # Component status as stated by the report itself; absent keys count as False.
    component_status = test_results.get('component_status', {})
    print(f"\nüîç Pipeline Component Status:")
    status_rows = (
        ("RAG Pipeline", 'rag_pipeline'),
        ("DWSIM Pipeline", 'dwsim_pipeline'),
        ("Results Exporter", 'exporter'),
    )
    for label, status_key in status_rows:
        print(f"   ‚Ä¢ {label}: {'‚úÖ' if component_status.get(status_key, False) else '‚ùå'}")

    # Integration figures, again with fallbacks for absent keys.
    rag_chunks = test_results.get('rag_chunks', 0)
    simulation_chunks = test_results.get('simulation_chunks', 0)
    integration_enabled = test_results.get('integration_enabled', False)

    print(f"\nüîó Integration Status:")
    print(f"   ‚Ä¢ Total RAG Chunks: {rag_chunks:,}")
    print(f"   ‚Ä¢ Simulation Chunks: {simulation_chunks:,}")
    print(f"   ‚Ä¢ Integration Active: {'‚úÖ' if integration_enabled else '‚ö™'}")

    print("\n‚úÖ Status check complete!")

except Exception as status_check_error:
    print(f"‚ùå Status check error: {status_check_error}")
    import traceback
    traceback.print_exc()

    # Point the reader at the usual recovery steps.
    print("\nüîß Troubleshooting Tips:")
    for tip in (
        "   1. Try restarting the notebook kernel",
        "   2. Re-run Cell 1 to reinitialize components",
        "   3. Check if all required directories exist",
        "   4. Verify PyNucleus installation is complete",
    ):
        print(tip)

üìä Detailed Pipeline Status Check...

üìÅ Results Directory: data/05_output/results
üìÑ CSV Files: 1

üìã Existing Files:
   ‚Ä¢ dev_simulation_config.csv (171 bytes)

üîß Component Health:
   ‚Ä¢ Pipeline Utils: ‚úÖ
   ‚Ä¢ RAG Pipeline: ‚úÖ
   ‚Ä¢ DWSIM Pipeline: ‚úÖ
   ‚Ä¢ Config Manager: ‚úÖ
   ‚Ä¢ DWSIM-RAG Integrator: ‚úÖ
   ‚Ä¢ LLM Generator: ‚úÖ

üîç Pipeline Component Status:
   ‚Ä¢ RAG Pipeline: ‚ùå
   ‚Ä¢ DWSIM Pipeline: ‚ùå
   ‚Ä¢ Results Exporter: ‚ùå

üîó Integration Status:
   ‚Ä¢ Total RAG Chunks: 0
   ‚Ä¢ Simulation Chunks: 0
   ‚Ä¢ Integration Active: ‚ö™

‚úÖ Status check complete!


In [4]:
# SECTION 2.1: Configuration Templates and Management
# ===================================================

print("üîß Advanced Configuration Management...")

# Build, save, preview and reload a development configuration template.
try:
    # Show what the configuration manager already knows about.
    existing_configs = config_manager.list_configs()
    print(f"üìã Existing Configuration Files: {len(existing_configs)}")
    for existing_name in existing_configs:
        print(f"   ‚Ä¢ {existing_name}")

    # Two sample simulation cases plus descriptive metadata.
    first_case = {
        "case_name": "dev_test_case_1",
        "temperature": 350.0,
        "pressure": 2.5,
        "feed_rate": 100.0,
        "catalyst_type": "Pt/Al2O3",
        "process_type": "distillation"
    }
    second_case = {
        "case_name": "dev_test_case_2",
        "temperature": 375.0,
        "pressure": 3.0,
        "feed_rate": 120.0,
        "catalyst_type": "Pd/C",
        "process_type": "reaction"
    }
    template_metadata = {
        "created_by": "Developer_Notebook",
        "version": "1.0",
        "description": "Development configuration template"
    }
    sample_config = {
        "simulations": [first_case, second_case],
        "metadata": template_metadata
    }

    # Persist the same configuration under a JSON and a CSV file name.
    json_template_path = config_manager.save(sample_config, "dev_simulation_config.json")
    print(f"‚úÖ JSON template created: {json_template_path}")

    csv_template_path = config_manager.save(sample_config, "dev_simulation_config.csv")
    print(f"‚úÖ CSV template created: {csv_template_path}")

    # Preview at most the first 300 characters of the JSON file.
    if json_template_path.exists():
        with open(json_template_path, 'r') as template_file:
            content = template_file.read()[:300]
        print(f"\nüìã JSON Template Preview:")
        if len(content) >= 300:
            # The slice hit its limit, so mark the preview as cut off.
            print(content + "...")
        else:
            print(content)

    # Round-trip check: load the JSON template back and count its cases.
    loaded_config = config_manager.load("dev_simulation_config.json")
    print(f"\nüîÑ Template validation: {len(loaded_config['simulations'])} simulations loaded")

except Exception as config_error:
    print(f"‚ùå Configuration error: {config_error}")
    import traceback
    traceback.print_exc()

print("\n‚úÖ Configuration management ready!")

üîß Advanced Configuration Management...
üìã Existing Configuration Files: 3
   ‚Ä¢ dev_simulation_config.csv
   ‚Ä¢ dev_simulation_config.json
   ‚Ä¢ perf_test.json
‚úÖ JSON template created: configs/dev_simulation_config.json
‚úÖ CSV template created: configs/dev_simulation_config.csv

üìã JSON Template Preview:
{
  "simulations": [
    {
      "case_name": "dev_test_case_1",
      "temperature": 350.0,
      "pressure": 2.5,
      "feed_rate": 100.0,
      "catalyst_type": "Pt/Al2O3",
      "process_type": "distillation"
    },
    {
      "case_name": "dev_test_case_2",
      "temperature": 375.0,
      "...

üîÑ Template validation: 2 simulations loaded

‚úÖ Configuration management ready!


In [5]:
# SECTION 2.2: Run Enhanced Pipeline with Advanced Analytics
# ==========================================================

print("üöÄ Running Enhanced Pipeline for Advanced Development Testing...")
print("\nüìä This enhanced run includes:")
print("   ‚Ä¢ Document processing with real FAISS vector store")
print("   ‚Ä¢ DWSIM chemical process simulations")
print("   ‚Ä¢ Advanced integration and analysis")
print("   ‚Ä¢ Performance metrics and validation")
print("\n‚è≥ Please wait... Enhanced analysis may take 30-60 seconds.\n")

# Enhanced pipeline execution with comprehensive error handling
try:
    start_time = datetime.now()

    # Execute core pipeline (matching Capstone Project pattern)
    results = pipeline.run_complete_pipeline()

    if results:
        # Elapsed wall-clock time measured by this cell; used as the fallback
        # when the results carry no 'duration' entry of their own.
        duration = (datetime.now() - start_time).total_seconds()
        print(f"\nüéâ Pipeline completed successfully in {duration:.1f} seconds!")

        # Enhanced results analysis
        print(f"\nüìä Detailed Results Analysis:")
        rag_count = len(results.get('rag_data', []))
        dwsim_count = len(results.get('dwsim_data', []))
        files_count = len(results.get('exported_files', []))

        print(f"   ‚Ä¢ RAG Document Queries: {rag_count}")
        print(f"   ‚Ä¢ DWSIM Simulations: {dwsim_count}")
        print(f"   ‚Ä¢ Export Files Generated: {files_count}")
        print(f"   ‚Ä¢ Total Processing Time: {results.get('duration', duration):.1f}s")

        # Advanced integration setup (developer feature)
        try:
            # Hand the pipeline's RAG component to the integrator used in Section 3.1.
            if hasattr(pipeline, 'rag_pipeline') and pipeline.rag_pipeline:
                dwsim_rag_integrator.rag_pipeline = pipeline.rag_pipeline
                print("\nüîó Advanced Integration:")
                print("   ‚Ä¢ RAG pipeline connected to integrator")
                print("   ‚Ä¢ Ready for enhanced analysis workflows")

            # Performance metrics
            # duration > 0 keeps a zero elapsed time from raising ZeroDivisionError.
            if rag_count > 0 and dwsim_count > 0 and duration > 0:
                processing_rate = (rag_count + dwsim_count) / duration
                print(f"   ‚Ä¢ Processing Rate: {processing_rate:.2f} items/second")

        except Exception as integration_error:
            print(f"\n‚ö†Ô∏è Integration setup warning: {integration_error}")

        print("\n‚úÖ Pipeline data ready for advanced developer analysis")

        # Store results for next sections. A plain assignment already creates a
        # notebook-level name here; the former `global` statement was a no-op
        # at module level and has been dropped.
        enhanced_pipeline_results = results

    else:
        print("‚ùå Pipeline execution failed")
        print("üí° Check system initialization and try running Cell 1 again")

except Exception as e:
    print(f"‚ùå Enhanced pipeline error: {e}")
    import traceback
    traceback.print_exc()
    print("\nüí° Troubleshooting tips:")
    print("   ‚Ä¢ Ensure all components were initialized successfully in Cell 1")
    print("   ‚Ä¢ Check that data directories exist and are accessible")
    print("   ‚Ä¢ Try restarting the kernel and re-running all cells")

print("\n‚úÖ Enhanced pipeline testing complete!")


üöÄ Running Enhanced Pipeline for Advanced Development Testing...

üìä This enhanced run includes:
   ‚Ä¢ Document processing with real FAISS vector store
   ‚Ä¢ DWSIM chemical process simulations
   ‚Ä¢ Advanced integration and analysis
   ‚Ä¢ Performance metrics and validation

‚è≥ Please wait... Enhanced analysis may take 30-60 seconds.



Failed to export RAG results: Object of type float32 is not JSON serializable



üéâ Pipeline completed successfully in 14.3 seconds!

üìä Detailed Results Analysis:
   ‚Ä¢ RAG Document Queries: 3
   ‚Ä¢ DWSIM Simulations: 3
   ‚Ä¢ Export Files Generated: 1
   ‚Ä¢ Total Processing Time: 14.3s

üîó Advanced Integration:
   ‚Ä¢ RAG pipeline connected to integrator
   ‚Ä¢ Ready for enhanced analysis workflows
   ‚Ä¢ Processing Rate: 0.42 items/second

‚úÖ Pipeline data ready for advanced developer analysis

‚úÖ Enhanced pipeline testing complete!


In [6]:
# SECTION 3.1: DWSIM-RAG Integration and Enhanced Analysis
# ========================================================

print("üî¨ Advanced DWSIM-RAG Integration Analysis...")

try:
    # Simulation results as held by the pipeline's DWSIM component.
    dwsim_results = pipeline.dwsim_pipeline.get_results()

    if not dwsim_results:
        print("‚ö†Ô∏è No DWSIM results available. Run Section 2.2 first.")
    else:
        print(f"üìä Processing {len(dwsim_results)} DWSIM simulations...")

        # Combine the simulation results with RAG analysis, then export them.
        integrated_results = dwsim_rag_integrator.integrate_simulation_results(
            dwsim_results, perform_rag_analysis=True
        )
        integrated_export_file = dwsim_rag_integrator.export_integrated_results()

        print(f"‚úÖ Enhanced integration complete:")
        print(f"   ‚Ä¢ Integrated simulations: {len(integrated_results)}")
        print(f"   ‚Ä¢ Export file: {integrated_export_file}")

        # Walk through the first integrated record as a worked example.
        if integrated_results:
            sample = integrated_results[0]
            print(f"\nüìã Sample Analysis (First Simulation):")

            # Missing sections fall back to empty dicts.
            original_sim = sample.get('original_simulation', {})
            print(f"   ‚Ä¢ Case: {original_sim.get('case_name', 'Unknown')}")

            perf_metrics = sample.get('performance_metrics', {})
            if not perf_metrics:
                print(f"   ‚ö†Ô∏è No performance metrics available")
            else:
                print(f"   üìä Performance Metrics:")
                for metric_key, metric_value in perf_metrics.items():
                    display_key = metric_key.replace('_', ' ').title()
                    key_lower = metric_key.lower()
                    is_numeric = isinstance(metric_value, (int, float))

                    # Numeric rate/percentage metrics get one decimal and a
                    # percent sign, other floats three decimals, and everything
                    # else is shown unformatted.
                    if is_numeric and ('rate' in key_lower or 'percentage' in key_lower):
                        rendered = f"{metric_value:.1f}%"
                    elif isinstance(metric_value, float):
                        rendered = f"{metric_value:.3f}"
                    else:
                        rendered = f"{metric_value}"
                    print(f"      ‚Ä¢ {display_key}: {rendered}")

except Exception as integration_failure:
    print(f"‚ùå Integration error: {integration_failure}")
    import traceback
    traceback.print_exc()

print("\n‚úÖ Advanced integration analysis complete!")


üî¨ Advanced DWSIM-RAG Integration Analysis...
üìä Processing 3 DWSIM simulations...
‚úÖ Enhanced integration complete:
   ‚Ä¢ Integrated simulations: 3
   ‚Ä¢ Export file: data/05_output/results/integrated_results_20250618_232806.json

üìã Sample Analysis (First Simulation):
   ‚Ä¢ Case: distillation_ethanol_water
   üìä Performance Metrics:
      ‚Ä¢ Conversion: 0.850
      ‚Ä¢ Selectivity: 0.920
      ‚Ä¢ Yield: 0.782
      ‚Ä¢ Overall Performance: Good
      ‚Ä¢ Efficiency Rating: High
      ‚Ä¢ Temperature Rating: Suboptimal
      ‚Ä¢ Pressure Rating: Optimal

‚úÖ Advanced integration analysis complete!


In [7]:
# SECTION 3.2: LLM Report Generation and Financial Analysis
# =========================================================

print("üí∞ Advanced Financial Analysis and LLM Report Generation...")

try:
    # This section needs the integrated results produced by Section 3.1.
    if 'integrated_results' not in locals() or not integrated_results:
        print("‚ö†Ô∏è No integrated results available. Run Section 3.1 first.")
    else:
        print(f"üìÑ Generating LLM reports for {len(integrated_results)} simulations...")

        # One report per simulation; a failed report is logged and skipped so
        # the remaining ones are still produced.
        llm_report_files = []
        for report_number, simulation_result in enumerate(integrated_results, start=1):
            try:
                report_file = llm_generator.export_llm_ready_text(simulation_result)
                llm_report_files.append(report_file)
                print(f"   ‚úÖ Report {report_number}: {Path(report_file).name}")
            except Exception as report_error:
                print(f"   ‚ùå Report {report_number} failed: {report_error}")

        # Financial summary across all simulations.
        financial_file = llm_generator.export_financial_analysis(integrated_results)
        metrics = llm_generator._calculate_key_metrics(integrated_results)

        avg_recovery = metrics['avg_recovery']
        estimated_revenue = metrics['estimated_revenue']
        net_profit = metrics['net_profit']
        roi = metrics['roi']

        print(f"\nüí∞ Comprehensive Financial Metrics:")
        print(f"   ‚Ä¢ Average Recovery Rate: {avg_recovery:.1f}%")
        print(f"   ‚Ä¢ Estimated Daily Revenue: ${estimated_revenue:,.2f}")
        print(f"   ‚Ä¢ Estimated Daily Profit: ${net_profit:,.2f}")
        print(f"   ‚Ä¢ Return on Investment: {roi:.1f}%")
        print(f"   ‚Ä¢ Financial Analysis File: {financial_file}")

        print(f"\nüìÑ Generated Files:")
        print(f"   ‚Ä¢ LLM Reports: {len(llm_report_files)} files")
        print(f"   ‚Ä¢ Financial Analysis: 1 file")

except Exception as analysis_error:
    print(f"‚ùå LLM/Financial analysis error: {analysis_error}")
    import traceback
    traceback.print_exc()

print("\n‚úÖ Advanced analysis and reporting complete!")


üí∞ Advanced Financial Analysis and LLM Report Generation...
üìÑ Generating LLM reports for 3 simulations...
   ‚úÖ Report 1: llm_analysis_distillation_ethanol_water_20250618_232806.md
   ‚úÖ Report 2: llm_analysis_methanol_synthesis_20250618_232806.md
   ‚úÖ Report 3: llm_analysis_heat_exchanger_optimization_20250618_232806.md

üí∞ Comprehensive Financial Metrics:
   ‚Ä¢ Average Recovery Rate: 85.0%
   ‚Ä¢ Estimated Daily Revenue: $997,050.00
   ‚Ä¢ Estimated Daily Profit: $-2,950.00
   ‚Ä¢ Return on Investment: -43.2%
   ‚Ä¢ Financial Analysis File: data/05_output/reports/financial_analysis_20250618_232806.md

üìÑ Generated Files:
   ‚Ä¢ LLM Reports: 3 files
   ‚Ä¢ Financial Analysis: 1 file

‚úÖ Advanced analysis and reporting complete!


In [8]:
# SECTION 4.1: LLM Model Testing, Initialization & DSPy Integration
# ================================================================

print("ü§ñ LLM Development Environment & DSPy Workflow Initialization...")

try:
    # Initialize core LLM components
    llm_runner = LLMRunner()
    llm_query_manager = LLMQueryManager()

    print("‚úÖ LLM Runner initialized")
    print("‚úÖ LLM Query Manager initialized")

    # Try to initialize DSPy components if available. The flag starts False
    # and is raised only once every import and constructor below has succeeded.
    dspy_available = False
    try:
        # PyNucleusDSPyProgram is imported purely as an availability probe.
        from pynucleus.llm.dspy_program import PyNucleusDSPyProgram
        from pynucleus.llm.answer_engine import AnswerEngine
        from pynucleus.llm.device_manager import DeviceManager

        # Initialize DSPy components
        device_manager = DeviceManager()
        answer_engine = AnswerEngine()

        print("‚úÖ DSPy Program Module available")
        print("‚úÖ Answer Engine initialized")
        print("‚úÖ Device Manager initialized")

        dspy_available = True

    except ImportError as dspy_error:
        print(f"‚ö†Ô∏è DSPy modules not available: {dspy_error}")
    except Exception as dspy_error:
        print(f"‚ö†Ô∏è DSPy initialization warning: {dspy_error}")

    # Get model information
    model_info = llm_runner.get_model_info()
    print(f"\nüîß Model Information:")
    # .get() so that a report without 'model_id' (for example one that only
    # carries an 'error' entry) still reaches the error line below instead of
    # raising KeyError here.
    print(f"   ‚Ä¢ Model ID: {model_info.get('model_id', 'Unknown')}")

    # Display available model metrics
    for key, value in model_info.items():
        if key in ('model_id', 'error'):
            continue
        if key == 'num_parameters' and isinstance(value, int):
            print(f"   ‚Ä¢ {key.replace('_', ' ').title()}: {value:,}")
        elif key == 'memory_footprint' and isinstance(value, int):
            # The value is divided by 1024 twice and labelled MB.
            print(f"   ‚Ä¢ Memory: {value / 1024 / 1024:.1f} MB")
        else:
            print(f"   ‚Ä¢ {key.replace('_', ' ').title()}: {value}")

    # Check for model errors
    if 'error' in model_info:
        print(f"   ‚ö†Ô∏è Model Info Error: {model_info['error']}")

    # Test prompt system
    test_prompt = llm_query_manager._create_general_query_prompt(
        question="Test chemical process optimization query",
        context="You are a chemical engineering expert."
    )

    print(f"\nüìã Prompt System Test:")
    print("   ‚Ä¢ Template rendering: ‚úÖ Success")
    print(f"   ‚Ä¢ Prompt length: {len(test_prompt)} characters")

    # Test tokenizer if available
    if hasattr(llm_runner, 'tokenizer') and llm_runner.tokenizer:
        vocab_size = len(llm_runner.tokenizer)
        print(f"   ‚Ä¢ Vocabulary Size: {vocab_size:,}")

    # DSPy workflow status
    print(f"\nüß† DSPy Workflow Status:")
    print(f"   ‚Ä¢ DSPy Available: {'‚úÖ' if dspy_available else '‚ùå'}")
    if dspy_available:
        print("   ‚Ä¢ Answer Engine: Ready for advanced query processing")
        try:
            device_info = device_manager.get_device_info()
            print(f"   ‚Ä¢ Device Manager: {device_info}")
        except Exception as device_error:
            # Previously a bare `except:` that hid the failure (and would also
            # have swallowed KeyboardInterrupt); the reason is now reported.
            print(f"   ‚Ä¢ Device Manager: Initialized (device info unavailable: {device_error})")
    else:
        print("   ‚Ä¢ Advanced DSPy workflows not available in this session")

except Exception as e:
    print(f"‚ùå LLM initialization error: {e}")
    import traceback
    traceback.print_exc()
else:
    # Only claim readiness when initialization finished without an error.
    print("\n‚úÖ LLM development environment ready!")

ü§ñ LLM Development Environment & DSPy Workflow Initialization...


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


‚úÖ LLM Runner initialized
‚úÖ LLM Query Manager initialized
‚ö†Ô∏è DSPy modules not available: cannot import name 'PyNucleusDSPyProgram' from 'pynucleus.llm.dspy_program' (/Users/mohammadalmusaiteer/PyNucleus-Model/src/pynucleus/llm/dspy_program.py)

üîß Model Information:
   ‚Ä¢ Model ID: Qwen/Qwen2.5-1.5B-Instruct
   ‚Ä¢ Device: cpu
   ‚Ä¢ Dtype: torch.float32
   ‚Ä¢ Num Parameters: 1,543,714,304
   ‚Ä¢ Memory: 5888.8 MB

üìã Prompt System Test:
   ‚Ä¢ Template rendering: ‚úÖ Success
   ‚Ä¢ Prompt length: 108 characters
   ‚Ä¢ Vocabulary Size: 151,665

üß† DSPy Workflow Status:
   ‚Ä¢ DSPy Available: ‚ùå
   ‚Ä¢ Advanced DSPy workflows not available in this session

‚úÖ LLM development environment ready!


In [9]:
# SECTION 4.2: Advanced Prompt Engineering and Testing
# ====================================================

print("üéØ Advanced Prompt Engineering and Testing...")

try:
    # Each scenario pairs a user query with the expert persona used as context.
    test_scenarios = [
        {
            "name": "Process Optimization",
            "query": "How can we optimize the distillation column efficiency?",
            "system": "You are a process optimization expert specializing in distillation systems."
        },
        {
            "name": "Safety Analysis",
            "query": "What safety considerations are important for modular chemical plants?",
            "system": "You are a chemical safety engineer with expertise in process hazard analysis."
        },
        {
            "name": "Economic Assessment",
            "query": "Analyze the economic benefits of modular plant design.",
            "system": "You are a chemical engineering economist specializing in plant design economics."
        }
    ]

    print(f"üß™ Testing {len(test_scenarios)} prompt scenarios...")

    for scenario_number, scenario in enumerate(test_scenarios, start=1):
        print(f"\nüìã Scenario {scenario_number}: {scenario['name']}")

        # A failing scenario is reported and the loop moves on to the next one.
        try:
            # Render the query and persona through the query manager's template.
            prompt = llm_query_manager._create_general_query_prompt(
                question=scenario['query'],
                context=scenario['system']
            )
            prompt_length = len(prompt)
            print(f"   ‚úÖ Prompt rendered successfully ({prompt_length} chars)")

            # NOTE(review): the rendered `prompt` above is only measured; the
            # generation call below is driven by the raw query, so the
            # scenario's system context never reaches the model. Confirm
            # whether that is intended.
            response_result = llm_runner.generate_response(
                prompt=scenario['query'],
                max_length=100,
                temperature=0.7,
                do_sample=True
            )

            # The generated text is read from the 'response' entry of the result.
            response_text = response_result.get('response', 'No response generated')
            preview = response_text[:100]
            print(f"   ‚úÖ LLM response generated ({len(response_text)} chars)")
            print(f"   üìù Preview: {preview}...")

        except Exception as scenario_error:
            print(f"   ‚ùå Scenario {scenario_number} failed: {scenario_error}")

except Exception as prompt_error:
    print(f"‚ùå Prompt engineering error: {prompt_error}")
    import traceback
    traceback.print_exc()

print("\n‚úÖ Prompt engineering and testing complete!")

üéØ Advanced Prompt Engineering and Testing...
üß™ Testing 3 prompt scenarios...

üìã Scenario 1: Process Optimization
   ‚úÖ Prompt rendered successfully (160 chars)
   ‚úÖ LLM response generated (457 chars)
   üìù Preview: To optimize a distillate column's efficiency, you should focus on several key factors:

1. Column de...

üìã Scenario 2: Safety Analysis
   ‚úÖ Prompt rendered successfully (176 chars)
   ‚úÖ LLM response generated (521 chars)
   üìù Preview: 1. Proper Ventilation: All chemicals, gases and vapors should be ventilated to ensure that the conce...

üìã Scenario 3: Economic Assessment
   ‚úÖ Prompt rendered successfully (164 chars)
   ‚úÖ LLM response generated (535 chars)
   üìù Preview: Modular plant designs offer several significant economic advantages over traditional, site-specific ...

‚úÖ Prompt engineering and testing complete!


In [10]:
# SECTION 5.1: Performance Analysis and Benchmarking
# ==================================================

print("üìà Performance Analysis and System Benchmarking...")

import time
import psutil
import gc

def measure_performance(func, name, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` once and report timing and memory change.

    Args:
        func: Callable to benchmark.
        name: Label stored under ``'name'`` in the returned record.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        dict with the keys:
            'name'        -- the label passed in,
            'duration'    -- elapsed seconds around the call,
            'memory_used' -- change in this process's RSS in MB (can be
                             negative when the process shrinks during the call),
            'success'     -- False if ``func`` raised an ``Exception``,
            'error'       -- ``str(exception)`` on failure, else None,
            'result'      -- return value of ``func`` on success, else None.

    Exceptions derived from ``Exception`` are captured in the record instead of
    being propagated, so one failing benchmark does not abort the others.
    """
    gc.collect()  # Clean memory before measurement

    process = psutil.Process()  # one handle reused for both RSS samples
    start_memory = process.memory_info().rss / 1024 / 1024  # MB

    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-benchmark.
    # The timer brackets only the call, not the memory sampling.
    start_time = time.perf_counter()
    try:
        result = func(*args, **kwargs)
        success = True
        error = None
    except Exception as e:
        result = None
        success = False
        error = str(e)
    duration = time.perf_counter() - start_time

    end_memory = process.memory_info().rss / 1024 / 1024  # MB

    return {
        'name': name,
        'duration': duration,
        'memory_used': end_memory - start_memory,
        'success': success,
        'error': error,
        'result': result
    }

# Performance benchmarks
# Each entry is the record dict returned by measure_performance; the list is
# read again by the session summary in the next cell.
benchmarks = []

print("üß™ Running Performance Benchmarks...")

# Benchmark 1: Pipeline initialization
bench1 = measure_performance(
    lambda: PipelineUtils(results_dir="data/05_output/results"),
    "Pipeline Initialization"
)
benchmarks.append(bench1)

# Benchmark 2: Configuration template creation
# NOTE(review): this writes a "perf_test.json" template as a side effect;
# confirm where config_manager places it so the cleanup cell can remove it.
bench2 = measure_performance(
    lambda: config_manager.create_template_json("perf_test.json"),
    "Configuration Template Creation"
)
benchmarks.append(bench2)

# Benchmark 3: Quick status check
bench3 = measure_performance(
    pipeline.quick_test,
    "Quick Status Check"
)
benchmarks.append(bench3)

# Display results
print("\nüìä Performance Benchmark Results:")
print("-" * 60)
for bench in benchmarks:
    status = "‚úÖ" if bench['success'] else "‚ùå"
    print(f"{status} {bench['name']:<30} {bench['duration']:>8.3f}s {bench['memory_used']:>8.1f}MB")
    if not bench['success']:
        print(f"   Error: {bench['error']}")

# System resource usage
# cpu_percent() without an interval reports utilisation since the *previous*
# call (and a meaningless 0.0 on the first call in a process), so sample over
# a short blocking window to get a value that really is "current".
cpu_usage = psutil.cpu_percent(interval=0.1)
# Take one snapshot so the percentage and the available figure are consistent.
vm_snapshot = psutil.virtual_memory()

print("\nüíª Current System Resources:")
print(f"   ‚Ä¢ CPU Usage: {cpu_usage:.1f}%")
print(f"   ‚Ä¢ Memory Usage: {vm_snapshot.percent:.1f}%")
print(f"   ‚Ä¢ Available Memory: {vm_snapshot.available / 1024 / 1024 / 1024:.1f} GB")

print("\n‚úÖ Performance analysis complete!")


üìà Performance Analysis and System Benchmarking...
üß™ Running Performance Benchmarks...

üìä Performance Benchmark Results:
------------------------------------------------------------
‚úÖ Pipeline Initialization           4.732s  -3282.0MB
‚úÖ Configuration Template Creation    0.001s      0.1MB
‚úÖ Quick Status Check                0.006s      1.2MB

üíª Current System Resources:
   ‚Ä¢ CPU Usage: 40.5%
   ‚Ä¢ Memory Usage: 84.2%
   ‚Ä¢ Available Memory: 2.5 GB

‚úÖ Performance analysis complete!


In [11]:
# SECTION 5.2: Debug Tools and System Cleanup
# ============================================

print("üîß Debug Tools and System Maintenance...")

# System cleanup functions
def cleanup_temp_files():
    """Delete known temporary files from the current working directory.

    The patterns ("perf_test.json" and "dev_simulation_config.*") are resolved
    relative to the current working directory only.
    NOTE(review): files written elsewhere (for example under the config
    manager's own directory) are not matched -- confirm where the benchmark
    cell actually writes "perf_test.json".

    Returns:
        int: Number of files that were actually removed.
    """
    import glob  # kept local, as in the original cell

    temp_patterns = ["perf_test.json", "dev_simulation_config.*"]
    cleaned = 0

    for pattern in temp_patterns:
        # glob.glob also handles literal names: it returns the name itself
        # when the path exists and an empty list otherwise, so wildcard and
        # literal patterns share one code path.
        for file in glob.glob(pattern):
            try:
                Path(file).unlink()
            except OSError:
                # Best-effort cleanup: skip entries that cannot be removed
                # (already gone, a directory, permission denied) without
                # swallowing KeyboardInterrupt/SystemExit like a bare except.
                continue
            cleaned += 1

    return cleaned

def check_log_files():
    """Collect metadata for ``*.log`` files in the known log directories.

    Scans "logs" and "data/05_output/logs" (relative to the current working
    directory, non-recursively); directories that do not exist are skipped.

    Returns:
        list[dict]: One record per log file with the keys
            'file' -- path as a string,
            'size' -- size in bytes,
            'age'  -- seconds since the file was last modified.
        Records are ordered oldest to newest by modification time, so a tail
        slice such as ``result[-5:]`` yields the most recent logs instead of
        depending on filesystem listing order.
    """
    log_dirs = ["logs", "data/05_output/logs"]
    now = time.time()  # single reference point so ages are comparable
    found = []

    for log_dir in log_dirs:
        directory = Path(log_dir)
        if not directory.is_dir():
            continue
        for log_file in directory.glob("*.log"):
            try:
                info = log_file.stat()  # one stat call gives size and mtime
            except OSError:
                # File disappeared (e.g. log rotation) between listing and stat.
                continue
            found.append((info.st_mtime, {
                'file': str(log_file),
                'size': info.st_size,
                'age': now - info.st_mtime
            }))

    # Sort on mtime only; the sort is stable, so ties keep discovery order.
    found.sort(key=lambda pair: pair[0])
    return [record for _, record in found]

# --- Temporary-file cleanup -------------------------------------------------
print("üóëÔ∏è Running System Cleanup...")
cleaned_files = cleanup_temp_files()
print(f"   ‚Ä¢ Cleaned {cleaned_files} temporary files")

# --- Log inventory ----------------------------------------------------------
print("\nüìã Checking Log Files...")
log_files = check_log_files()
if not log_files:
    print("   ‚Ä¢ No log files found")
else:
    print(f"   ‚Ä¢ Found {len(log_files)} log files:")
    # Only the tail of the list is shown to keep the output short.
    for log in log_files[-5:]:
        age_hours = log['age'] / 3600
        log_name = Path(log['file']).name
        print(f"     - {log_name} ({log['size']} bytes, {age_hours:.1f}h old)")

# --- Memory cleanup ---------------------------------------------------------
print("\nüíæ Memory Cleanup...")
gc.collect()
print("   ‚Ä¢ Garbage collection completed")

# --- Session summary --------------------------------------------------------
print("\nüìä Development Session Summary:")
# start_time is expected to come from an earlier cell; the duration line is
# simply omitted when that cell has not been run in this kernel.
if 'start_time' in locals():
    session_seconds = (datetime.now() - start_time).total_seconds()
    print(f"   ‚Ä¢ Session Duration: {session_seconds:.1f} seconds")
print(f"   ‚Ä¢ Benchmarks Run: {len(benchmarks)}")
components_flag = '‚úÖ' if all(b['success'] for b in benchmarks) else '‚ö†Ô∏è'
print(f"   ‚Ä¢ Components Tested: {components_flag}")

print("\n‚úÖ Debug tools and cleanup complete!")


üîß Debug Tools and System Maintenance...
üóëÔ∏è Running System Cleanup...
   ‚Ä¢ Cleaned 0 temporary files

üìã Checking Log Files...
   ‚Ä¢ Found 1 log files:
     - pynucleus_20250618_231539.log (2029 bytes, 0.2h old)

üíæ Memory Cleanup...
   ‚Ä¢ Garbage collection completed

üìä Development Session Summary:
   ‚Ä¢ Session Duration: 118.5 seconds
   ‚Ä¢ Benchmarks Run: 3
   ‚Ä¢ Components Tested: ‚úÖ

‚úÖ Debug tools and cleanup complete!
