In [10]:
# Cell 1: System Initialization & Validation
# ==========================================
# This cell sets up PyNucleus and validates all components

import sys
from pathlib import Path
from datetime import datetime

print("🔧 Initializing PyNucleus Model...")
print(f"📅 Session started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Add src to Python path
src_path = str(Path().resolve() / "src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)

try:
    # Import PyNucleus components
    from pynucleus.pipeline import PipelineUtils
    from pynucleus.integration.llm_output_generator import LLMOutputGenerator
    
    print("✅ PyNucleus modules imported successfully")
    
    # Initialize core components
    pipeline = PipelineUtils(results_dir="data/05_output/results")
    llm_generator = LLMOutputGenerator(results_dir="data/05_output/reports")
    
    print("✅ Core components initialized")
    
    # Quick system validation
    try:
        system_status = pipeline.quick_test()
        file_count = system_status.get('csv_files_count', 0)
        print(f"✅ System validation passed ({file_count} existing files)")
    except Exception as validation_error:
        print(f"⚠️ System validation warning: {validation_error}")
        print("   • System will continue with default settings")
    
    print("\n📋 System Components Ready:")
    print("   • 📚 RAG Pipeline - Document processing with 7,141+ indexed documents")
    print("   • 🔬 DWSIM Pipeline - Chemical process simulation and optimization")
    print("   • 📊 Results Export - Comprehensive CSV and JSON file generation")
    print("   • 💡 LLM Integration - Intelligent analysis, summaries, and financial reports")
    
    print(f"\n📁 Output Directory: data/05_output/")
    print("🎯 System ready! Execute Cell 2 to run complete analysis.")
    
except ImportError as e:
    print(f"❌ Import Error: {e}")
    print("\n💡 Troubleshooting:")
    print("   • Ensure you're in the PyNucleus-Model directory")
    print("   • Check that all dependencies are installed")
    print("   • Try restarting the kernel")
except Exception as e:
    print(f"❌ Initialization Error: {e}")
    print("\n💡 Troubleshooting:")
    print("   • Check your Python environment setup")
    print("   • Verify all required directories exist")
    print("   • For advanced diagnostics, see Developer_Notebook.ipynb")


🔧 Initializing PyNucleus Model...
📅 Session started: 2025-06-18 20:44:00
✅ PyNucleus modules imported successfully
✅ Core components initialized
✅ System validation passed (1 existing files)

📋 System Components Ready:
   • 📚 RAG Pipeline - Document processing with 7,141+ indexed documents
   • 🔬 DWSIM Pipeline - Chemical process simulation and optimization
   • 📊 Results Export - Comprehensive CSV and JSON file generation
   • 💡 LLM Integration - Intelligent analysis, summaries, and financial reports

📁 Output Directory: data/05_output/
🎯 System ready! Execute Cell 2 to run complete analysis.


In [11]:
# Cell 2: Run Complete Analysis Pipeline
# ======================================
# This cell executes the complete PyNucleus pipeline with enhanced feedback

print("🚀 Starting Complete PyNucleus Analysis...")
print("\n📊 Processing Pipeline:")
print("   1. 📚 Document processing with FAISS vector store (7,141+ documents)")
print("   2. 🔬 DWSIM chemical process simulations (multiple scenarios)")
print("   3. 🔗 Advanced integration and analysis")
print("   4. 📄 Results export and intelligent report generation")
print("\n⏳ Please wait... Analysis typically takes 20-40 seconds.")
print("   ⚡ Real-time processing with actual documents and simulations")

try:
    start_time = datetime.now()
    
    # Run the complete pipeline
    results = pipeline.run_complete_pipeline()
    
    if results:
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()
        
        print(f"\n🎉 Analysis completed successfully in {duration:.1f} seconds!")
        
        # Enhanced results summary
        print(f"\n📊 Processing Results:")
        rag_count = len(results.get('rag_data', []))
        dwsim_count = len(results.get('dwsim_data', []))
        files_count = len(results.get('exported_files', []))
        
        print(f"   • 📚 Document Queries Processed: {rag_count}")
        print(f"   • 🔬 Chemical Simulations Completed: {dwsim_count}")
        print(f"   • 📁 Files Generated: {files_count}")
        
        # Try enhanced analysis and reporting
        enhanced_success = False
        try:
            from pynucleus.integration.dwsim_rag_integrator import DWSIMRAGIntegrator
            
            integrator = DWSIMRAGIntegrator(
                rag_pipeline=pipeline.rag_pipeline,
                results_dir="data/05_output/results"
            )
            
            # Enhanced analysis
            dwsim_results = pipeline.dwsim_pipeline.get_results()
            if dwsim_results:
                enhanced_results = integrator.integrate_simulation_results(
                    dwsim_results, perform_rag_analysis=True
                )
                
                # Generate intelligent reports
                report_files = []
                for result in enhanced_results[:3]:  # Generate reports for first 3 simulations
                    try:
                        report_file = llm_generator.export_llm_ready_text(result)
                        report_files.append(report_file)
                    except:
                        pass  # Continue if individual report fails
                
                # Financial analysis
                financial_file = llm_generator.export_financial_analysis(enhanced_results)
                metrics = llm_generator._calculate_key_metrics(enhanced_results)
                
                print(f"\n💰 Financial Analysis Summary:")
                print(f"   • Average Recovery Rate: {metrics['avg_recovery']:.1f}%")
                print(f"   • Estimated Daily Revenue: ${metrics['estimated_revenue']:,.2f}")
                print(f"   • Estimated Daily Profit: ${metrics['net_profit']:,.2f}")
                print(f"   • Return on Investment: {metrics['roi']:.1f}%")
                
                print(f"\n📄 Intelligent Reports Generated: {len(report_files)} detailed analysis files")
                enhanced_success = True
                
        except Exception as enhanced_error:
            print(f"\n⚠️ Enhanced analysis partially available (basic results ready)")
            # Continue with basic results
        
        # Results location information
        print(f"\n📁 Results Available At:")
        print(f"   • CSV Data Files: data/05_output/results/")
        print(f"   • Intelligent Reports: data/05_output/reports/")
        if enhanced_success:
            print(f"   • Financial Analysis: Included in reports")
        
        # Processing performance
        if rag_count > 0 and dwsim_count > 0:
            processing_rate = (rag_count + dwsim_count) / duration
            print(f"\n⚡ Performance: {processing_rate:.1f} operations/second")
        
        print(f"\n✅ Analysis complete! Run Cell 3 to explore detailed results.")
        
        # Store results for Cell 3
        global analysis_results
        analysis_results = results
        
    else:
        print("❌ Pipeline execution failed")
        print("\n💡 Troubleshooting steps:")
        print("   1. Ensure Cell 1 ran successfully")
        print("   2. Check that data directories exist")
        print("   3. Try restarting the kernel and re-running all cells")
        print("   4. For detailed diagnostics, see Developer_Notebook.ipynb")
        
except Exception as e:
    print(f"❌ Error during analysis: {e}")
    print("\n💡 Troubleshooting:")
    print("   • Ensure all components were initialized successfully in Cell 1")
    print("   • Check system resources (memory, disk space)")
    print("   • For advanced debugging, use Developer_Notebook.ipynb")
    
    import traceback
    print(f"\n🔧 Technical details (for developers):")
    print(f"   Error type: {type(e).__name__}")
    # Only show traceback in case of unexpected errors


🚀 Starting Complete PyNucleus Analysis...

📊 Processing Pipeline:
   1. 📚 Document processing with FAISS vector store (7,141+ documents)
   2. 🔬 DWSIM chemical process simulations (multiple scenarios)
   3. 🔗 Advanced integration and analysis
   4. 📄 Results export and intelligent report generation

⏳ Please wait... Analysis typically takes 20-40 seconds.
   ⚡ Real-time processing with actual documents and simulations


Failed to export RAG results: Object of type float32 is not JSON serializable



🎉 Analysis completed successfully in 14.2 seconds!

📊 Processing Results:
   • 📚 Document Queries Processed: 3
   • 🔬 Chemical Simulations Completed: 3
   • 📁 Files Generated: 1

⚠️ Enhanced analysis partially available (basic results ready)

📁 Results Available At:
   • CSV Data Files: data/05_output/results/
   • Intelligent Reports: data/05_output/reports/

⚡ Performance: 0.4 operations/second

✅ Analysis complete! Run Cell 3 to explore detailed results.


In [12]:
# Cell 3: View Results Dashboard
# ===============================
# This cell displays comprehensive results and provides detailed analysis

print("📊 PyNucleus Results Dashboard")
print("=" * 50)

try:
    # Check if we have fresh results from Cell 2
    if 'analysis_results' in globals():
        print("🎯 Displaying results from current analysis session")
        current_results = analysis_results
        
        # Show analysis session summary
        print(f"\n📈 Analysis Session Summary:")
        print(f"   • 📚 RAG Queries: {len(current_results.get('rag_data', []))}")
        print(f"   • 🔬 DWSIM Simulations: {len(current_results.get('dwsim_data', []))}")
        print(f"   • 📁 Generated Files: {len(current_results.get('exported_files', []))}")
        print(f"   • ⏱️ Processing Time: {current_results.get('duration', 'N/A'):.1f}s")
        
    else:
        print("ℹ️ No current session results - showing historical data")
    
    # System status check
    status = pipeline.quick_test()
    
    print(f"\n📁 Results Directory: {status['results_dir']}")
    print(f"📄 Total Files Available: {status['csv_files_count']}")
    
    if status['csv_files_count'] > 0:
        print(f"\n📋 Available Data Files:")
        for file_info in status['csv_files']:
            file_size_kb = file_info['size'] / 1024 if file_info['size'] > 1024 else file_info['size']
            size_unit = "KB" if file_info['size'] > 1024 else "bytes"
            print(f"   • {file_info['name']} ({file_size_kb:.1f} {size_unit})")
    else:
        print(f"\n⚠️ No data files found in results directory")
        print(f"   Run Cell 2 to generate analysis results")
    
    # Enhanced system summary
    print(f"\n" + "=" * 50)
    try:
        pipeline.view_results_summary()
    except Exception as summary_error:
        print(f"📊 PyNucleus System Summary")
        print(f"   • Results directory contains {status['csv_files_count']} files")
        print(f"   • System ready for analysis")
    
    # Check for intelligent reports
    reports_dir = Path("data/05_output/reports")
    if reports_dir.exists():
        report_files = list(reports_dir.glob("*.md"))
        if report_files:
            print(f"\n📄 Intelligent Reports Available: {len(report_files)}")
            for report in report_files[-3:]:  # Show last 3 reports
                print(f"   • {report.name}")
    
    # Performance and next steps
    print(f"\n🚀 Next Steps:")
    print(f"   • 🔄 Re-run Cell 2 to generate fresh analysis")
    print(f"   • 📁 Explore files in data/05_output/ directory")
    print(f"   • 🔧 Use Developer_Notebook.ipynb for advanced features")
    print(f"   • 📊 Export results for external analysis tools")
    
    # Quick health check
    health_indicators = []
    if status['csv_files_count'] > 0:
        health_indicators.append("✅ Data files present")
    if reports_dir.exists() and list(reports_dir.glob("*.md")):
        health_indicators.append("✅ Reports generated")
    if 'analysis_results' in globals():
        health_indicators.append("✅ Fresh analysis available")
    
    if health_indicators:
        print(f"\n🎯 System Health:")
        for indicator in health_indicators:
            print(f"   {indicator}")
    
except Exception as e:
    print(f"❌ Error viewing results: {e}")
    print(f"\n💡 Troubleshooting:")
    print(f"   • Ensure Cell 1 (initialization) completed successfully")
    print(f"   • Run Cell 2 to generate analysis results")
    print(f"   • Check that data/05_output/ directory exists")
    print(f"   • For detailed diagnostics, see Developer_Notebook.ipynb")
    
    # Show basic directory information as fallback
    try:
        results_path = Path("data/05_output/results")
        if results_path.exists():
            file_count = len(list(results_path.glob("*.csv")))
            print(f"\n📁 Fallback Info: {file_count} CSV files in results directory")
    except:
        pass


📊 PyNucleus Results Dashboard
🎯 Displaying results from current analysis session

📈 Analysis Session Summary:
   • 📚 RAG Queries: 3
   • 🔬 DWSIM Simulations: 3
   • 📁 Generated Files: 1
   • ⏱️ Processing Time: 14.2s

📁 Results Directory: data/05_output/results
📄 Total Files Available: 1

📋 Available Data Files:
   • dev_simulation_config.csv (171.0 bytes)

📊 PyNucleus System Summary
------------------------------
📁 Main Results Dir (results): 25 JSON files
📁 Results Subdir (results): 38 JSON files
📁 Config Files: 1 CSV files

🔧 Pipeline Status:
   • RAG Pipeline: Initialized
   • DWSIM Pipeline: Initialized
   • Results Directory: data/05_output/results

📋 Recent Generated Files:
   • dwsim_results_20250618_204420.json
   • rag_results_20250618_204420.json
   • dwsim_results_20250618_190830.json
   • rag_results_20250618_190830.json
   • integrated_results_20250618_164816.json

📄 Intelligent Reports Available: 3
   • reactor_methane_combustion_summary.md
   • heat_exchanger_steam_summ

In [13]:
# Optional Cell 4A: Run Only Document Analysis (RAG)
# ====================================================
# Uncomment and run this cell if you only want document processing

# print("📚 Running Document Analysis Only...")
# rag_results = pipeline.run_rag_only()
# if rag_results:
#     print(f"✅ Processed {len(rag_results['rag_data'])} document queries")
#     print("📁 Results saved to data/05_output/results/")
# else:
#     print("❌ Document analysis failed")


In [14]:
# Optional Cell 4B: Run Only Chemical Simulations (DWSIM)
# =======================================================
# Uncomment and run this cell if you only want DWSIM simulations

# print("🔬 Running Chemical Simulations Only...")
# dwsim_results = pipeline.run_dwsim_only()
# if dwsim_results:
#     print(f"✅ Completed {len(dwsim_results['dwsim_data'])} simulations")
#     print("📁 Results saved to data/05_output/results/")
# else:
#     print("❌ Chemical simulations failed")


In [15]:
# Optional Cell 5: Clean Up Results
# =================================
# Uncomment and run this cell to clear all previous results

# print("🗑️ Cleaning up previous results...")
# pipeline.clean_all_results()
# print("✅ All results cleared. You can now run a fresh analysis.")


In [16]:
# # ========================================
# # VERSION CONTROL (Optional - For Maintainers Only)
# # ========================================
# # Uncomment the lines below if you need to update the repository:

# from datetime import datetime

# # Log end time
# with open("update_log.txt", "a") as f:
#     f.write(f"\n {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} changes made and pushed to origin main\n")

# # Simple GitHub update function
# def update_github():
#     print(" Starting GitHub update...")
#     !git add .
#     print(" Files added to staging")
#     !git commit -m "Update: $(date +'%Y-%m-%d %H:%M:%S')"
#     print(" Changes committed")
#     !git push origin main
#     print(" Changes pushed to GitHub successfully!")

# # To use it, just run:
# update_github()