In [1]:
# Cell 1: System Initialization
# ===================================
# Puts the local "src" directory on the import path and creates the two
# objects that later cells use: `pipeline` and `llm_generator`.

import sys
from pathlib import Path

print("üîß Initializing PyNucleus Model...")

# "src" is located relative to the current working directory, so the
# notebook has to be started from the directory that contains it.
src_path = str(Path().resolve().joinpath("src"))
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

try:
    # The project imports sit inside the try block so that a failed import
    # is reported as a short hint rather than a traceback.
    from pynucleus.pipeline import PipelineUtils
    from pynucleus.integration.llm_output_generator import LLMOutputGenerator

    # Pipeline results and generated reports go to separate output folders.
    pipeline = PipelineUtils(results_dir="data/05_output/results")
    llm_generator = LLMOutputGenerator(results_dir="data/05_output/reports")

    # One print call, one line per argument; output is unchanged.
    print(
        "‚úÖ PyNucleus Model initialized successfully!",
        "üìã System Ready:",
        "   ‚Ä¢ RAG Pipeline - Document processing and retrieval",
        "   ‚Ä¢ DWSIM Pipeline - Chemical process simulation",
        "   ‚Ä¢ Results Export - CSV and report generation",
        "   ‚Ä¢ LLM Integration - Intelligent analysis and summaries",
        "\nüéØ Ready to run analysis! Execute Cell 2 to start.",
        sep="\n",
    )

except ImportError as err:
    print(f"‚ùå Import Error: {err}")
    print("üí° Please ensure you're in the PyNucleus-Model directory")
except Exception as err:
    print(f"‚ùå Initialization Error: {err}")
    print("üí° Please check your system setup")


üîß Initializing PyNucleus Model...


  from .autonotebook import tqdm as notebook_tqdm
  embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')


‚úÖ PyNucleus Model initialized successfully!
üìã System Ready:
   ‚Ä¢ RAG Pipeline - Document processing and retrieval
   ‚Ä¢ DWSIM Pipeline - Chemical process simulation
   ‚Ä¢ Results Export - CSV and report generation
   ‚Ä¢ LLM Integration - Intelligent analysis and summaries

üéØ Ready to run analysis! Execute Cell 2 to start.


In [2]:
# Cell 2: Run Complete Analysis
# ===================================
# Runs `pipeline.run_complete_pipeline()`, prints a summary of what it
# returned, then attempts the optional enhanced analysis (DWSIM/RAG
# integration, LLM-ready reports and financial metrics).
#
# Requires `pipeline` and `llm_generator` from Cell 1.
# `results` is read as a mapping with the keys 'duration', 'rag_data',
# 'dwsim_data' and 'exported_files'.

print("üöÄ Starting Complete PyNucleus Analysis...")
print("\nüìä This will run:")
print("   1. Document processing and RAG analysis")
print("   2. DWSIM chemical process simulations")
print("   3. Results export and report generation")
print("\n‚è≥ Please wait... This may take 20-30 seconds.\n")

try:
    # Run the complete pipeline
    results = pipeline.run_complete_pipeline()

    if results:
        print(f"\nüéâ Analysis completed successfully in {results['duration']:.1f} seconds!")
        print("\nüìä Results Summary:")
        print(f"   ‚Ä¢ Documents Processed: {len(results['rag_data'])} queries")
        print(f"   ‚Ä¢ Simulations Completed: {len(results['dwsim_data'])} scenarios")
        print(f"   ‚Ä¢ Files Generated: {len(results['exported_files'])} CSV files")

        # Enhanced reports are best-effort: a failure here must not hide the
        # basic results above, but the reason for the failure is reported.
        try:
            # Imported here so the basic run still works when this optional
            # integration cannot be imported.
            from pynucleus.integration.dwsim_rag_integrator import DWSIMRAGIntegrator

            integrator = DWSIMRAGIntegrator(
                rag_pipeline=pipeline.rag_pipeline,
                results_dir="data/05_output/results"
            )

            # Enhanced analysis
            dwsim_results = pipeline.dwsim_pipeline.get_results()
            if dwsim_results:
                enhanced_results = integrator.integrate_simulation_results(
                    dwsim_results, perform_rag_analysis=True
                )

                # Detailed LLM reports are generated for the first 3
                # simulations only.
                report_files = [
                    llm_generator.export_llm_ready_text(result)
                    for result in enhanced_results[:3]
                ]

                # Financial analysis. The return value is kept in
                # `financial_file` but is not used further in this cell.
                financial_file = llm_generator.export_financial_analysis(enhanced_results)
                # NOTE(review): `_calculate_key_metrics` is a private method of
                # the generator; a public accessor would be preferable if one exists.
                metrics = llm_generator._calculate_key_metrics(enhanced_results)

                print("\nüí∞ Financial Analysis:")
                print(f"   ‚Ä¢ Recovery Rate: {metrics['avg_recovery']:.1f}%")
                print(f"   ‚Ä¢ Daily Revenue: ${metrics['estimated_revenue']:,.2f}")
                print(f"   ‚Ä¢ Daily Profit: ${metrics['net_profit']:,.2f}")
                print(f"   ‚Ä¢ ROI: {metrics['roi']:.1f}%")

                print(f"\nüìÑ Generated Reports: {len(report_files)} detailed analysis files")

        except Exception as e:
            print("‚ö†Ô∏è Enhanced analysis unavailable (using basic results only)")
            # Report the cause; previously the exception was dropped, leaving
            # no clue why the enhanced analysis was skipped.
            print(f"   Reason: {type(e).__name__}: {e}")

        print("\nüìÅ All results saved to:")
        print("   ‚Ä¢ CSV Files: data/05_output/results/")
        print("   ‚Ä¢ Reports: data/05_output/reports/")
        print("\n‚úÖ Analysis complete! Run Cell 3 to explore your results.")

    else:
        print("‚ùå Pipeline execution failed")
        print("üí° Please check your data directories and try again")

except Exception as e:
    # Also reached when `pipeline` is undefined because Cell 1 was not run
    # or did not complete.
    print(f"‚ùå Error during analysis: {e}")
    print("üí° Please ensure all components are properly initialized")


üöÄ Starting Complete PyNucleus Analysis...

üìä This will run:
   1. Document processing and RAG analysis
   2. DWSIM chemical process simulations
   3. Results export and report generation

‚è≥ Please wait... This may take 20-30 seconds.



Failed to export RAG results: Object of type float32 is not JSON serializable



üéâ Analysis completed successfully in 14.4 seconds!

üìä Results Summary:
   ‚Ä¢ Documents Processed: 3 queries
   ‚Ä¢ Simulations Completed: 3 scenarios
   ‚Ä¢ Files Generated: 1 CSV files
‚ö†Ô∏è Enhanced analysis unavailable (using basic results only)

üìÅ All results saved to:
   ‚Ä¢ CSV Files: data/05_output/results/
   ‚Ä¢ Reports: data/05_output/reports/

‚úÖ Analysis complete! Run Cell 3 to explore your results.


In [3]:
# Cell 3: View Results and Summary
# ===================================
# Prints a small dashboard: the status dictionary returned by
# `pipeline.quick_test()`, followed by `pipeline.view_results_summary()`.
# Requires `pipeline` from Cell 1.

divider = "=" * 40
print("üìä PyNucleus Results Dashboard", divider, sep="\n")

try:
    # Quick status check; the keys read below are 'results_dir',
    # 'csv_files_count' and 'csv_files'.
    status = pipeline.quick_test()

    print(f"üìÅ Results Directory: {status['results_dir']}")
    csv_count = status['csv_files_count']
    print(f"üìÑ CSV Files Found: {csv_count}")

    # List the individual files only when at least one was reported.
    if csv_count > 0:
        print("\nüìã Available Files:")
        for csv_entry in status['csv_files']:
            name, size = csv_entry['name'], csv_entry['size']
            print(f"   ‚Ä¢ {name} ({size} bytes)")

    # Detailed summary, printed by the pipeline object itself.
    print(f"\n{divider}")
    pipeline.view_results_summary()

    print(
        "\nüîß Additional Options:",
        "   ‚Ä¢ Re-run Cell 2 to generate new results",
        "   ‚Ä¢ Check data/05_output/ folder for all generated files",
        "   ‚Ä¢ View Developer_Notebook.ipynb for advanced features",
        sep="\n",
    )

except Exception as err:
    print(
        f"‚ùå Error viewing results: {err}",
        "üí° Please run Cell 2 first to generate results",
        sep="\n",
    )


üìä PyNucleus Results Dashboard
üìÅ Results Directory: data/05_output/results
üìÑ CSV Files Found: 2

üìã Available Files:
   ‚Ä¢ dev_simulation_config.csv (171 bytes)
   ‚Ä¢ bulk_modular_plants_template.csv (1372 bytes)

üìä PyNucleus System Summary
------------------------------
üìÅ Main Results Dir (results): 20 JSON files
üìÅ Results Subdir (results): 16 JSON files
üìÅ Config Files: 2 CSV files

üîß Pipeline Status:
   ‚Ä¢ RAG Pipeline: Initialized
   ‚Ä¢ DWSIM Pipeline: Initialized
   ‚Ä¢ Results Directory: data/05_output/results

üìã Recent Generated Files:
   ‚Ä¢ dwsim_results_20250618_135524.json
   ‚Ä¢ rag_results_20250618_135524.json
   ‚Ä¢ integrated_results_20250618_135323.json
   ‚Ä¢ dwsim_results_20250618_135323.json
   ‚Ä¢ rag_results_20250618_135323.json

üîß Additional Options:
   ‚Ä¢ Re-run Cell 2 to generate new results
   ‚Ä¢ Check data/05_output/ folder for all generated files
   ‚Ä¢ View Developer_Notebook.ipynb for advanced features


In [4]:
# Optional Cell 4A: Run Only Document Analysis (RAG)
# ====================================================
# Uncomment and run this cell if you only want document processing
#
# Requires `pipeline` from Cell 1. The disabled code below treats a falsy
# return value from `pipeline.run_rag_only()` as failure and otherwise reads
# its 'rag_data' entry.

# print("üìö Running Document Analysis Only...")
# rag_results = pipeline.run_rag_only()
# if rag_results:
#     print(f"‚úÖ Processed {len(rag_results['rag_data'])} document queries")
#     print("üìÅ Results saved to data/05_output/results/")
# else:
#     print("‚ùå Document analysis failed")


In [5]:
# Optional Cell 4B: Run Only Chemical Simulations (DWSIM)
# =======================================================
# Uncomment and run this cell if you only want DWSIM simulations
#
# Requires `pipeline` from Cell 1. The disabled code below treats a falsy
# return value from `pipeline.run_dwsim_only()` as failure and otherwise reads
# its 'dwsim_data' entry.

# print("üî¨ Running Chemical Simulations Only...")
# dwsim_results = pipeline.run_dwsim_only()
# if dwsim_results:
#     print(f"‚úÖ Completed {len(dwsim_results['dwsim_data'])} simulations")
#     print("üìÅ Results saved to data/05_output/results/")
# else:
#     print("‚ùå Chemical simulations failed")


In [6]:
# Optional Cell 5: Clean Up Results
# =================================
# Uncomment and run this cell to clear all previous results
#
# Requires `pipeline` from Cell 1.
# NOTE(review): confirm exactly which files `pipeline.clean_all_results()`
# removes before running it; this notebook does not show its behavior.

# print("üóëÔ∏è Cleaning up previous results...")
# pipeline.clean_all_results()
# print("‚úÖ All results cleared. You can now run a fresh analysis.")
