In [1]:
# Cell 1: System Initialization
# ===================================
# Bootstraps PyNucleus: patches sys.path, imports the project packages and
# creates the `pipeline` and `llm_generator` objects used by the later cells.

import sys
from pathlib import Path

print("🔧 Initializing PyNucleus Model...")

# Put ./src (resolved against the current working directory) at the front of
# the import path, unless it is already listed there.
src_path = str(Path().resolve().joinpath("src"))
if not sys.path.count(src_path):
    sys.path.insert(0, src_path)

try:
    # Project imports happen only after sys.path has been patched above.
    from pynucleus.pipeline import PipelineUtils
    from pynucleus.integration.llm_output_generator import LLMOutputGenerator

    # Pipeline results and generated reports go to separate output folders.
    pipeline = PipelineUtils(results_dir="data/05_output/results")
    llm_generator = LLMOutputGenerator(results_dir="data/05_output/reports")

    # Status banner: one line per entry, same text as individual print calls.
    print(
        "✅ PyNucleus Model initialized successfully!",
        "📋 System Ready:",
        "   • RAG Pipeline - Document processing and retrieval",
        "   • DWSIM Pipeline - Chemical process simulation",
        "   • Results Export - CSV and report generation",
        "   • LLM Integration - Intelligent analysis and summaries",
        "\n🎯 Ready to run analysis! Execute Cell 2 to start.",
        sep="\n",
    )

except ImportError as exc:
    # The project packages could not be imported.
    print(
        f"❌ Import Error: {exc}",
        "💡 Please ensure you're in the PyNucleus-Model directory",
        sep="\n",
    )
except Exception as exc:
    # Imports succeeded but constructing the components failed.
    print(
        f"❌ Initialization Error: {exc}",
        "💡 Please check your system setup",
        sep="\n",
    )


🔧 Initializing PyNucleus Model...


  from .autonotebook import tqdm as notebook_tqdm
  embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')


✅ PyNucleus Model initialized successfully!
📋 System Ready:
   • RAG Pipeline - Document processing and retrieval
   • DWSIM Pipeline - Chemical process simulation
   • Results Export - CSV and report generation
   • LLM Integration - Intelligent analysis and summaries

🎯 Ready to run analysis! Execute Cell 2 to start.


In [2]:
# Cell 2: Run Complete Analysis
# ===================================
# Runs the complete PyNucleus pipeline, prints a summary of its results and then
# attempts an optional enhanced analysis (DWSIM + RAG integration, LLM-ready
# reports, financial metrics). Requires `pipeline` and `llm_generator` from Cell 1.

print("🚀 Starting Complete PyNucleus Analysis...")
print("\n📊 This will run:")
print("   1. Document processing and RAG analysis")
print("   2. DWSIM chemical process simulations")
print("   3. Results export and report generation")
print("\n⏳ Please wait... This may take 20-30 seconds.\n")

try:
    # Run the complete pipeline; a falsy return value is treated as failure below.
    results = pipeline.run_complete_pipeline()

    if results:
        print(f"\n🎉 Analysis completed successfully in {results['duration']:.1f} seconds!")
        print("\n📊 Results Summary:")
        print(f"   • Documents Processed: {len(results['rag_data'])} queries")
        print(f"   • Simulations Completed: {len(results['dwsim_data'])} scenarios")
        print(f"   • Files Generated: {len(results['exported_files'])} CSV files")

        # Optional step: generate enhanced reports. Any failure here is reported
        # and the basic results above remain valid.
        try:
            from pynucleus.integration.dwsim_rag_integrator import DWSIMRAGIntegrator

            integrator = DWSIMRAGIntegrator(
                rag_pipeline=pipeline.rag_pipeline,
                results_dir="data/05_output/results"
            )

            # Enhanced analysis is skipped when there are no simulation results.
            dwsim_results = pipeline.dwsim_pipeline.get_results()
            if dwsim_results:
                enhanced_results = integrator.integrate_simulation_results(
                    dwsim_results, perform_rag_analysis=True
                )

                # Generate LLM reports for the first 3 simulations only.
                report_files = []
                for result in enhanced_results[:3]:
                    report_file = llm_generator.export_llm_ready_text(result)
                    report_files.append(report_file)

                # Financial analysis
                financial_file = llm_generator.export_financial_analysis(enhanced_results)
                # NOTE(review): `_calculate_key_metrics` is underscore-prefixed (private by
                # convention); switch to a public accessor if the generator offers one.
                metrics = llm_generator._calculate_key_metrics(enhanced_results)

                print("\n💰 Financial Analysis:")
                print(f"   • Recovery Rate: {metrics['avg_recovery']:.1f}%")
                print(f"   • Daily Revenue: ${metrics['estimated_revenue']:,.2f}")
                print(f"   • Daily Profit: ${metrics['net_profit']:,.2f}")
                print(f"   • ROI: {metrics['roi']:.1f}%")

                print(f"\n📄 Generated Reports: {len(report_files)} detailed analysis files")

        except Exception as e:
            # Still best-effort, but report why the step was skipped so the
            # failure can be diagnosed instead of being silently discarded.
            print("⚠️ Enhanced analysis unavailable (using basic results only)")
            print(f"   ↳ Reason: {type(e).__name__}: {e}")

        print("\n📁 All results saved to:")
        print("   • CSV Files: data/05_output/results/")
        print("   • Reports: data/05_output/reports/")
        print("\n✅ Analysis complete! Run Cell 3 to explore your results.")

    else:
        print("❌ Pipeline execution failed")
        print("💡 Please check your data directories and try again")

except Exception as e:
    # Also reached when Cell 1 has not been run (`pipeline` undefined -> NameError).
    print(f"❌ Error during analysis: {e}")
    print("💡 Please ensure all components are properly initialized")


🚀 Starting Complete PyNucleus Analysis...

📊 This will run:
   1. Document processing and RAG analysis
   2. DWSIM chemical process simulations
   3. Results export and report generation

⏳ Please wait... This may take 20-30 seconds.



Failed to export RAG results: Object of type float32 is not JSON serializable



🎉 Analysis completed successfully in 14.4 seconds!

📊 Results Summary:
   • Documents Processed: 3 queries
   • Simulations Completed: 3 scenarios
   • Files Generated: 1 CSV files
⚠️ Enhanced analysis unavailable (using basic results only)

📁 All results saved to:
   • CSV Files: data/05_output/results/
   • Reports: data/05_output/reports/

✅ Analysis complete! Run Cell 3 to explore your results.


In [3]:
# Cell 3: View Results and Summary
# ===================================
# Prints the pipeline's quick status check, lists the CSV files it reports and
# then shows the pipeline's own results summary. Requires `pipeline` from Cell 1.

print("📊 PyNucleus Results Dashboard", "=" * 40, sep="\n")

try:
    # Status dictionary as reported by the pipeline itself.
    status = pipeline.quick_test()

    print(f"📁 Results Directory: {status['results_dir']}")
    print(f"📄 CSV Files Found: {status['csv_files_count']}")

    # The file listing is only printed when at least one CSV file was counted.
    if status['csv_files_count'] > 0:
        print("\n📋 Available Files:")
        for file_info in status['csv_files']:
            print(f"   • {file_info['name']} ({file_info['size']} bytes)")

    # Separator line, then the detailed summary printed by the pipeline.
    print(f"\n{'=' * 40}")
    pipeline.view_results_summary()

    print(
        "\n🔧 Additional Options:",
        "   • Re-run Cell 2 to generate new results",
        "   • Check data/05_output/ folder for all generated files",
        "   • View Developer_Notebook.ipynb for advanced features",
        sep="\n",
    )

except Exception as exc:
    print(f"❌ Error viewing results: {exc}")
    print("💡 Please run Cell 2 first to generate results")


📊 PyNucleus Results Dashboard
📁 Results Directory: data/05_output/results
📄 CSV Files Found: 2

📋 Available Files:
   • dev_simulation_config.csv (171 bytes)
   • bulk_modular_plants_template.csv (1372 bytes)

📊 PyNucleus System Summary
------------------------------
📁 Main Results Dir (results): 23 JSON files
📁 Results Subdir (results): 28 JSON files
📁 Config Files: 2 CSV files

🔧 Pipeline Status:
   • RAG Pipeline: Initialized
   • DWSIM Pipeline: Initialized
   • Results Directory: data/05_output/results

📋 Recent Generated Files:
   • dwsim_results_20250618_155648.json
   • rag_results_20250618_155648.json
   • integrated_results_20250618_153352.json
   • dwsim_results_20250618_153352.json
   • rag_results_20250618_153352.json

🔧 Additional Options:
   • Re-run Cell 2 to generate new results
   • Check data/05_output/ folder for all generated files
   • View Developer_Notebook.ipynb for advanced features


In [4]:
# Optional Cell 4A: Run Only Document Analysis (RAG)
# ====================================================
# Uncomment and run this cell if you only want document processing

# print("📚 Running Document Analysis Only...")
# rag_results = pipeline.run_rag_only()
# if rag_results:
#     print(f"✅ Processed {len(rag_results['rag_data'])} document queries")
#     print("📁 Results saved to data/05_output/results/")
# else:
#     print("❌ Document analysis failed")


In [5]:
# Optional Cell 4B: Run Only Chemical Simulations (DWSIM)
# =======================================================
# Uncomment and run this cell if you only want DWSIM simulations

# print("🔬 Running Chemical Simulations Only...")
# dwsim_results = pipeline.run_dwsim_only()
# if dwsim_results:
#     print(f"✅ Completed {len(dwsim_results['dwsim_data'])} simulations")
#     print("📁 Results saved to data/05_output/results/")
# else:
#     print("❌ Chemical simulations failed")


In [6]:
# Optional Cell 5: Clean Up Results
# =================================
# Uncomment and run this cell to clear all previous results

# print("🗑️ Cleaning up previous results...")
# pipeline.clean_all_results()
# print("✅ All results cleared. You can now run a fresh analysis.")


In [8]:
# # ========================================
# # VERSION CONTROL (Optional - For Maintainers Only)
# # ========================================
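# # Uncomment the lines below if you need to update the repository.
# # Note: `git add .` stages every change in the working tree, including logs and generated results.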

# from datetime import datetime

# # Append a timestamped entry to update_log.txt (note: this is written before the push below actually runs)
# with open("update_log.txt", "a") as f:
#     f.write(f"\n {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} changes made and pushed to origin main\n")

# # Simple GitHub update function
# def update_github():
#     print(" Starting GitHub update...")
#     !git add .
#     print(" Files added to staging")
#     !git commit -m "Update: $(date +'%Y-%m-%d %H:%M:%S')"
#     print(" Changes committed")
#     !git push origin main
#     print(" Changes pushed to GitHub successfully!")

# # To use it, just run:
# update_github()

 Starting GitHub update...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 Files added to staging
[main 4f810f3] Update: 2025-06-18 15:58:47
 18 files changed, 435 insertions(+), 27 deletions(-)
 create mode 100644 data/validation/results/system_validation_20250618_155738.json
 create mode 100644 logs/pynucleus_20250618_155154.log
 create mode 100644 logs/pynucleus_20250618_155209.log
 create mode 100644 logs/pynucleus_20250618_155245.log
 create mode 100644 logs/pynucleus_20250618_155312.log
 create mode 100644 logs/pynucleus_20250618_155323.log
 create mode 100644 logs/pynucleus_20250618_155351.log
 create mode 100644 logs/pynucleus_20250618_155353.log
 create mode 100644 logs/pynucleus_20250618_155355.log
 create mode 100644 logs/pynucleus_20250618_155410.log
 create mode 100644 logs/pynucleus_20250618_155435.log
 create mode 100644 src/pynucleus/api/__init__.py
 create mode 100644 src/pynucleus/api/app.py
 create mode 100644 tests/test_api_health.py


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 Changes committed


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Enumerating objects: 39, done.
Counting objects: 100% (39/39), done.
Delta compression using up to 8 threads
Compressing objects: 100% (25/25), done.
Writing objects: 100% (26/26), 7.19 KiB | 7.19 MiB/s, done.
Total 26 (delta 13), reused 0 (delta 0), pack-reused 0 (from 0)
remote: Resolving deltas: 100% (13/13), completed with 10 local objects.[K
To https://github.com/Saytor20/PyNucleus-Model.git
   2a549d0..4f810f3  main -> main
 Changes pushed to GitHub successfully!
