In [None]:
# NOTE(review): This whole cell is intentionally disabled (every line is commented out).
# It holds the optional GitHub sync bootstrap: load credentials, pull the latest
# changes, and append a start entry to update_log.txt.
# NOTE(review): if re-enabled, the `git pull` line below embeds GITHUB_TOKEN in the
# remote URL and the print lines echo the first characters of the token — confirm
# that is acceptable first, because cell output is saved together with the notebook.
# import os
# from datetime import datetime
# from dotenv import load_dotenv
# #
# # #--------Google Drive Integration--------#
# # # from google.colab import drive, userdata
# # # This gives Colab access to your files in Google Drive.
# # # drive.mount('/content/drive')
# # # 'GITHUB_USERNAME' and 'GITHUB_TOKEN' saved as secrets in Colab.
# # GITHUB_USERNAME = userdata.get('GITHUB_USERNAME')
# # GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')
# # REPOSITORY_NAME = 'PyNucleus-Model' # Your repository name
# # NOTEBOOK_DRIVE_PATH = "/content/drive/MyDrive/PyNucleus Project/Capstone Project.ipynb"
# #
# #
# # #--------Cursor Integration--------#
# # # Load environment variables from .env file
# load_dotenv()
# #
# # # Get GitHub credentials from environment variables
# GITHUB_USERNAME = os.getenv('GITHUB_USERNAME')
# GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
# #
# # # Print to verify the variables are loaded (remove this in production)
# print(f"Username: {GITHUB_USERNAME}")
# print(f"Token: {GITHUB_TOKEN[:4]}...") # Only print first 4 chars of token for security
# #
# # Repository information
# REPOSITORY_NAME = 'PyNucleus-Model'
# NOTEBOOK_REPO_FILENAME = "Capstone Project.ipynb"
# LOG_FILENAME = "update_log.txt"

# # Pull latest changes from GitHub
# print("Pulling latest changes from GitHub...")
# !git pull https://{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{REPOSITORY_NAME}.git main

# print("Repository is up to date!")

# # Log start time
# with open("update_log.txt", "a") as f:
#     f.write(f" {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: Log Update\n")

In [47]:
# PyNucleus Model - Setup and Imports
import sys
import os
from pathlib import Path

# Add project root to path.
# The append is guarded so that re-running this cell does not keep adding
# duplicate entries to sys.path.
# NOTE(review): this treats the current working directory as the project
# root — confirm the notebook is always launched from there.
project_root = os.path.abspath('.')
if project_root not in sys.path:
    sys.path.append(project_root)

# Import PyNucleus Pipeline modules
from core_modules.pipeline import RAGPipeline, DWSIMPipeline, ResultsExporter, PipelineUtils

# Readiness banner: one entry per printed line, emitted in a single call.
banner_lines = (
    " PyNucleus Model - Pipeline Ready!",
    " Available Components:",
    "   • RAGPipeline - Document processing and retrieval",
    "   • DWSIMPipeline - Chemical process simulation",
    "   • ResultsExporter - CSV export functionality",
    "   • PipelineUtils - Complete pipeline orchestration",
)
print(*banner_lines, sep="\n")

 PyNucleus Model - Pipeline Ready!
 Available Components:
   • RAGPipeline - Document processing and retrieval
   • DWSIMPipeline - Chemical process simulation
   • ResultsExporter - CSV export functionality
   • PipelineUtils - Complete pipeline orchestration


# **PyNucleus Model - Complete Pipeline**

This notebook contains the complete PyNucleus model pipeline with separate sections for:
1. **Data Ingestion and Preprocessing for RAG** 
2. **DWSIM Integration and Simulation**
3. **Results Export to CSV**

In [None]:
# Build the orchestrator object that the later cells call into.
pipeline = PipelineUtils()

# Quick-reference menu of the entry points used further down the notebook;
# one tuple entry per printed line.
function_menu = (
    "\n🔧 Pipeline Initialized!",
    "Available Functions:",
    "   • pipeline.run_complete_pipeline() - Run everything",
    "   • pipeline.run_rag_only() - RAG pipeline only",
    "   • pipeline.run_dwsim_only() - DWSIM simulations only",
    "   • pipeline.quick_test() - Verify status",
    "   • pipeline.view_results_summary() - View results",
    "   • pipeline.print_pipeline_status() - Detailed status",
    "   • pipeline.clean_all_results() - Clean all data",
)
print(*function_menu, sep="\n")


In [None]:
# ========================================
# SECTION 1: COMPLETE PIPELINE - Run Everything
# ========================================

# Execute every stage in one call (RAG + DWSIM + Export)
results = pipeline.run_complete_pipeline()

# Report the outcome; a falsy return value is treated as a failed run.
if not results:
    print("❌ Pipeline execution failed")
else:
    # Each value is looked up immediately before its line is printed, so the
    # lines appear one at a time in this order.
    elapsed_seconds = results['duration']
    print(f"\n🎉 Pipeline completed in {elapsed_seconds:.1f} seconds!")
    rag_query_count = len(results['rag_data'])
    print(f"📊 RAG Results: {rag_query_count} queries processed")
    simulation_count = len(results['dwsim_data'])
    print(f"🔬 DWSIM Results: {simulation_count} simulations completed")
    exported_file_count = len(results['exported_files'])
    print(f"📁 Exported Files: {exported_file_count} CSV files created")

In [None]:
# ========================================
# SECTION 2: INDIVIDUAL PIPELINE COMPONENTS
# ========================================

# Option 1: RAG stage on its own
print("📚 RAG Only Pipeline:")
rag_results = pipeline.run_rag_only()
if rag_results:
    rag_query_count = len(rag_results['rag_data'])
    print(f"   ✅ {rag_query_count} RAG queries processed")

# Visual divider between the two options
print(f"\n{'=' * 50}\n")

# Option 2: DWSIM simulations on their own
print("🔬 DWSIM Only Pipeline:")
dwsim_results = pipeline.run_dwsim_only()
if dwsim_results:
    simulation_count = len(dwsim_results['dwsim_data'])
    print(f"   ✅ {simulation_count} DWSIM simulations completed")


In [None]:
# ========================================
# SECTION 3: UTILITY FUNCTIONS
# ========================================

# Divider printed between the utility reports below.
section_divider = "\n" + "=" * 50 + "\n"

# Detailed pipeline status
pipeline.print_pipeline_status()
print(section_divider)

# Summary of the stored results
pipeline.view_results_summary()
print(section_divider)

# Quick test; its result is indexed by 'csv_files_count' for the final line
test_results = pipeline.quick_test()
csv_file_count = test_results['csv_files_count']
print(f"✅ Quick test completed! Found {csv_file_count} CSV files")


In [None]:
# ========================================
# SECTION 4: SIMPLE FUNCTION CALLS
# ========================================

print("🎯 Simple Function Calls for Easy Testing:")
print()

# Individual Component Access
print("📚 Access individual components:")
print("   • rag = pipeline.rag_pipeline")
print("   • dwsim = pipeline.dwsim_pipeline")
print("   • exporter = pipeline.exporter")
print()

# Custom Simulations: an ad-hoc case description passed to the DWSIM pipeline
print("🧪 Run custom simulations:")
custom_case = {
    "name": "custom_test",
    "type": "reactor",
    "components": ["hydrogen", "nitrogen"],
    "description": "Custom ammonia synthesis reactor"
}

print(f"Running custom simulation: {custom_case['name']}")
custom_result = pipeline.dwsim_pipeline.run_single_simulation(custom_case)

# Report the actual outcome. Previously any truthy result printed a ✅ line
# even when its 'success' entry was falsy, and a missing result printed nothing.
# NOTE(review): 'success' is presumably a boolean flag — confirm against
# run_single_simulation's return value.
if not custom_result:
    print("   ❌ Custom simulation returned no result")
elif custom_result['success']:
    print(f"   ✅ Custom simulation completed: {custom_result['success']}")
else:
    print(f"   ❌ Custom simulation failed: {custom_result['success']}")

print()
print("📁 All results automatically saved as CSV files in ./results/ directory")
print("🔄 Run any cell again to re-execute that specific pipeline component")

In [None]:
# Optional reset: the call below stays disabled unless a clean slate is wanted.
# pipeline.clean_all_results()  # Uncomment to clean all previous results

# Closing usage reminders, one tuple entry per printed line.
closing_notes = (
    " PyNucleus Pipeline is ready!",
    " Use the functions above to run different parts of the pipeline",
    " All results are automatically exported to CSV files",
    " You can run any cell multiple times to re-execute components",
)
print(*closing_notes, sep="\n")

In [None]:
# Condensed recap of Sections 1-3: the same calls, without the result reporting.
# Running this cell rebinds `results`, `rag_results` and `dwsim_results`, which
# earlier cells also assign.
# NOTE(review): the Section 1 comment describes run_complete_pipeline() as
# "RAG + DWSIM + Export", so the two individual runs below may repeat work that
# the first call already did — confirm this is intended.

# Complete pipeline
results = pipeline.run_complete_pipeline()

# Individual components
rag_results = pipeline.run_rag_only()
dwsim_results = pipeline.run_dwsim_only()

# Utilities
# The return value of quick_test() is discarded here (Section 3 keeps it).
pipeline.quick_test()
pipeline.view_results_summary()
# This is the cell's final expression, so the notebook will also echo its
# return value if it is not None.
pipeline.print_pipeline_status()

This is the last step of the notebook; it is kept for version-control purposes.

In [None]:
# NOTE(review): This cell is intentionally disabled (every line is commented out).
# When enabled it appends an end-of-session entry to update_log.txt, then stages,
# commits and pushes the working tree to origin main through IPython `!` shell calls.
# NOTE(review): `git add .` stages everything in the working directory — confirm
# that the .env file mentioned in the first cell is git-ignored before re-enabling.
# from datetime import datetime

# # Log end time
# with open("update_log.txt", "a") as f:
#     f.write(f"\n {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} changes made and pushed to origin main\n")

# # Simple GitHub update function
# def update_github():
#     print(" Starting GitHub update...")
#     !git add .
#     print("📦 Files added to staging")
#     !git commit -m "Update: $(date +'%Y-%m-%d %H:%M:%S')"
#     print("💾 Changes committed")
#     !git push origin main
#     print("✅ Changes pushed to GitHub successfully!")

# # To use it, just run:
# update_github()