In [None]:
# Cell 1: ChromaDB Performance & Chunking Analysis
# =================================================
# This cell analyzes ChromaDB performance and chunking strategies

import sys
from pathlib import Path
from datetime import datetime
import json

# Banner: title, a 50-character rule, and the wall-clock start time.
analysis_started = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
for banner_line in (
    "🔍 ChromaDB Performance & Chunking Analysis",
    "=" * 50,
    "📅 Analysis started: " + analysis_started,
):
    print(banner_line)

# Make ./src (relative to the working directory) importable, at the front of
# the search path, unless it is already listed.
src_path = str(Path().resolve().joinpath("src"))
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

def analyze_data_structure():
    """Report which of the expected project data locations are present.

    Every known path (relative to the current working directory) gets one
    output line: a directory is listed with the number of files and
    sub-directories directly inside it, a plain file is reported as existing,
    and anything else is reported as missing. Nothing is returned.
    """
    print("\n📁 Data Structure Analysis:")

    expected_locations = (
        ('data/01_raw/source_documents', 'Source documents for processing'),
        ('data/01_raw/web_sources', 'Web-scraped content'),
        ('data/03_intermediate/converted_chunked_data', 'Text chunks from processing'),
        ('data/03_intermediate/vector_db', 'ChromaDB database storage'),
        ('data/04_models/chunk_reports', 'Chunking performance analysis'),
        ('data/04_models/recall_evaluation', 'RAG system recall metrics'),
        ('data/validation', 'Golden dataset and validation results'),
    )

    for location, purpose in expected_locations:
        target = Path(location)

        if not target.exists():
            print(f"   ❌ {location}: Missing - {purpose}")
            continue

        if not target.is_dir():
            print(f"   📄 {location}: File exists - {purpose}")
            continue

        # Only direct children are counted; nested content is not descended into.
        n_files = sum(1 for child in target.iterdir() if child.is_file())
        n_dirs = sum(1 for child in target.iterdir() if child.is_dir())
        print(f"   ✅ {location}: {n_files} files, {n_dirs} subdirs - {purpose}")

def analyze_chunking_strategy():
    """Print the embedding/retrieval settings and inspect one chunk file.

    Reads ``EMB_MODEL`` and ``RETRIEVE_TOP_K`` from ``pynucleus.settings``,
    then looks for ``*.json`` files directly under
    ``data/03_intermediate/converted_chunked_data`` (relative to the current
    working directory). The alphabetically first file is loaded as a sample
    and the keys of its record are printed. The file may contain either a
    single JSON object or a JSON array of objects.

    Nothing is returned. Any failure (missing package, unreadable or invalid
    file, ...) is reported on stdout rather than raised, so the rest of the
    notebook cell keeps running.
    """
    print("\n🔤 Chunking Strategy Analysis:")

    try:
        from pynucleus.settings import settings
        print(f"   • Embedding Model: {settings.EMB_MODEL}")
        print(f"   • Retrieval Top-K: {settings.RETRIEVE_TOP_K}")

        # Check if chunk data exists
        chunk_dir = Path("data/03_intermediate/converted_chunked_data")
        if not chunk_dir.exists():
            print(f"   ⚠️ No chunk data found in {chunk_dir}")
            return

        # glob() yields entries in filesystem order; sort so the same sample
        # file is picked on every run.
        chunk_files = sorted(chunk_dir.glob("*.json"))
        print(f"   • Chunk Files: {len(chunk_files)} files found")
        if not chunk_files:
            return

        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(chunk_files[0], 'r', encoding='utf-8') as f:
            sample_chunk = json.load(f)

        # A JSON array has no .keys(); describe its first object instead of
        # reporting the whole file as an error.
        if isinstance(sample_chunk, list):
            print(f"   • Sample file records: {len(sample_chunk)}")
            sample_chunk = next(
                (record for record in sample_chunk if isinstance(record, dict)),
                None,
            )

        if isinstance(sample_chunk, dict):
            print(f"   • Sample chunk keys: {list(sample_chunk.keys())}")
        else:
            print("   ⚠️ Sample chunk file contains no key/value records")

    except Exception as e:
        print(f"   ❌ Error analyzing chunking: {e}")

def analyze_chromadb_performance():
    """Report on the ChromaDB directory and run a one-document retrieval probe.

    Prints the configured ``settings.CHROMA_PATH``, whether it exists and, if
    so, the total size and number of files found beneath it. It then imports
    ``retrieve`` from ``pynucleus.rag.engine`` and queries it once with
    ``"chemical engineering"`` and ``top_k=1``, reporting how many results
    came back and the length of the first one.

    Nothing is returned; failures are printed instead of raised.
    """
    print("\n🗄️ ChromaDB Performance Analysis:")

    try:
        from pynucleus.settings import settings

        store_dir = Path(settings.CHROMA_PATH)
        print(f"   • ChromaDB Path: {settings.CHROMA_PATH}")

        store_present = store_dir.exists()
        presence_mark = '✅' if store_present else '❌'
        print(f"   • Database Exists: {presence_mark}")

        if store_present:
            # Walk the whole tree once and keep regular files only.
            stored_files = [entry for entry in store_dir.rglob("*") if entry.is_file()]
            size_bytes = sum(entry.stat().st_size for entry in stored_files)
            print(f"   • Database Size: {size_bytes / 1024 / 1024:.2f} MB")
            print(f"   • Database Files: {len(stored_files)} files")

        # The retrieval probe has its own handler so that an engine failure
        # is reported separately from the directory statistics above.
        try:
            from pynucleus.rag.engine import retrieve
            print("   • Retrieval Engine: ✅ Available")

            hits = retrieve("chemical engineering", top_k=1)
            if not hits or len(hits) <= 0:
                print("   • Test Retrieval: ⚠️ No documents found")
            else:
                print(f"   • Test Retrieval: ✅ {len(hits)} documents found")
                first_hit = hits[0]
                sample_chars = len(first_hit) if first_hit else 0
                print(f"   • Sample Document Length: {sample_chars} characters")

        except Exception as probe_error:
            print(f"   • Retrieval Engine: ❌ Error - {probe_error}")

    except Exception as analysis_error:
        print(f"   ❌ ChromaDB analysis failed: {analysis_error}")

def performance_recommendations():
    """Print the fixed list of tuning tips for the RAG pipeline.

    The tips are static text covering chunk size, embedding model choice,
    ChromaDB persistence, top-k, storage and first-query latency. Nothing is
    measured here and nothing is returned.
    """
    print("\n💡 Performance Optimization Recommendations:")

    tips = (
        "🔤 Chunking: Ensure optimal chunk size (512-1024 tokens) for your documents",
        "🧮 Embeddings: Use 'all-MiniLM-L6-v2' for faster processing or 'all-mpnet-base-v2' for better quality",
        "🗄️ ChromaDB: Enable persistence and consider indexing parameters for large datasets",
        "🔍 Retrieval: Tune top-k value (4-8) based on your accuracy requirements",
        "💾 Storage: Monitor database size and consider compression for large document sets",
        "⚡ Performance: First query is slower (model loading), subsequent queries are faster",
    )

    # One tip per output line, each indented by three spaces.
    print("\n".join(f"   {tip}" for tip in tips))

# Run the four report functions in order; any exception that escapes one of
# them stops the sequence and is turned into troubleshooting hints.
try:
    for analysis_step in (
        analyze_data_structure,
        analyze_chunking_strategy,
        analyze_chromadb_performance,
        performance_recommendations,
    ):
        analysis_step()

    print("\n✅ ChromaDB Performance Analysis Complete!")
    print("📝 Next: Run Cell 2 to initialize the PyNucleus system")

except Exception as analysis_error:
    print(f"❌ Analysis Error: {analysis_error}")
    print("\n💡 Troubleshooting:")
    for hint in (
        "Ensure you're in the PyNucleus-Model directory",
        "Check that the data directory structure exists",
        "Try running this cell again after initializing the system",
    ):
        print(f"   • {hint}")


In [None]:
# Cell 2: System Initialization & Validation
# ==========================================
# This cell sets up PyNucleus Clean and validates all components

import sys
from pathlib import Path
from datetime import datetime

print("🔧 Initializing PyNucleus Clean Architecture...")
session_started = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("📅 Session started: " + session_started)

# Put ./src at the front of the import path unless it is already listed.
src_path = str(Path().resolve().joinpath("src"))
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

try:
    # Project imports. settings, logger, ingest, ask, retrieve and generate
    # stay in the notebook namespace and are used by the cells that follow.
    from pynucleus.settings import settings
    from pynucleus.utils.logger import logger
    from pynucleus.rag.collector import ingest
    from pynucleus.rag.engine import ask, retrieve
    from pynucleus.llm.qwen_loader import generate

    print("✅ PyNucleus Clean modules imported successfully")

    # Echo the active configuration, reading one settings attribute per line.
    print("📋 Configuration:")
    for label, attr_name in (
        ("ChromaDB Path", "CHROMA_PATH"),
        ("Model", "MODEL_ID"),
        ("Embedding Model", "EMB_MODEL"),
        ("Max Tokens", "MAX_TOKENS"),
        ("Retrieve Top-K", "RETRIEVE_TOP_K"),
        ("Log Level", "LOG_LEVEL"),
        ("Use CUDA", "USE_CUDA"),
    ):
        print(f"   • {label}: {getattr(settings, attr_name)}")

    # Emit one record through the project logger as a smoke test.
    logger.info("PyNucleus Clean initialization successful")

    print("✅ Configuration validated")
    print("✅ Logging system active")

    print("\n📋 System Components Ready:")
    for component in (
        "📚 ChromaDB Vector Store - Modern document indexing",
        "🤖 Qwen Model - Efficient quantized AI generation",
        "📊 Document Ingestion - Text processing and embedding",
        "💡 RAG Engine - Retrieval-augmented generation",
        "🔍 Golden Dataset Evaluation - Validation and testing",
    ):
        print(f"   • {component}")

    # Tell the user which cell to run next, depending on whether a vector
    # database directory is already on disk.
    chroma_path = Path(settings.CHROMA_PATH)
    if not chroma_path.exists():
        print(f"\n📁 Vector Database: {settings.CHROMA_PATH} (❌ Not Found)")
        print("🎯 System ready! Execute Cell 3 to ingest documents.")
    else:
        print(f"\n📁 Vector Database: {settings.CHROMA_PATH} (✅ Exists)")
        print("🎯 System ready! You can skip to Cell 4 if documents are already ingested.")

    # Flag checked by later cells via `'system_initialized' in globals()`.
    system_initialized = True

except ImportError as import_error:
    print(f"❌ Import Error: {import_error}")
    print("\n💡 Troubleshooting:")
    for hint in (
        "Ensure you're in the PyNucleus-Model directory",
        "Try: pip install -e .",
        "Check dependencies: pip install tiktoken sentence-transformers chromadb",
        "Try restarting the kernel",
    ):
        print(f"   • {hint}")
except Exception as init_error:
    print(f"❌ Initialization Error: {init_error}")
    print("\n💡 Troubleshooting:")
    for hint in (
        "Check your Python environment setup",
        "Verify all required directories exist",
        "For advanced diagnostics, see Developer_Notebook_Clean.ipynb",
    ):
        print(f"   • {hint}")


In [None]:
# Cell 3: Document Ingestion
# ===========================
# This cell processes documents into the ChromaDB vector store

for intro_line in (
    "📚 Starting Document Ingestion...",
    "\n📊 Processing Pipeline:",
    "   1. 📁 Scan source directory for text files",
    "   2. 🔤 Extract and clean text content",
    "   3. 🧮 Generate embeddings using SentenceTransformers",
    "   4. 💾 Store in ChromaDB with persistent storage",
):
    print(intro_line)

# Everything below uses names created by Cell 2 (datetime, Path, logger,
# ingest, retrieve, settings), so it only runs once that cell has set its flag.
if 'system_initialized' in globals():
    print("\n⏳ Please wait... Document processing may take 30-60 seconds.")

    try:
        start_time = datetime.now()

        logger.info("Starting document ingestion process")

        # Ingest from the first candidate directory that directly contains
        # .txt files; later candidates are not looked at after a hit.
        for source_dir in ("data/01_raw/source_documents", "data/01_raw"):
            candidate = Path(source_dir)
            if not candidate.exists():
                continue
            txt_files = list(candidate.glob("*.txt"))
            if not txt_files:
                continue
            print(f"   📄 Found {len(txt_files)} .txt files in {source_dir}")
            ingest(source_dir=source_dir)
            break
        else:
            # The loop ended without `break`: no directory had .txt files.
            print("   ⚠️ No .txt files found in source directories")
            print("   💡 Please add documents to data/01_raw/source_documents/")
            raise FileNotFoundError("No source documents found")

        duration = (datetime.now() - start_time).total_seconds()

        print(f"\n🎉 Document ingestion completed in {duration:.1f} seconds!")

        # Probe the freshly built store with a sample query.
        test_docs = retrieve("chemical engineering")
        if test_docs:
            doc_count = len(test_docs)
        else:
            doc_count = 0

        print("\n📊 Ingestion Results:")
        print(f"   • Vector Database: {settings.CHROMA_PATH}")
        print("   • Collection: docs")
        print(f"   • Test Query Results: {doc_count} documents retrieved")
        print(f"   • Processing Time: {duration:.1f} seconds")

        if doc_count > 0:
            print("\n📋 Sample Retrieved Content:")
            # Show at most the first 200 characters of the first result.
            first_doc = test_docs[0]
            if len(first_doc) > 200:
                sample_doc = first_doc[:200] + "..."
            else:
                sample_doc = first_doc
            print(f"   '{sample_doc}'")

        print("\n✅ Document ingestion complete! Run Cell 4 to start asking questions.")

        # Flag checked by later cells via `'ingestion_completed' in globals()`.
        ingestion_completed = True

    except Exception as ingest_error:
        print(f"❌ Ingestion Error: {ingest_error}")
        print("\n💡 Troubleshooting:")
        for hint in (
            "Ensure Cell 2 completed successfully",
            "Check that data/01_raw/ contains .txt files",
            "Verify sufficient disk space for vector database",
            "Try restarting the kernel and re-running Cell 2",
        ):
            print(f"   • {hint}")

        import traceback
        print("\n🔧 Technical details (for developers):")
        print(f"   Error type: {type(ingest_error).__name__}")
        # Only the first five traceback lines are shown, skipping blank ones.
        for tb_line in traceback.format_exc().split('\n')[:5]:
            if not tb_line.strip():
                continue
            print(f"   {tb_line}")
else:
    print("\n⚠️ Please run Cell 2 (System Initialization) first.")


In [None]:
# Cell 4: Interactive Q&A System
# ================================
# This cell demonstrates the RAG system with sample questions

print("🚀 PyNucleus Clean Q&A System")
print("=" * 50)

# This cell uses names created by Cell 2 (ask, retrieve, datetime) and the
# 'system_initialized' / 'ingestion_completed' flags set by Cells 2 and 3.

# Check if ingestion was completed
if 'ingestion_completed' not in globals() and 'system_initialized' not in globals():
    print("⚠️ Please run Cell 2 (System Initialization) and Cell 3 (Document Ingestion) first.")
elif 'ingestion_completed' not in globals():
    print("⚠️ Please run Cell 3 (Document Ingestion) first, or check if documents are already loaded.")
    # Cell 3 was not run in this session, but a database persisted by an
    # earlier session may still be usable: probe it with a one-document query.
    try:
        test_retrieve = retrieve("test", top_k=1)
        if test_retrieve:
            print("✅ Vector database appears to be loaded. Proceeding with Q&A...")
            globals()['ingestion_completed'] = True
        else:
            print("❌ No documents found in vector database.")
    except Exception as e:
        # Catch Exception (not everything) so Ctrl-C still interrupts the
        # cell, and show the cause instead of hiding it.
        print(f"❌ Cannot access vector database: {e}")

# Run the demo only when documents are known to be retrievable. Gating on
# 'system_initialized' as well would let the demo run, and report a ready
# database, immediately after the probe above reported that nothing is there.
if 'ingestion_completed' in globals():
    print("🎯 Ask questions about chemical engineering!")
    print("\n📋 Sample Questions to Try:")
    sample_questions = [
        "What are the advantages of modular chemical plants?",
        "How does distillation work in chemical processes?",
        "What factors affect reactor conversion efficiency?",
        "Why do modular plants reduce costs?",
        "What are the key principles of process safety?",
        "How do heat exchangers improve energy efficiency?"
    ]

    for i, question in enumerate(sample_questions, 1):
        print(f"   {i}. {question}")

    print(f"\n🚀 Testing with sample questions...")

    # Test with a few sample questions
    test_questions = [
        "What are the advantages of modular chemical plants?",
        "How does distillation work?",
        "Why do modular plants reduce costs?"
    ]

    # Number of sample questions that returned without raising; drives the
    # status summary at the end of the cell.
    answered = 0

    for i, question in enumerate(test_questions, 1):
        print(f"\n🔍 Question {i}: {question}")

        try:
            start_time = datetime.now()
            result = ask(question)
            duration = (datetime.now() - start_time).total_seconds()

            answer = result.get("answer", "No answer generated")
            sources = result.get("sources", [])

            print(f"⏱️ Response time: {duration:.2f} seconds")
            # Long answers are cut to 300 characters for display.
            print(f"📝 Answer: {answer[:300]}{'...' if len(answer) > 300 else ''}")
            print(f"📚 Sources: {len(sources)} documents used")

            if sources:
                # Preview at most the first 100 characters of the first source.
                preview = sources[0]
                if len(preview) > 100:
                    preview = preview[:100] + "..."
                print(f"🔗 Source preview: '{preview}'")

            answered += 1

        except Exception as e:
            print(f"❌ Error: {e}")

    print(f"\n" + "=" * 50)
    print("💡 To ask custom questions, use:")
    print("   result = ask('Your question here')")
    print("   print(result['answer'])")
    print("   print(result['sources'])")

    # Report what actually happened rather than an unconditional "all good".
    total = len(test_questions)
    model_status = "✅ Loaded" if answered else "❌ No answers generated"
    if answered == total:
        qa_status = "✅ Active"
    else:
        qa_status = f"⚠️ {answered}/{total} sample questions answered"

    print(f"\n🎯 System Status:")
    print(f"   • Vector Database: ✅ Ready")
    print(f"   • AI Model: {model_status}")
    print(f"   • Q&A System: {qa_status}")

    if answered:
        print(f"\n✅ Interactive Q&A ready! Run Cell 5 to view detailed results.")
    else:
        print(f"\n⚠️ No sample question could be answered. Run Cell 5 for a health check.")
elif 'system_initialized' in globals():
    print("💡 Q&A demo skipped: ingest documents with Cell 3, then re-run this cell.")


In [None]:
# Cell 5: System Status & Results Dashboard
# ==========================================
# This cell shows comprehensive system status and usage examples

print("📊 PyNucleus Clean Results Dashboard")
print("=" * 50)

# This cell uses retrieve, generate, settings and Path from Cell 2. The
# outcome of the two health checks is remembered so that the closing summary
# reflects them instead of assuming success.
db_ok = False
model_ok = False

try:
    # System health check
    print("🔍 System Health Check:")

    # Check vector database
    try:
        test_retrieval = retrieve("test", top_k=1)
        # Test truthiness, not `is not None`: an empty result list means the
        # store has no documents and must not be reported as ready.
        db_ok = bool(test_retrieval)
        db_status = "✅ Ready" if db_ok else "⚠️ Empty"
        print(f"   • ChromaDB Vector Store: {db_status}")

        if db_ok:
            # Get some statistics
            sample_retrieval = retrieve("chemical engineering", top_k=10)
            print(f"   • Sample Retrieval: {len(sample_retrieval)} documents found")

    except Exception as e:
        print(f"   • ChromaDB Vector Store: ❌ Error - {e}")

    # Check AI model with a tiny generation request
    try:
        test_generation = generate("Test prompt", max_tokens=10)
        model_ok = bool(test_generation)
        model_status = "✅ Ready" if model_ok else "❌ Error"
        print(f"   • Qwen AI Model: {model_status}")
    except Exception as e:
        print(f"   • Qwen AI Model: ❌ Error - {e}")

    # Configuration status
    print(f"\n⚙️ Configuration:")
    print(f"   • Model: {settings.MODEL_ID}")
    print(f"   • Vector DB: {settings.CHROMA_PATH}")
    print(f"   • Embedding Model: {settings.EMB_MODEL}")
    print(f"   • Max Tokens: {settings.MAX_TOKENS}")
    print(f"   • Top-K Retrieval: {settings.RETRIEVE_TOP_K}")

    # Database statistics: total size and count of regular files under the
    # configured ChromaDB directory.
    chroma_path = Path(settings.CHROMA_PATH)
    if chroma_path.exists():
        db_files = [f for f in chroma_path.rglob("*") if f.is_file()]
        total_size = sum(f.stat().st_size for f in db_files)
        print(f"\n📊 Database Statistics:")
        print(f"   • Database Size: {total_size / 1024 / 1024:.2f} MB")
        print(f"   • Database Files: {len(db_files)} files")

    # Usage examples
    print(f"\n💡 Usage Examples:")
    print(f"")
    print(f"📝 Basic Question:")
    print(f"   result = ask('What is distillation?')")
    print(f"   print(result['answer'])")
    print(f"")
    print(f"🔍 Document Retrieval:")
    print(f"   docs = retrieve('modular plants', top_k=3)")
    print(f"   for doc in docs:")
    print(f"       print(doc[:100])")
    print(f"")
    print(f"🤖 Direct Generation:")
    print(f"   response = generate('Explain chemical processes', max_tokens=100)")
    print(f"   print(response)")

    # CLI usage
    print(f"\n🔧 Command Line Interface:")
    print(f"   # Ingest documents")
    print(f"   pynucleus ingest_docs --source-dir data/01_raw")
    print(f"")
    print(f"   # Ask questions")
    print(f"   pynucleus ask 'What are the benefits of modular plants?'")
    print(f"")
    print(f"   # Run evaluation")
    print(f"   pynucleus eval_golden")

    # Performance metrics
    print(f"\n📈 Performance Tips:")
    print(f"   • First question may be slower (model loading)")
    print(f"   • Subsequent questions are faster (~1-2 seconds)")
    print(f"   • Use specific technical terms for better retrieval")
    print(f"   • ChromaDB persists between sessions")
    print(f"   • Optimal chunk size: 512-1024 tokens")

    # Next steps
    print(f"\n🚀 Next Steps:")
    print(f"   • 📚 Add more documents to data/01_raw/source_documents/")
    print(f"   • 🔄 Re-run Cell 3 to update vector database")
    print(f"   • 🔧 Use Developer_Notebook_Clean.ipynb for advanced features")
    print(f"   • ⚙️ Modify settings.py for custom configuration")
    print(f"   • 🔍 Run Cell 6 for golden dataset evaluation")

    print(f"\n🎯 System Summary:")
    if 'ingestion_completed' in globals():
        print(f"   ✅ Documents processed and indexed")
        print(f"   ✅ Q&A system ready")
        # Only claim full health when both checks above succeeded.
        if db_ok and model_ok:
            print(f"   ✅ All components functional")
        else:
            print(f"   ⚠️ Some components reported problems - see the health check above")
    else:
        print(f"   ⚠️ Run Cell 3 to complete document ingestion")

except Exception as e:
    print(f"❌ Dashboard Error: {e}")
    print("\n💡 Troubleshooting:")
    print("   • Ensure all previous cells completed successfully")
    print("   • Check system configuration and dependencies")
    print("   • Try restarting the kernel and re-running all cells")

print(f"\n✅ PyNucleus Clean system analysis complete!")


In [None]:
# Cell 6: Golden Dataset Evaluation
# ==================================
# This cell runs validation tests using the golden dataset

print("🔍 Golden Dataset Evaluation")
print("=" * 50)

# This cell uses datetime and Path from Cell 2.
# evaluation_finished records whether every threshold run completed, so the
# closing banner only claims success when the evaluation really ran.
evaluation_finished = False
results = {}

# Check if system is ready
if 'system_initialized' not in globals():
    print("⚠️ Please run Cell 2 (System Initialization) first.")
else:
    try:
        from pynucleus.eval.golden_eval import run_eval

        print("🎯 Running Golden Dataset Evaluation...")
        print("\n📋 Evaluation Process:")
        print("   1. 📄 Load golden dataset from data/validation/golden_dataset.csv")
        print("   2. 🔍 Query each question through the RAG system")
        print("   3. 🎯 Check if expected keywords appear in responses")
        print("   4. 📊 Calculate overall accuracy score")

        # Check if golden dataset exists
        golden_path = Path("data/validation/golden_dataset.csv")
        if not golden_path.exists():
            print(f"\n❌ Golden dataset not found at {golden_path}")
            print("💡 Please ensure the golden dataset file exists")
        else:
            # Count questions in dataset
            import pandas as pd
            df = pd.read_csv(golden_path)
            question_count = len(df)

            print(f"\n📊 Dataset Info:")
            print(f"   • Questions: {question_count}")
            # The 'domain' and 'difficulty' columns are descriptive only;
            # skip them when absent rather than aborting the evaluation.
            if 'domain' in df.columns:
                print(f"   • Domains: {len(df['domain'].unique())} unique domains")
            if 'difficulty' in df.columns:
                print(f"   • Difficulty levels: {list(df['difficulty'].unique())}")

            print(f"\n⏳ Running evaluation... This may take 1-2 minutes.")

            start_time = datetime.now()

            # Run evaluation with different thresholds.
            # NOTE(review): run_eval is called once per threshold; if each
            # call re-queries every question, the timings below cover three
            # passes - confirm against run_eval's implementation.
            thresholds = [0.6, 0.7, 0.8]

            for threshold in thresholds:
                success = run_eval(threshold=threshold)
                results[threshold] = success
                print(f"   📊 Threshold {threshold:.0%}: {'✅ PASSED' if success else '❌ FAILED'}")

            end_time = datetime.now()
            duration = (end_time - start_time).total_seconds()
            evaluation_finished = True

            print(f"\n🎉 Evaluation completed in {duration:.1f} seconds!")

            # Summary
            print(f"\n📊 Evaluation Summary:")
            print(f"   • Total Questions: {question_count}")
            print(f"   • Evaluation Time: {duration:.1f} seconds")
            # An empty dataset has no per-question average; avoid dividing by zero.
            if question_count:
                print(f"   • Average Time per Question: {duration/question_count:.1f} seconds")

            # Recommendations based on results (highest passed threshold wins)
            if results.get(0.8, False):
                print(f"\n🎯 Performance Assessment: EXCELLENT")
                print(f"   ✅ System exceeds 80% accuracy threshold")
                print(f"   ✅ Ready for production use")
            elif results.get(0.7, False):
                print(f"\n🎯 Performance Assessment: GOOD")
                print(f"   ✅ System meets 70% accuracy threshold")
                print(f"   💡 Consider adding more domain-specific documents")
            elif results.get(0.6, False):
                print(f"\n🎯 Performance Assessment: ACCEPTABLE")
                print(f"   ⚠️ System meets 60% accuracy threshold")
                print(f"   💡 Recommendations:")
                print(f"      • Add more comprehensive source documents")
                print(f"      • Review chunking strategy")
                print(f"      • Consider fine-tuning retrieval parameters")
            else:
                print(f"\n🎯 Performance Assessment: NEEDS IMPROVEMENT")
                print(f"   ❌ System below 60% accuracy threshold")
                print(f"   💡 Action Items:")
                print(f"      • Review and expand document collection")
                print(f"      • Verify document quality and relevance")
                print(f"      • Check embedding model configuration")
                print(f"      • Consider adjusting chunk size and overlap")

            print(f"\n🔧 Advanced Analysis:")
            print(f"   • For detailed per-question analysis, see Developer_Notebook_Clean.ipynb")
            print(f"   • For system diagnostics, run: python scripts/system_validator.py")
            print(f"   • For comprehensive testing, run: python scripts/comprehensive_system_diagnostic.py")

    except ImportError as e:
        print(f"❌ Import Error: {e}")
        print("💡 Ensure golden evaluation module is available")
    except Exception as e:
        print(f"❌ Evaluation Error: {e}")
        print("\n💡 Troubleshooting:")
        print("   • Ensure documents are ingested (run Cell 3)")
        print("   • Check that golden dataset exists")
        print("   • Verify system is properly initialized")

# Closing banner: report completion only if the evaluation ran, and
# "validated" only if at least one threshold passed.
if evaluation_finished:
    print(f"\n✅ Golden dataset evaluation complete!")
    if any(results.values()):
        print(f"🎯 PyNucleus Clean system fully validated!")
else:
    print("\n⚠️ Golden dataset evaluation did not complete - see the messages above.")


In [7]:
# ========================================
# VERSION CONTROL (Optional - For Maintainers Only)
# ========================================
# WARNING: running this cell stages, commits and pushes EVERYTHING in the
# working tree to origin/main. Skip it unless you maintain the repository.

from datetime import datetime


def update_github():
    """Stage, commit and push the working tree to ``origin main``.

    The exit status of every git step is checked and the sequence stops at
    the first failure, so a rejected push is never reported as a success.
    When nothing is staged after ``git add .`` the commit step is skipped and
    only the push is attempted.

    Returns:
        bool: True when ``git push origin main`` succeeded, False otherwise.
    """
    import subprocess  # local import keeps this optional cell self-contained

    def _git(*args):
        """Run one git sub-command, echo its output, return its exit code."""
        try:
            proc = subprocess.run(
                ["git", *args],
                capture_output=True,
                text=True,
                errors="replace",
            )
        except OSError as exc:
            # git is not installed or cannot be executed.
            print(f" git {args[0]} could not be started: {exc}")
            return -1
        output = (proc.stdout + proc.stderr).strip()
        if output:
            print(output)
        return proc.returncode

    print(" Starting GitHub update...")
    if _git("add", ".") != 0:
        print(" git add failed; nothing was committed or pushed")
        return False
    print(" Files added to staging")

    # `git diff --cached --quiet` exits with 0 when the index matches HEAD,
    # i.e. when there is nothing to commit.
    if _git("diff", "--cached", "--quiet") == 0:
        print(" Nothing new to commit")
    else:
        message = f"Update: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        if _git("commit", "-m", message) != 0:
            print(" git commit failed; nothing was pushed")
            return False
        print(" Changes committed")

    if _git("push", "origin", "main") != 0:
        print(" git push failed; see the git output above")
        return False
    print(" Changes pushed to GitHub successfully!")
    return True


# Record the update only after the push has actually succeeded. The log
# entry is therefore written after the commit and is picked up by the next one.
if update_github():
    with open("update_log.txt", "a") as f:
        f.write(f"\n {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} changes made and pushed to origin main\n")

 Starting GitHub update...
 Files added to staging
[main 449ba67] Update: 2025-06-19 17:54:05
 14 files changed, 480 insertions(+), 75 deletions(-)
 create mode 100644 Fixes.txt
 create mode 100644 logs/pynucleus_20250619_173708.log
 create mode 100644 logs/pynucleus_20250619_174101.log
 create mode 100644 logs/pynucleus_20250619_174244.log
 create mode 100644 logs/pynucleus_20250619_175156.log
 create mode 100644 logs/pynucleus_20250619_175157.log
 Changes committed
Enumerating objects: 162, done.
Counting objects: 100% (162/162), done.
Delta compression using up to 8 threads
Compressing objects: 100% (123/123), done.
Writing objects: 100% (129/129), 376.41 MiB | 9.39 MiB/s, done.
Total 129 (delta 59), reused 0 (delta 0), pack-reused 0 (from 0)
remote: Resolving deltas: 100% (59/59), completed with 17 local objects.
remote: error: Trace: 6ebc5f3e8d1edc93688a01e2203d0e4dcb0ede607c5c5687136a3b2e9610ddab
remote: error: See https://gh.io/lfs for more information.

python run_pipeline.py chat
