In [None]:
# Cell 1: Advanced System Diagnostics & Health Checks
# =====================================================
# This cell provides comprehensive system analysis for developers

import sys
import os
import time
import json
import psutil
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any

# Session banner: title, rule, and the wall-clock time the cell was started.
print("🔍 PyNucleus Advanced System Diagnostics", "=" * 60, sep="\n")
print(f"📅 Diagnostic session started: {datetime.now():%Y-%m-%d %H:%M:%S}")

# Put <working directory>/src at the front of sys.path (once) so the
# `pynucleus` imports used by the checks below can be resolved.
src_path = str(Path().resolve().joinpath("src"))
if not any(entry == src_path for entry in sys.path):
    sys.path.insert(0, src_path)

class SystemDiagnostics:
    """Collect host, Python, PyNucleus and data-directory health facts.

    Every ``check_*`` method prints a human-readable section and returns the
    same facts as a dictionary. ``run_comprehensive_diagnostic`` runs all of
    them, stores the results in ``self.diagnostics`` and adds a summary.
    """

    # Third-party packages probed by check_python_environment.
    REQUIRED_PACKAGES = (
        'torch', 'transformers', 'sentence_transformers',
        'chromadb', 'pydantic', 'loguru', 'tiktoken',
    )

    # PyNucleus modules probed by check_pynucleus_components (label -> module).
    COMPONENT_MODULES = {
        'logger': 'pynucleus.utils.logger',
        'rag_engine': 'pynucleus.rag.engine',
        'rag_collector': 'pynucleus.rag.collector',
        'qwen_loader': 'pynucleus.llm.qwen_loader',
        'golden_eval': 'pynucleus.eval.golden_eval',
    }

    # Paths inspected by check_data_structure, relative to the working directory.
    DATA_STRUCTURE = {
        'data/01_raw/source_documents': {'type': 'dir', 'description': 'Source documents'},
        'data/01_raw/web_sources': {'type': 'dir', 'description': 'Web content'},
        'data/03_intermediate/converted_chunked_data': {'type': 'dir', 'description': 'Processed chunks'},
        'data/03_intermediate/vector_db': {'type': 'dir', 'description': 'ChromaDB storage'},
        'data/04_models/chunk_reports': {'type': 'dir', 'description': 'Chunking analysis'},
        'data/04_models/recall_evaluation': {'type': 'dir', 'description': 'Recall metrics'},
        'data/validation/golden_dataset.csv': {'type': 'file', 'description': 'Golden dataset'},
        'data/validation/results': {'type': 'dir', 'description': 'Validation results'},
    }

    # The data layer counts as healthy when at least one of these exists.
    ESSENTIAL_DATA_PATHS = (
        'data/01_raw/source_documents',
        'data/03_intermediate/vector_db',
    )

    # Usage percentages at or above these values mark the host as unhealthy.
    MEMORY_CRITICAL_PERCENT = 90
    DISK_CRITICAL_PERCENT = 95

    def __init__(self):
        # Wall-clock time at construction; kept as a public attribute.
        self.start_time = time.time()
        # Section name -> result dict, filled by run_comprehensive_diagnostic.
        self.diagnostics = {}

    def check_system_resources(self) -> Dict[str, Any]:
        """Print and return CPU, memory and root-disk usage read via psutil.

        Returns:
            Dict with ``cpu_percent``, ``cpu_count``, ``memory_total_gb``,
            ``memory_used_percent``, ``disk_total_gb`` and
            ``disk_used_percent``.
        """
        print("\n🖥️ System Resource Analysis:")

        # interval=1 makes psutil sample for one second (this call blocks).
        cpu_percent = psutil.cpu_percent(interval=1)
        cpu_count = psutil.cpu_count()

        memory = psutil.virtual_memory()
        memory_gb = memory.total / (1024**3)
        memory_used_percent = memory.percent

        disk = psutil.disk_usage('/')
        disk_gb = disk.total / (1024**3)
        # Guard the division: a filesystem may report a total size of zero.
        disk_used_percent = (disk.used / disk.total) * 100 if disk.total else 0.0

        resources = {
            'cpu_percent': cpu_percent,
            'cpu_count': cpu_count,
            'memory_total_gb': memory_gb,
            'memory_used_percent': memory_used_percent,
            'disk_total_gb': disk_gb,
            'disk_used_percent': disk_used_percent
        }

        print(f"   • CPU Usage: {cpu_percent:.1f}% ({cpu_count} cores)")
        print(f"   • Memory: {memory_used_percent:.1f}% used of {memory_gb:.1f} GB")
        print(f"   • Disk: {disk_used_percent:.1f}% used of {disk_gb:.1f} GB")

        return resources

    def check_python_environment(self) -> Dict[str, Any]:
        """Print and return the interpreter version and dependency status.

        Each package in ``REQUIRED_PACKAGES`` is imported to read its
        ``__version__``. A package that is missing, or that raises any other
        exception while importing, is reported as not installed instead of
        aborting the remaining checks.

        Returns:
            Dict with ``python_version``, ``python_executable`` and
            ``dependencies`` (package -> ``{'installed', 'version'}``, plus
            ``'error'`` when the import failed for a reason other than the
            package being absent).
        """
        print("\n🐍 Python Environment Analysis:")

        dependencies = {}
        for package in self.REQUIRED_PACKAGES:
            try:
                module = __import__(package)
            except ImportError:
                dependencies[package] = {'installed': False, 'version': None}
                print(f"   ❌ {package}: Not installed")
            except Exception as e:
                # Present but unusable (import raised something other than
                # ImportError); record it and keep going.
                dependencies[package] = {
                    'installed': False, 'version': None, 'error': str(e)
                }
                print(f"   ❌ {package}: Import failed - {e}")
            else:
                version = getattr(module, '__version__', 'unknown')
                dependencies[package] = {'installed': True, 'version': version}
                print(f"   ✅ {package}: {version}")

        return {
            # sys.version starts with the dotted version number.
            'python_version': sys.version.split()[0],
            'python_executable': sys.executable,
            'dependencies': dependencies
        }

    def check_pynucleus_components(self) -> Dict[str, Any]:
        """Print and return the import status of PyNucleus settings and modules.

        Returns:
            Dict keyed by component name. ``settings`` holds
            ``{'loaded': True, 'config': ...}`` or
            ``{'loaded': False, 'error': ...}``; every other entry holds
            ``{'loaded': True, 'module_path': ...}`` or
            ``{'loaded': False, 'error': ...}``.
        """
        print("\n🧪 PyNucleus Component Analysis:")

        components = {}

        try:
            from pynucleus.settings import settings
            # NOTE(review): dict(settings) assumes the settings object is
            # iterable as (name, value) pairs - confirm against its type.
            components['settings'] = {'loaded': True, 'config': dict(settings)}
            print("   ✅ Settings: Loaded successfully")

            for setting in ('CHROMA_PATH', 'MODEL_ID', 'EMB_MODEL'):
                value = getattr(settings, setting, None)
                print(f"      • {setting}: {value}")

        except Exception as e:
            components['settings'] = {'loaded': False, 'error': str(e)}
            print(f"   ❌ Settings: Error - {e}")

        for name, module_path in self.COMPONENT_MODULES.items():
            try:
                # A non-empty fromlist makes __import__ load the submodule itself.
                __import__(module_path, fromlist=[''])
            except Exception as e:
                components[name] = {'loaded': False, 'error': str(e)}
                print(f"   ❌ {name}: Error - {e}")
            else:
                components[name] = {'loaded': True, 'module_path': module_path}
                print(f"   ✅ {name}: Available")

        return components

    @staticmethod
    def _describe_path(path: str, info: Dict[str, str]):
        """Return ``(analysis, status_line)`` for one expected data path."""
        path_obj = Path(path)
        analysis = {
            'exists': path_obj.exists(),
            'type': info['type'],
            'description': info['description']
        }

        if not analysis['exists']:
            return analysis, "❌ Missing"

        if path_obj.is_dir():
            children = list(path_obj.iterdir())
            analysis['file_count'] = sum(1 for child in children if child.is_file())
            analysis['dir_count'] = sum(1 for child in children if child.is_dir())
            # Counts above are direct children only; the size is recursive.
            analysis['total_size_mb'] = sum(
                f.stat().st_size for f in path_obj.rglob('*') if f.is_file()
            ) / (1024 * 1024)
            status = (
                f"✅ {analysis['file_count']} files, {analysis['dir_count']} dirs, "
                f"{analysis['total_size_mb']:.1f} MB"
            )
        else:
            analysis['size_mb'] = path_obj.stat().st_size / (1024 * 1024)
            status = f"✅ File exists, {analysis['size_mb']:.1f} MB"

        return analysis, status

    def check_data_structure(self) -> Dict[str, Any]:
        """Print and return existence and size facts for the expected data paths.

        Paths are resolved relative to the current working directory.

        Returns:
            Dict mapping each path in ``DATA_STRUCTURE`` to a dict with
            ``exists``, ``type``, ``description`` and, when present on disk,
            either ``file_count``/``dir_count``/``total_size_mb`` (directory)
            or ``size_mb`` (file).
        """
        print("\n📁 Data Structure Deep Analysis:")

        data_analysis = {}
        for path, info in self.DATA_STRUCTURE.items():
            analysis, status = self._describe_path(path, info)
            print(f"   {status} - {path}")
            data_analysis[path] = analysis

        return data_analysis

    def run_comprehensive_diagnostic(self) -> Dict[str, Any]:
        """Run every check, print a summary and return ``self.diagnostics``.

        Returns:
            ``self.diagnostics`` with the sections ``resources``,
            ``python_env``, ``pynucleus``, ``data_structure`` and ``summary``;
            or ``{'error': message}`` if a check raised.
        """
        print("🚀 Running Comprehensive System Diagnostic...")

        # Time this run only, so a reused instance does not report the idle
        # time since construction as diagnostic time.
        run_started = time.time()

        try:
            self.diagnostics['resources'] = self.check_system_resources()
            self.diagnostics['python_env'] = self.check_python_environment()
            self.diagnostics['pynucleus'] = self.check_pynucleus_components()
            self.diagnostics['data_structure'] = self.check_data_structure()

            duration = time.time() - run_started
            # Evaluate once so the printed status and the stored flag agree.
            healthy = self._is_system_healthy()

            print(f"\n📊 Diagnostic Summary:")
            print(f"   • Total diagnostic time: {duration:.2f} seconds")
            print(f"   • System status: {'✅ HEALTHY' if healthy else '⚠️ ISSUES DETECTED'}")

            self.diagnostics['summary'] = {
                'duration_seconds': duration,
                'healthy': healthy,
                'timestamp': datetime.now().isoformat()
            }

            return self.diagnostics

        except Exception as e:
            print(f"❌ Diagnostic error: {e}")
            return {'error': str(e)}

    def _is_system_healthy(self) -> bool:
        """Return True when settings, data directories and resources look fine.

        Healthy means all of:
        * PyNucleus settings were loaded,
        * at least one path in ``ESSENTIAL_DATA_PATHS`` exists,
        * memory and disk usage are below the critical thresholds.

        Malformed or missing diagnostic sections yield ``False``.
        """
        try:
            pynucleus_healthy = (
                self.diagnostics.get('pynucleus', {})
                .get('settings', {})
                .get('loaded', False)
            )

            data_structure = self.diagnostics.get('data_structure', {})
            data_healthy = any(
                data_structure.get(path, {}).get('exists', False)
                for path in self.ESSENTIAL_DATA_PATHS
            )

            resources = self.diagnostics.get('resources', {})
            resource_healthy = (
                resources.get('memory_used_percent', 0) < self.MEMORY_CRITICAL_PERCENT and
                resources.get('disk_used_percent', 0) < self.DISK_CRITICAL_PERCENT
            )

            # The data check used to be computed and then dropped from the verdict.
            return bool(pynucleus_healthy and data_healthy and resource_healthy)

        except Exception:
            # A section with an unexpected shape (e.g. None) means "not healthy".
            return False

# Build the diagnostics object and execute every check once.
diagnostics = SystemDiagnostics()
diagnostic_results = diagnostics.run_comprehensive_diagnostic()

# Point the developer at the follow-up cells.
print("\n".join([
    "\n🎯 Next Steps:",
    "   • 📊 Run Cell 2 for configuration management",
    "   • 🧪 Run Cell 3 for advanced testing",
    "   • 🔧 Check logs for detailed error information",
]))

# Publish the results under the names the later cells look up in globals().
globals().update(
    diagnostic_results=diagnostic_results,
    system_diagnostics=diagnostics,
)


In [None]:
# Cell 2: Advanced Configuration Management & Environment Setup
# ==============================================================
# This cell provides dynamic configuration management for developers

import os
import json
import yaml
from typing import Dict, Any, Optional
from pathlib import Path
from datetime import datetime

# Cell banner: title followed by a 50-character rule.
print("⚙️ Advanced Configuration Management", "=" * 50, sep="\n")

class ConfigurationManager:
    """Inspect PyNucleus settings, write per-environment configs, export reports.

    The analysis methods print a human-readable section, return their result
    and remember it on the instance. ``export_diagnostic_report`` reuses the
    remembered results so that the models are not downloaded and loaded a
    second time just to write the report.
    """

    # Settings shown by analyze_current_settings, in display order.
    CORE_SETTING_NAMES = (
        'CHROMA_PATH', 'MODEL_ID', 'EMB_MODEL', 'MAX_TOKENS',
        'RETRIEVE_TOP_K', 'USE_CUDA', 'LOG_LEVEL',
    )

    def __init__(self):
        self.config_history = []
        self.backup_configs = {}
        # Report section name -> most recent result of the matching analysis.
        self._last_results = {}

    def analyze_current_settings(self) -> Dict[str, Any]:
        """Print and return the core PyNucleus settings with their types.

        A setting that is absent from the settings object is reported with the
        value ``None`` instead of aborting the whole analysis.

        Returns:
            Dict mapping each name in ``CORE_SETTING_NAMES`` to
            ``{'value', 'type'}`` plus a ``path_validation`` entry, or
            ``{'error': message}`` if the settings could not be imported or
            inspected.
        """
        print("\n📋 Current Configuration Analysis:")

        try:
            from pynucleus.settings import settings

            config_analysis = {}
            for key in self.CORE_SETTING_NAMES:
                value = getattr(settings, key, None)
                type_name = type(value).__name__
                print(f"   • {key}: {value} ({type_name})")
                config_analysis[key] = {'value': value, 'type': type_name}

            config_analysis['path_validation'] = self._validate_paths(settings)

        except Exception as e:
            print(f"   ❌ Error analyzing settings: {e}")
            config_analysis = {'error': str(e)}

        self._last_results['configuration'] = config_analysis
        return config_analysis

    def _validate_paths(self, settings) -> Dict[str, bool]:
        """Print and return whether each path-valued setting exists on disk.

        Args:
            settings: Object exposing a ``CHROMA_PATH`` attribute.

        Returns:
            Dict mapping the setting name to ``True`` if the path exists. A
            missing, ``None`` or empty value counts as not existing.
        """
        print("\n🔍 Path Validation:")

        paths_to_check = {
            'CHROMA_PATH': getattr(settings, 'CHROMA_PATH', None),
        }

        validation_results = {}
        for name, path in paths_to_check.items():
            # Path(None) raises and Path('') means '.', so treat both as unset.
            exists = path not in (None, '') and Path(path).exists()
            validation_results[name] = exists
            status = "✅" if exists else "❌"
            print(f"   {status} {name}: {path}")

        return validation_results

    def create_environment_configs(self) -> Dict[str, str]:
        """Write development, production and testing configs as JSON files.

        The files are written to ``configs/<env>_config.json`` relative to the
        working directory; existing files are overwritten.

        Returns:
            Dict mapping the environment name to the path of its config file.
        """
        print("\n🏗️ Creating Environment Configurations:")

        config_files = {
            'development': {
                'MODEL_ID': 'Qwen/Qwen1.5-0.5B-Chat',
                'EMB_MODEL': 'all-MiniLM-L6-v2',
                'MAX_TOKENS': 256,
                'RETRIEVE_TOP_K': 4,
                'USE_CUDA': False,
                'LOG_LEVEL': 'DEBUG',
                'CHROMA_PATH': 'data/03_intermediate/vector_db_dev'
            },
            'production': {
                'MODEL_ID': 'Qwen/Qwen1.5-0.5B-Chat',
                'EMB_MODEL': 'all-mpnet-base-v2',
                'MAX_TOKENS': 512,
                'RETRIEVE_TOP_K': 6,
                'USE_CUDA': True,
                'LOG_LEVEL': 'INFO',
                'CHROMA_PATH': 'data/03_intermediate/vector_db_prod'
            },
            'testing': {
                'MODEL_ID': 'Qwen/Qwen1.5-0.5B-Chat',
                'EMB_MODEL': 'all-MiniLM-L6-v2',
                'MAX_TOKENS': 128,
                'RETRIEVE_TOP_K': 2,
                'USE_CUDA': False,
                'LOG_LEVEL': 'WARNING',
                'CHROMA_PATH': 'data/03_intermediate/vector_db_test'
            },
        }

        configs_dir = Path('configs')
        configs_dir.mkdir(exist_ok=True)

        configs = {}
        for env_name, config in config_files.items():
            config_file = configs_dir / f'{env_name}_config.json'

            with open(config_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=2)

            configs[env_name] = str(config_file)
            print(f"   ✅ Created {env_name} config: {config_file}")

        return configs

    def validate_model_availability(self) -> Dict[str, bool]:
        """Print and return whether the tokenizer of each candidate model loads.

        Returns:
            Dict mapping the model id to ``True`` when
            ``AutoTokenizer.from_pretrained`` succeeded for it.
        """
        print("\n🤖 Model Availability Check:")

        models_to_check = [
            'Qwen/Qwen1.5-0.5B-Chat',
            'Qwen/Qwen1.5-1.8B-Chat',
            'microsoft/DialoGPT-medium'
        ]

        model_status = {}
        for model_id in models_to_check:
            try:
                # Imported inside the try so a missing `transformers` package
                # is reported per model instead of raising.
                from transformers import AutoTokenizer
                AutoTokenizer.from_pretrained(model_id)
            except Exception as e:
                model_status[model_id] = False
                print(f"   ❌ {model_id}: Error - {str(e)[:50]}...")
            else:
                model_status[model_id] = True
                print(f"   ✅ {model_id}: Available")

        self._last_results['model_availability'] = model_status
        return model_status

    def check_embedding_models(self) -> Dict[str, Dict[str, Any]]:
        """Load each candidate embedding model and report its basic properties.

        Returns:
            Dict mapping the model name to either
            ``{'available': True, 'load_time_seconds', 'embedding_dimension',
            'model_size_mb'}`` or ``{'available': False, 'error'}``.
        """
        print("\n🧮 Embedding Model Analysis:")

        embedding_models = [
            'all-MiniLM-L6-v2',
            'all-mpnet-base-v2',
            'multi-qa-MiniLM-L6-cos-v1'
        ]

        model_analysis = {}
        for model_name in embedding_models:
            try:
                from sentence_transformers import SentenceTransformer

                # Time only the construction so "load time" does not include
                # the test inference below.
                load_started = time.time()
                model = SentenceTransformer(model_name)
                load_time = time.time() - load_started

                # One test sentence is enough to read the embedding dimension.
                embedding = model.encode(["Chemical engineering process optimization"])

                analysis = {
                    'available': True,
                    'load_time_seconds': load_time,
                    'embedding_dimension': embedding.shape[1],
                    # Parameter bytes only (element count x element size).
                    'model_size_mb': sum(
                        p.numel() * p.element_size()
                        for p in model.parameters()
                    ) / (1024 * 1024)
                }

                model_analysis[model_name] = analysis
                print(f"   ✅ {model_name}:")
                print(f"      • Load time: {load_time:.2f}s")
                print(f"      • Dimensions: {analysis['embedding_dimension']}")
                print(f"      • Size: {analysis['model_size_mb']:.1f} MB")

            except Exception as e:
                model_analysis[model_name] = {
                    'available': False,
                    'error': str(e)
                }
                print(f"   ❌ {model_name}: Error - {str(e)[:50]}...")

        self._last_results['embedding_analysis'] = model_analysis
        return model_analysis

    def _latest(self, section: str, compute, refresh: bool) -> Any:
        """Return the remembered result for *section*, computing it if needed."""
        if refresh or section not in self._last_results:
            compute()  # every analysis method records its own result
        return self._last_results[section]

    def export_diagnostic_report(self, diagnostic_data: Dict[str, Any],
                                 refresh: bool = False) -> str:
        """Write a JSON report combining *diagnostic_data* with the analyses.

        Args:
            diagnostic_data: Stored verbatim under ``system_info``.
            refresh: When ``True``, re-run every analysis even if this
                instance already holds a result for it. By default an
                analysis is only run if it has not been run on this instance.

        Returns:
            Name of the report file, created in the working directory as
            ``diagnostic_report_<YYYYmmdd_HHMMSS>.json``.
        """
        print("\n📄 Exporting Diagnostic Report:")

        now = datetime.now()
        report = {
            'timestamp': now.isoformat(),
            'system_info': diagnostic_data,
            'configuration': self._latest(
                'configuration', self.analyze_current_settings, refresh),
            'model_availability': self._latest(
                'model_availability', self.validate_model_availability, refresh),
            'embedding_analysis': self._latest(
                'embedding_analysis', self.check_embedding_models, refresh)
        }

        report_file = f"diagnostic_report_{now.strftime('%Y%m%d_%H%M%S')}.json"

        with open(report_file, 'w', encoding='utf-8') as f:
            # default=str stringifies anything json cannot encode natively.
            json.dump(report, f, indent=2, default=str)

        print(f"   ✅ Report exported: {report_file}")
        print(f"   📊 Report size: {Path(report_file).stat().st_size / 1024:.1f} KB")

        return report_file

# Run configuration management (needs the results published by Cell 1).
if 'diagnostic_results' not in globals():
    print("⚠️ Please run Cell 1 (System Diagnostics) first to get diagnostic data.")
else:
    config_manager = ConfigurationManager()

    # Settings, per-environment config files, LLM tokenizers, embedding models.
    current_config = config_manager.analyze_current_settings()
    env_configs = config_manager.create_environment_configs()
    model_status = config_manager.validate_model_availability()
    embedding_analysis = config_manager.check_embedding_models()

    # Only export when Cell 1 produced a non-empty result.
    if diagnostic_results:
        report_file = config_manager.export_diagnostic_report(diagnostic_results)

    print("\n".join([
        "\n🎯 Configuration Management Complete!",
        f"   • Environment configs created: {len(env_configs)}",
        f"   • Models checked: {len(model_status)}",
        f"   • Embedding models analyzed: {len(embedding_analysis)}",
    ]))

    # Publish the objects the next cells look up in globals().
    globals().update(
        config_manager=config_manager,
        current_config=current_config,
        model_status=model_status,
    )


In [None]:
# Cell 3: Advanced Testing & Evaluation Suite
# =============================================
# This cell provides comprehensive testing and evaluation capabilities

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Any
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

# Cell banner: title followed by a 50-character rule.
print("🧪 Advanced Testing & Evaluation Suite", "=" * 50, sep="\n")

class AdvancedEvaluator:
    """Golden-dataset evaluation, retrieval checks and result export.

    The methods print a human-readable report as they go and return the same
    information as plain dictionaries / DataFrames.
    """

    # Minimum fraction of expected keywords an answer must contain to pass.
    SUCCESS_THRESHOLD = 0.6

    def __init__(self):
        self.test_results = {}
        self.evaluation_history = []

    @staticmethod
    def _print_distribution(title: str, counts: pd.Series, total: int) -> None:
        """Print one ``value: count (percent)`` line per entry of *counts*."""
        print(f"\n   📈 {title} Distribution:")
        for label, count in counts.items():
            print(f"      • {label}: {count} questions ({count/total*100:.1f}%)")

    def load_golden_dataset(self) -> pd.DataFrame:
        """Load ``data/validation/golden_dataset.csv`` and print its make-up.

        Returns:
            The dataset, or an empty DataFrame when the file is missing or
            cannot be read / summarised (the ``domain`` and ``difficulty``
            columns are required for the summary).
        """
        print("\n📄 Golden Dataset Analysis:")

        try:
            dataset_path = Path("data/validation/golden_dataset.csv")
            if not dataset_path.exists():
                print(f"   ❌ Golden dataset not found at {dataset_path}")
                return pd.DataFrame()

            df = pd.read_csv(dataset_path)

            print(f"   ✅ Dataset loaded: {len(df)} questions")
            print(f"   📊 Domains: {list(df['domain'].unique())}")
            print(f"   📊 Difficulties: {list(df['difficulty'].unique())}")

            self._print_distribution("Domain", df['domain'].value_counts(), len(df))
            self._print_distribution("Difficulty", df['difficulty'].value_counts(), len(df))

            return df

        except Exception as e:
            print(f"   ❌ Error loading dataset: {e}")
            return pd.DataFrame()

    @staticmethod
    def _parse_keywords(raw: Any) -> List[str]:
        """Split a comma-separated cell into lower-cased, non-empty keywords.

        Empty fragments are dropped: an empty string is a substring of every
        answer and would otherwise always count as a match.
        """
        if pd.isna(raw):
            return []
        fragments = (part.strip().lower() for part in str(raw).split(','))
        return [keyword for keyword in fragments if keyword]

    def run_detailed_evaluation(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Ask every golden question and score the answers by keyword overlap.

        Args:
            df: Dataset with the columns ``question``, ``expected_answer``
                (comma-separated keywords), ``domain`` and ``difficulty``.

        Returns:
            ``{}`` for an empty dataset; ``{'error': message}`` if the RAG
            engine cannot be imported or the dataset lacks a column;
            otherwise ``{'results': DataFrame, 'analysis': dict,
            'timestamp': str}``. A question whose call fails is kept as a row
            with ``error`` set and ``success`` False.
        """
        print("\n🔍 Running Detailed Evaluation:")

        if df.empty:
            print("   ⚠️ No dataset available for evaluation")
            return {}

        try:
            from pynucleus.rag.engine import ask

            results = []
            total = len(df)

            print(f"   ⏳ Evaluating {total} questions...")

            # `position` counts processed rows; `idx` is the index label,
            # which need not be a consecutive integer.
            for position, (idx, row) in enumerate(df.iterrows(), start=1):
                question = row['question']
                expected_keywords = self._parse_keywords(row['expected_answer'])
                domain = row['domain']
                difficulty = row['difficulty']

                try:
                    started = time.time()
                    result = ask(question)
                    response_time = time.time() - started

                    # NOTE(review): assumes `ask` returns a dict-like object
                    # with 'answer' and 'sources' - confirm against the engine.
                    answer = result.get('answer') or ''
                    sources = result.get('sources') or []

                    answer_lower = answer.lower()
                    keyword_matches = sum(
                        1 for keyword in expected_keywords if keyword in answer_lower
                    )
                    keyword_score = (
                        keyword_matches / len(expected_keywords) if expected_keywords else 0
                    )
                    source_count = len(sources)

                    results.append({
                        'question_id': idx,
                        'question': question,
                        'domain': domain,
                        'difficulty': difficulty,
                        'answer': answer,
                        'answer_length': len(answer),
                        'keyword_score': keyword_score,
                        'keyword_matches': keyword_matches,
                        'total_keywords': len(expected_keywords),
                        'source_count': source_count,
                        'has_sources': source_count > 0,
                        'response_time': response_time,
                        'success': keyword_score >= self.SUCCESS_THRESHOLD
                    })

                except Exception as e:
                    print(f"      ❌ Error with question {idx}: {e}")
                    results.append({
                        'question_id': idx,
                        'question': question,
                        'domain': domain,
                        'difficulty': difficulty,
                        'error': str(e),
                        'success': False
                    })

                # Kept outside the try so a progress failure can never add a
                # second row for the same question.
                if position % 5 == 0:
                    print(f"      • Completed {position}/{total} questions...")

            results_df = pd.DataFrame(results)
            analysis = self._analyze_evaluation_results(results_df)

            return {
                'results': results_df,
                'analysis': analysis,
                'timestamp': datetime.now().isoformat()
            }

        except Exception as e:
            print(f"   ❌ Evaluation error: {e}")
            return {'error': str(e)}

    @staticmethod
    def _summarize_by(results_df: pd.DataFrame, column: str, heading: str) -> Dict[str, Any]:
        """Print and return count / sum / mean of ``success`` grouped by *column*.

        Returns ``{}`` when either *column* or ``success`` is absent.
        """
        if column not in results_df.columns or 'success' not in results_df.columns:
            return {}

        performance = (
            results_df.groupby(column)['success'].agg(['count', 'sum', 'mean']).round(3)
        )
        print(f"\n   📈 Performance by {heading}:")
        for label, stats in performance.iterrows():
            print(f"      • {label}: {stats['mean']:.1%} ({stats['sum']:.0f}/{stats['count']:.0f})")

        return performance.to_dict()

    def _analyze_evaluation_results(self, results_df: pd.DataFrame) -> Dict[str, Any]:
        """Print and return aggregate metrics for per-question results.

        Args:
            results_df: One row per question; the columns ``success``,
                ``domain``, ``difficulty``, ``response_time`` and
                ``keyword_score`` are each used only when present.

        Returns:
            ``{}`` for an empty frame, otherwise a dict with
            ``overall_accuracy``, ``total_questions``,
            ``successful_questions``, ``domain_performance``,
            ``difficulty_performance``, ``avg_response_time`` and
            ``avg_keyword_score`` (the last two are ``None`` when their
            column is absent).
        """
        print("\n📊 Evaluation Results Analysis:")

        if results_df.empty:
            return {}

        columns = results_df.columns

        total_questions = len(results_df)
        # Plain int so the value stays a number when written to JSON.
        successful_questions = int(results_df['success'].sum()) if 'success' in columns else 0
        overall_accuracy = successful_questions / total_questions if total_questions > 0 else 0

        print(f"   📈 Overall Performance:")
        print(f"      • Accuracy: {overall_accuracy:.1%} ({successful_questions}/{total_questions})")

        domain_performance = self._summarize_by(results_df, 'domain', 'Domain')
        difficulty_performance = self._summarize_by(results_df, 'difficulty', 'Difficulty')

        avg_response_time = None
        if 'response_time' in columns:
            response_times = results_df['response_time']
            avg_response_time = float(response_times.mean())

            print(f"\n   ⏱️ Response Time Analysis:")
            print(f"      • Average: {avg_response_time:.2f}s")
            print(f"      • Min: {response_times.min():.2f}s")
            print(f"      • Max: {response_times.max():.2f}s")

        avg_keyword_score = None
        if 'keyword_score' in columns:
            avg_keyword_score = float(results_df['keyword_score'].mean())
            print(f"\n   📝 Answer Quality:")
            print(f"      • Average keyword score: {avg_keyword_score:.1%}")

        return {
            'overall_accuracy': overall_accuracy,
            'total_questions': total_questions,
            'successful_questions': successful_questions,
            'domain_performance': domain_performance,
            'difficulty_performance': difficulty_performance,
            'avg_response_time': avg_response_time,
            'avg_keyword_score': avg_keyword_score
        }

    def run_retrieval_quality_test(self) -> Dict[str, Any]:
        """Retrieve documents for fixed queries and score keyword overlap.

        Returns:
            ``{'retrieval_results': [...], 'timestamp': str}`` with one entry
            per query, or ``{'error': message}`` if the retriever cannot be
            imported.
        """
        print("\n🔍 Retrieval Quality Assessment:")

        try:
            from pynucleus.rag.engine import retrieve

            test_queries = [
                "modular chemical plants",
                "distillation process",
                "reactor design",
                "heat transfer",
                "process safety",
                "chemical engineering optimization"
            ]

            retrieval_results = []

            for query in test_queries:
                try:
                    started = time.time()
                    docs = retrieve(query, top_k=5)
                    retrieval_time = time.time() - started

                    if not docs:
                        retrieval_results.append({
                            'query': query,
                            'success': False,
                            'error': 'No documents retrieved'
                        })
                        print(f"   ❌ '{query}': No documents retrieved")
                        continue

                    # NOTE(review): the code below treats every retrieved
                    # document as a string - confirm against `retrieve`.
                    doc_count = len(docs)
                    avg_doc_length = np.mean([len(doc) for doc in docs])

                    # Relevance = share of query words that occur in the document.
                    query_words = set(query.lower().split())
                    relevance_scores = [
                        (len(query_words & set(doc.lower().split())) / len(query_words))
                        if query_words else 0
                        for doc in docs
                    ]
                    avg_relevance = np.mean(relevance_scores)

                    retrieval_results.append({
                        'query': query,
                        'doc_count': doc_count,
                        'avg_doc_length': avg_doc_length,
                        'avg_relevance': avg_relevance,
                        'retrieval_time': retrieval_time,
                        'success': True
                    })

                    print(f"   ✅ '{query}': {doc_count} docs, {avg_relevance:.2f} relevance")

                except Exception as e:
                    retrieval_results.append({
                        'query': query,
                        'success': False,
                        'error': str(e)
                    })
                    print(f"   ❌ '{query}': Error - {e}")

            return {
                'retrieval_results': retrieval_results,
                'timestamp': datetime.now().isoformat()
            }

        except Exception as e:
            print(f"   ❌ Retrieval test error: {e}")
            return {'error': str(e)}

    @classmethod
    def _to_serializable(cls, obj: Any) -> Any:
        """Recursively convert pandas / numpy values into JSON-friendly ones.

        DataFrames become lists of row dicts, Series become dicts, numpy
        scalars become Python scalars, NaN becomes ``None``, and mapping keys
        that json cannot encode are stringified. Everything else is returned
        unchanged.
        """
        if isinstance(obj, pd.DataFrame):
            return [cls._to_serializable(row) for row in obj.to_dict(orient='records')]
        if isinstance(obj, pd.Series):
            return cls._to_serializable(obj.to_dict())
        if isinstance(obj, dict):
            return {
                (key if key is None or isinstance(key, (str, int, float, bool)) else str(key)):
                    cls._to_serializable(value)
                for key, value in obj.items()
            }
        if isinstance(obj, (list, tuple)):
            return [cls._to_serializable(item) for item in obj]
        if isinstance(obj, np.generic):
            obj = obj.item()
        if isinstance(obj, float) and obj != obj:
            # NaN is not valid JSON; rows without a metric are written as null.
            return None
        return obj

    def save_evaluation_results(self, results: Dict[str, Any], filename: Optional[str] = None) -> str:
        """Write *results* as JSON under ``data/validation/results``.

        DataFrames are converted at any nesting depth, so both a flat
        ``{'results': df}`` and a nested ``{'evaluation': {'results': df}}``
        are stored as lists of records. The input is not modified.

        Args:
            results: Arbitrarily nested dict / list structure.
            filename: File name inside the results directory; defaults to
                ``evaluation_results_<YYYYmmdd_HHMMSS>.json``.

        Returns:
            Path of the written file as a string.
        """
        if not filename:
            filename = f"evaluation_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        filepath = Path("data/validation/results") / filename
        filepath.parent.mkdir(parents=True, exist_ok=True)

        serializable_results = self._to_serializable(results)

        with open(filepath, 'w', encoding='utf-8') as f:
            # default=str remains as a last resort for unknown object types.
            json.dump(serializable_results, f, indent=2, default=str)

        print(f"\n💾 Results saved: {filepath}")
        return str(filepath)

# Run advanced evaluation (needs the diagnostics object published by Cell 1).
if 'system_diagnostics' not in globals():
    print("⚠️ Please run Cell 1 (System Diagnostics) first.")
else:
    evaluator = AdvancedEvaluator()
    golden_df = evaluator.load_golden_dataset()

    # Nothing below runs when the golden dataset could not be loaded.
    if not golden_df.empty:
        print("\n🚀 Starting comprehensive evaluation...")
        evaluation_results = evaluator.run_detailed_evaluation(golden_df)
        retrieval_results = evaluator.run_retrieval_quality_test()

        # Persist both result sets together whenever the evaluation returned
        # a non-empty dict.
        if evaluation_results:
            results_file = evaluator.save_evaluation_results(
                dict(evaluation=evaluation_results, retrieval_quality=retrieval_results)
            )

        print("\n✅ Advanced evaluation complete!")

        # Publish the objects the next cells look up in globals().
        globals().update(
            evaluator=evaluator,
            evaluation_results=evaluation_results,
            retrieval_results=retrieval_results,
        )


In [None]:
# Cell 4: Performance Profiling & Optimization
# ==============================================
# This cell provides detailed performance analysis and optimization recommendations

import time
import gc
import memory_profiler
from typing import Dict, List, Any
from datetime import datetime

print("📊 Performance Profiling & Optimization")
print("=" * 50)

class PerformanceProfiler:
    """Advanced performance profiling and optimization"""
    
    def __init__(self):
        self.profile_results = {}
        
    def profile_model_loading(self) -> Dict[str, Any]:
        """Profile model loading performance"""
        print("\n🤖 Model Loading Performance:")
        
        try:
            from pynucleus.llm.qwen_loader import QwenLoader
            
            # Profile Qwen model loading
            start_time = time.time()
            start_memory = memory_profiler.memory_usage()[0]
            
            loader = QwenLoader()
            model, tokenizer = loader.load_model()
            
            end_time = time.time()
            end_memory = memory_profiler.memory_usage()[0]
            
            loading_time = end_time - start_time
            memory_usage = end_memory - start_memory
            
            print(f"   • Model loading time: {loading_time:.2f} seconds")
            print(f"   • Memory usage: {memory_usage:.1f} MB")
            
            # Test inference speed
            test_prompt = "What is chemical engineering?"
            
            start_time = time.time()
            response = loader.generate(test_prompt, max_tokens=50)
            inference_time = time.time() - start_time
            
            tokens_generated = len(response.split())
            tokens_per_second = tokens_generated / inference_time if inference_time > 0 else 0
            
            print(f"   • First inference time: {inference_time:.2f} seconds")
            print(f"   • Tokens per second: {tokens_per_second:.1f}")
            
            # Test subsequent inference (warm)
            start_time = time.time()
            response2 = loader.generate("Another test prompt", max_tokens=50)
            warm_inference_time = time.time() - start_time
            
            print(f"   • Warm inference time: {warm_inference_time:.2f} seconds")
            print(f"   • Speedup: {inference_time / warm_inference_time:.1f}x")
            
            return {
                'loading_time': loading_time,
                'memory_usage_mb': memory_usage,
                'cold_inference_time': inference_time,
                'warm_inference_time': warm_inference_time,
                'tokens_per_second': tokens_per_second
            }
            
        except Exception as e:
            print(f"   ❌ Model profiling error: {e}")
            return {'error': str(e)}
    
    def profile_embedding_performance(self) -> Dict[str, Any]:
        """Profile embedding model performance"""
        print("\n🧮 Embedding Performance Analysis:")
        
        try:
            from sentence_transformers import SentenceTransformer
            
            embedding_models = ['all-MiniLM-L6-v2', 'all-mpnet-base-v2']
            test_texts = [
                "Chemical engineering process optimization",
                "Distillation column design and operation", 
                "Reactor kinetics and mass transfer",
                "Heat exchanger efficiency analysis",
                "Process safety and risk assessment"
            ]
            
            model_performance = {}
            
            for model_name in embedding_models:
                try:
                    # Load model
                    start_time = time.time()
                    model = SentenceTransformer(model_name)
                    load_time = time.time() - start_time
                    
                    # Profile embedding generation
                    start_time = time.time()
                    embeddings = model.encode(test_texts)
                    encoding_time = time.time() - start_time
                    
                    # Calculate metrics
                    texts_per_second = len(test_texts) / encoding_time
                    avg_text_length = sum(len(text) for text in test_texts) / len(test_texts)
                    
                    model_performance[model_name] = {
                        'load_time': load_time,
                        'encoding_time': encoding_time,
                        'texts_per_second': texts_per_second,
                        'embedding_dimension': embeddings.shape[1],
                        'avg_text_length': avg_text_length
                    }
                    
                    print(f"   ✅ {model_name}:")
                    print(f"      • Load time: {load_time:.2f}s")
                    print(f"      • Encoding time: {encoding_time:.3f}s") 
                    print(f"      • Texts/second: {texts_per_second:.1f}")
                    print(f"      • Dimensions: {embeddings.shape[1]}")
                    
                except Exception as e:
                    print(f"   ❌ {model_name}: Error - {e}")
                    model_performance[model_name] = {'error': str(e)}
            
            return model_performance
            
        except Exception as e:
            print(f"   ❌ Embedding profiling error: {e}")
            return {'error': str(e)}
    
    def profile_retrieval_performance(self) -> Dict[str, Any]:
        """Profile document retrieval performance"""
        print("\n🔍 Retrieval Performance Analysis:")
        
        try:
            from pynucleus.rag.engine import retrieve
            
            test_queries = [
                "modular chemical plants",
                "distillation efficiency", 
                "reactor design optimization",
                "heat transfer mechanisms",
                "process safety protocols"
            ]
            
            retrieval_metrics = []
            
            for query in test_queries:
                try:
                    # Test different top_k values
                    for top_k in [1, 3, 5, 10]:
                        start_time = time.time()
                        docs = retrieve(query, top_k=top_k)
                        retrieval_time = time.time() - start_time
                        
                        if docs:
                            avg_doc_length = sum(len(doc) for doc in docs) / len(docs)
                            total_length = sum(len(doc) for doc in docs)
                        else:
                            avg_doc_length = 0
                            total_length = 0
                        
                        retrieval_metrics.append({
                            'query': query,
                            'top_k': top_k,
                            'retrieval_time': retrieval_time,
                            'docs_returned': len(docs) if docs else 0,
                            'avg_doc_length': avg_doc_length,
                            'total_length': total_length
                        })
                        
                        print(f"   📝 '{query}' (k={top_k}): {retrieval_time:.3f}s, {len(docs) if docs else 0} docs")
                
                except Exception as e:
                    print(f"   ❌ Query '{query}': Error - {e}")
            
            # Analyze performance patterns
            if retrieval_metrics:
                avg_time_by_k = {}
                for metric in retrieval_metrics:
                    k = metric['top_k']
                    if k not in avg_time_by_k:
                        avg_time_by_k[k] = []
                    avg_time_by_k[k].append(metric['retrieval_time'])
                
                print(f"\n   📊 Average retrieval time by top_k:")
                for k, times in avg_time_by_k.items():
                    avg_time = sum(times) / len(times)
                    print(f"      • k={k}: {avg_time:.3f}s")
            
            return {
                'retrieval_metrics': retrieval_metrics,
                'avg_time_by_k': {k: sum(times)/len(times) for k, times in avg_time_by_k.items()}
            }
            
        except Exception as e:
            print(f"   ❌ Retrieval profiling error: {e}")
            return {'error': str(e)}
    
    def profile_end_to_end_performance(self) -> Dict[str, Any]:
        """Profile complete question-answering pipeline"""
        print("\n🚀 End-to-End Pipeline Performance:")
        
        try:
            from pynucleus.rag.engine import ask
            
            test_questions = [
                "What are the advantages of modular chemical plants?",
                "How does distillation work in chemical processes?",
                "What factors affect reactor efficiency?"
            ]
            
            pipeline_metrics = []
            
            for question in test_questions:
                try:
                    # Profile complete pipeline
                    start_time = time.time()
                    result = ask(question)
                    total_time = time.time() - start_time
                    
                    answer = result.get('answer', '')
                    sources = result.get('sources', [])
                    
                    metrics = {
                        'question': question,
                        'total_time': total_time,
                        'answer_length': len(answer),
                        'source_count': len(sources),
                        'words_per_second': len(answer.split()) / total_time if total_time > 0 else 0
                    }
                    
                    pipeline_metrics.append(metrics)
                    
                    print(f"   ✅ Question: {question[:50]}...")
                    print(f"      • Total time: {total_time:.2f}s")
                    print(f"      • Answer length: {len(answer)} chars")
                    print(f"      • Sources used: {len(sources)}")
                    print(f"      • Words/second: {metrics['words_per_second']:.1f}")
                    
                except Exception as e:
                    print(f"   ❌ Question error: {e}")
                    pipeline_metrics.append({
                        'question': question,
                        'error': str(e)
                    })
            
            # Calculate summary statistics
            if pipeline_metrics:
                valid_metrics = [m for m in pipeline_metrics if 'total_time' in m]
                if valid_metrics:
                    avg_time = sum(m['total_time'] for m in valid_metrics) / len(valid_metrics)
                    avg_answer_length = sum(m['answer_length'] for m in valid_metrics) / len(valid_metrics)
                    
                    print(f"\n   📊 Pipeline Summary:")
                    print(f"      • Average response time: {avg_time:.2f}s")
                    print(f"      • Average answer length: {avg_answer_length:.0f} chars")
            
            return {
                'pipeline_metrics': pipeline_metrics,
                'summary': {
                    'avg_response_time': avg_time if 'avg_time' in locals() else None,
                    'avg_answer_length': avg_answer_length if 'avg_answer_length' in locals() else None
                }
            }
            
        except Exception as e:
            print(f"   ❌ End-to-end profiling error: {e}")
            return {'error': str(e)}
    
    def generate_optimization_recommendations(self, profile_data: Dict[str, Any]) -> List[str]:
        """Generate performance optimization recommendations"""
        print("\n💡 Performance Optimization Recommendations:")
        
        recommendations = []
        
        # Model loading optimizations
        if 'model_loading' in profile_data:
            loading_data = profile_data['model_loading']
            if 'loading_time' in loading_data and loading_data['loading_time'] > 10:
                recommendations.append("🤖 Consider model caching or keeping model loaded in memory")
            
            if 'memory_usage_mb' in loading_data and loading_data['memory_usage_mb'] > 2000:
                recommendations.append("💾 Consider using a smaller model or quantization for memory efficiency")
        
        # Embedding optimizations
        if 'embedding_performance' in profile_data:
            for model, perf in profile_data['embedding_performance'].items():
                if 'texts_per_second' in perf and perf['texts_per_second'] < 5:
                    recommendations.append(f"🧮 Consider switching from {model} to a faster embedding model")
        
        # Retrieval optimizations
        if 'retrieval_performance' in profile_data:
            avg_times = profile_data['retrieval_performance'].get('avg_time_by_k', {})
            if 5 in avg_times and 10 in avg_times:
                if avg_times[10] > avg_times[5] * 1.5:
                    recommendations.append("🔍 Consider using lower top_k values for better performance")
        
        # General recommendations
        recommendations.extend([
            "⚡ Use GPU acceleration if available (set USE_CUDA=True)",
            "🗄️ Ensure ChromaDB is using persistent storage for better caching",
            "📊 Monitor memory usage and implement cleanup for long-running sessions",
            "🔧 Consider batch processing for multiple queries",
            "📈 Profile with production-sized document collections"
        ])
        
        for i, rec in enumerate(recommendations, 1):
            print(f"   {i}. {rec}")
        
        return recommendations

# Run performance profiling
if 'config_manager' not in globals():
    # `config_manager` is missing from the notebook namespace
    print("⚠️ Please run previous cells first to initialize the system.")
else:
    profiler = PerformanceProfiler()

    print("🚀 Starting comprehensive performance profiling...")

    # Profile each component in turn: model, embeddings, retrieval, full pipeline
    model_perf = profiler.profile_model_loading()
    embedding_perf = profiler.profile_embedding_performance()
    retrieval_perf = profiler.profile_retrieval_performance()
    pipeline_perf = profiler.profile_end_to_end_performance()

    # Bundle every section together with the time the bundle was assembled
    performance_data = dict(
        model_loading=model_perf,
        embedding_performance=embedding_perf,
        retrieval_performance=retrieval_perf,
        pipeline_performance=pipeline_perf,
        timestamp=datetime.now().isoformat(),
    )

    # Generate recommendations
    recommendations = profiler.generate_optimization_recommendations(performance_data)

    print("\n✅ Performance profiling complete!")

    # Store for next cells
    globals().update(
        profiler=profiler,
        performance_data=performance_data,
        optimization_recommendations=recommendations,
    )


In [None]:
# Cell 5: Debug Tools & Troubleshooting
# ======================================
# This cell provides advanced debugging and troubleshooting capabilities

import traceback
import logging
import sys
from typing import Dict, List, Any
from pathlib import Path
from datetime import datetime

print("🔧 Debug Tools & Troubleshooting")
print("=" * 50)

class DebugToolkit:
    """Advanced debugging and troubleshooting tools"""
    
    def __init__(self):
        self.debug_logs = []
        self.error_history = []
        
    def setup_detailed_logging(self) -> None:
        """Setup detailed logging for debugging"""
        print("\n📝 Setting up detailed logging:")
        
        # Create logs directory
        logs_dir = Path("logs")
        logs_dir.mkdir(exist_ok=True)
        
        # Configure detailed logging
        log_file = logs_dir / f"debug_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
        
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file),
                logging.StreamHandler(sys.stdout)
            ]
        )
        
        print(f"   ✅ Debug logging enabled: {log_file}")
        
        # Test logging
        logger = logging.getLogger("PyNucleus.Debug")
        logger.debug("Debug logging system initialized")
        logger.info("Ready for detailed debugging")
    
    def diagnose_import_issues(self) -> Dict[str, Any]:
        """Diagnose common import and module issues"""
        print("\n🐍 Diagnosing Import Issues:")
        
        import_diagnosis = {
            'python_path': sys.path.copy(),
            'current_directory': str(Path.cwd()),
            'module_issues': {}
        }
        
        # Test critical imports
        critical_modules = [
            'pynucleus.settings',
            'pynucleus.rag.engine', 
            'pynucleus.rag.collector',
            'pynucleus.llm.qwen_loader',
            'pynucleus.eval.golden_eval'
        ]
        
        for module_name in critical_modules:
            try:
                module = __import__(module_name, fromlist=[''])
                import_diagnosis['module_issues'][module_name] = {
                    'status': 'success',
                    'path': getattr(module, '__file__', 'unknown')
                }
                print(f"   ✅ {module_name}: OK")
            except ImportError as e:
                import_diagnosis['module_issues'][module_name] = {
                    'status': 'import_error',
                    'error': str(e),
                    'traceback': traceback.format_exc()
                }
                print(f"   ❌ {module_name}: ImportError - {e}")
            except Exception as e:
                import_diagnosis['module_issues'][module_name] = {
                    'status': 'other_error',
                    'error': str(e),
                    'traceback': traceback.format_exc()
                }
                print(f"   ⚠️ {module_name}: Error - {e}")
        
        # Check if src is in path
        src_path = Path.cwd() / "src"
        if str(src_path) not in sys.path:
            print(f"   ⚠️ Warning: {src_path} not in Python path")
            import_diagnosis['warnings'] = [f"src directory not in Python path: {src_path}"]
        
        return import_diagnosis
    
    def test_core_functionality(self) -> Dict[str, Any]:
        """Test core PyNucleus functionality with detailed error reporting"""
        print("\n🧪 Testing Core Functionality:")
        
        test_results = {}
        
        # Test 1: Settings loading
        try:
            from pynucleus.settings import settings
            test_results['settings'] = {
                'status': 'success',
                'chroma_path': settings.CHROMA_PATH,
                'model_id': settings.MODEL_ID
            }
            print("   ✅ Settings: Loaded successfully")
        except Exception as e:
            test_results['settings'] = {
                'status': 'error',
                'error': str(e),
                'traceback': traceback.format_exc()
            }
            print(f"   ❌ Settings: Error - {e}")
        
        # Test 2: Vector database connection
        try:
            from pynucleus.rag.engine import retrieve
            test_docs = retrieve("test query", top_k=1)
            test_results['vector_db'] = {
                'status': 'success',
                'docs_returned': len(test_docs) if test_docs else 0
            }
            print(f"   ✅ Vector DB: Connected, {len(test_docs) if test_docs else 0} docs found")
        except Exception as e:
            test_results['vector_db'] = {
                'status': 'error',
                'error': str(e),
                'traceback': traceback.format_exc()
            }
            print(f"   ❌ Vector DB: Error - {e}")
        
        # Test 3: Model loading
        try:
            from pynucleus.llm.qwen_loader import generate
            test_response = generate("test", max_tokens=5)
            test_results['model'] = {
                'status': 'success',
                'response_length': len(test_response) if test_response else 0
            }
            print("   ✅ Model: Loaded and responding")
        except Exception as e:
            test_results['model'] = {
                'status': 'error',
                'error': str(e),
                'traceback': traceback.format_exc()
            }
            print(f"   ❌ Model: Error - {e}")
        
        # Test 4: End-to-end pipeline
        try:
            from pynucleus.rag.engine import ask
            result = ask("What is chemical engineering?")
            test_results['pipeline'] = {
                'status': 'success',
                'answer_length': len(result.get('answer', '')) if result else 0,
                'sources_count': len(result.get('sources', [])) if result else 0
            }
            print("   ✅ Pipeline: End-to-end test successful")
        except Exception as e:
            test_results['pipeline'] = {
                'status': 'error', 
                'error': str(e),
                'traceback': traceback.format_exc()
            }
            print(f"   ❌ Pipeline: Error - {e}")
        
        return test_results
    
    def analyze_error_patterns(self, test_results: Dict[str, Any]) -> List[str]:
        """Analyze error patterns and provide specific troubleshooting steps"""
        print("\n🔍 Error Pattern Analysis:")
        
        troubleshooting_steps = []
        
        # Check for common error patterns
        for component, result in test_results.items():
            if result.get('status') == 'error':
                error_msg = result.get('error', '').lower()
                
                # Pattern 1: Import errors
                if 'no module named' in error_msg:
                    troubleshooting_steps.append(f"📦 {component}: Run 'pip install -e .' to install PyNucleus package")
                
                # Pattern 2: Model not found
                elif 'model' in error_msg and ('not found' in error_msg or 'does not exist' in error_msg):
                    troubleshooting_steps.append(f"🤖 {component}: Check internet connection and HuggingFace access")
                
                # Pattern 3: CUDA/device errors
                elif 'cuda' in error_msg or 'device' in error_msg:
                    troubleshooting_steps.append(f"🎮 {component}: Try setting USE_CUDA=False in settings")
                
                # Pattern 4: ChromaDB issues
                elif 'chroma' in error_msg or 'vector' in error_msg:
                    troubleshooting_steps.append(f"🗄️ {component}: Check ChromaDB installation and permissions")
                
                # Pattern 5: Memory issues
                elif 'memory' in error_msg or 'out of memory' in error_msg:
                    troubleshooting_steps.append(f"💾 {component}: Reduce batch size or use smaller model")
                
                # Pattern 6: Permission errors
                elif 'permission' in error_msg or 'access denied' in error_msg:
                    troubleshooting_steps.append(f"🔐 {component}: Check file/directory permissions")
                
                # Generic troubleshooting
                else:
                    troubleshooting_steps.append(f"⚠️ {component}: Check logs for detailed error information")
        
        # General troubleshooting steps
        if troubleshooting_steps:
            troubleshooting_steps.extend([
                "🔄 Try restarting the Jupyter kernel",
                "📋 Verify all requirements are installed: pip install -r requirements.txt",
                "🏗️ Check if you're in the correct directory (PyNucleus-Model)",
                "🌐 Ensure internet connection for model downloads"
            ])
        
        if troubleshooting_steps:
            print("   📝 Recommended troubleshooting steps:")
            for i, step in enumerate(troubleshooting_steps, 1):
                print(f"      {i}. {step}")
        else:
            print("   ✅ No critical errors detected")
        
        return troubleshooting_steps
    
    def generate_debug_report(self, all_data: Dict[str, Any]) -> str:
        """Generate comprehensive debug report"""
        print("\n📄 Generating Debug Report:")
        
        report = {
            'timestamp': datetime.now().isoformat(),
            'system_info': {
                'python_version': sys.version,
                'platform': sys.platform,
                'working_directory': str(Path.cwd())
            },
            'diagnostic_results': all_data.get('diagnostics', {}),
            'import_diagnosis': all_data.get('import_diagnosis', {}),
            'functionality_tests': all_data.get('functionality_tests', {}),
            'performance_data': all_data.get('performance_data', {}),
            'troubleshooting_steps': all_data.get('troubleshooting_steps', [])
        }
        
        # Save debug report
        debug_file = f"debug_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        
        with open(debug_file, 'w') as f:
            json.dump(report, f, indent=2, default=str)
        
        print(f"   ✅ Debug report saved: {debug_file}")
        print(f"   📊 Report size: {Path(debug_file).stat().st_size / 1024:.1f} KB")
        
        return debug_file

# Run debugging tools
if 'performance_data' not in globals():
    # `performance_data` is missing from the notebook namespace
    print("⚠️ Please run previous cells first to gather diagnostic data.")
else:
    debugger = DebugToolkit()

    # Logging is configured first, then the checks run in order:
    # import diagnosis -> functionality tests -> error-pattern analysis
    debugger.setup_detailed_logging()
    import_diagnosis = debugger.diagnose_import_issues()
    functionality_tests = debugger.test_core_functionality()
    troubleshooting_steps = debugger.analyze_error_patterns(functionality_tests)

    # Generate comprehensive debug report; `diagnostic_results` is optional
    # and falls back to an empty dict when it was never defined
    debug_data = dict(
        diagnostics=globals().get('diagnostic_results', {}),
        import_diagnosis=import_diagnosis,
        functionality_tests=functionality_tests,
        performance_data=performance_data,
        troubleshooting_steps=troubleshooting_steps,
    )

    debug_report_file = debugger.generate_debug_report(debug_data)

    print("\n✅ Debug analysis complete!")
    print(f"   📊 Tests run: {len(functionality_tests)}")
    print(f"   ⚠️ Issues found: {len([r for r in functionality_tests.values() if r.get('status') == 'error'])}")
    print(f"   💡 Troubleshooting steps: {len(troubleshooting_steps)}")

    # Store for next cells
    globals().update(
        debugger=debugger,
        debug_data=debug_data,
        functionality_tests=functionality_tests,
    )


In [None]:
# Cell 6: Production Monitoring & Alerting
# =========================================
# This cell provides production-grade monitoring and alerting capabilities

import threading
import queue
import time
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
from pathlib import Path

print("🚀 Production Monitoring & Alerting")
print("=" * 50)

class ProductionMonitor:
    """Production-grade monitoring and alerting system.

    A daemon background thread periodically samples system metrics, stores
    every sample in ``metrics_queue`` and appends threshold violations to
    ``alerts``. ``run_health_check`` is an independent one-shot readiness check.
    """

    def __init__(self):
        # Collected metric samples, oldest first (filled by the monitor thread).
        self.metrics_queue = queue.Queue()
        # Alert dicts carrying 'type', 'message', 'severity' and 'timestamp'.
        self.alerts = []
        self.monitoring_active = False
        self.monitor_thread = None
        # Signalled by stop_monitoring() to wake the loop out of its wait.
        self._stop_event = threading.Event()

    def start_real_time_monitoring(self, interval: int = 30) -> None:
        """Start the background monitoring thread.

        Args:
            interval: Seconds to wait between two metric samples.

        Does nothing (besides printing a notice) when monitoring is already
        active.
        """
        print(f"\n📊 Starting real-time monitoring (interval: {interval}s):")

        if self.monitoring_active:
            print("   ⚠️ Monitoring already active")
            return

        # Retire the previous run's event and use a fresh one: a loop from an
        # earlier run that has not exited yet keeps its own, already-set event
        # and therefore cannot be revived by this restart.
        self._stop_event.set()
        self._stop_event = threading.Event()

        self.monitoring_active = True
        self.monitor_thread = threading.Thread(
            target=self._monitor_loop,
            args=(interval,),
            daemon=True
        )
        self.monitor_thread.start()

        print("   ✅ Real-time monitoring started")
        print("   📈 Monitoring system resources, response times, and error rates")

    def _monitor_loop(self, interval: int) -> None:
        """Main monitoring loop; runs on the background thread until stopped.

        Args:
            interval: Seconds to wait between two iterations.

        Any exception raised while sampling is recorded as a high-severity
        'monitoring_error' alert instead of terminating the thread.
        """
        # Bind this run's event once so a later restart cannot swap it out.
        stop_event = self._stop_event

        while self.monitoring_active and not stop_event.is_set():
            try:
                # Collect metrics
                metrics = self._collect_metrics()
                self.metrics_queue.put(metrics)

                # Check for alerts
                self._check_alerts(metrics)

            except Exception as e:
                self.alerts.append({
                    'timestamp': datetime.now().isoformat(),
                    'type': 'monitoring_error',
                    'message': f"Monitoring error: {e}",
                    'severity': 'high'
                })

            # Wait outside the try block so a failing iteration is throttled
            # too (otherwise a persistent error spins the loop and floods the
            # alert list). Event.wait returns early once stop is requested.
            stop_event.wait(interval)

    def _collect_metrics(self) -> Dict[str, Any]:
        """Collect current system metrics.

        Returns:
            Dict with 'timestamp' (ISO string), 'cpu_percent',
            'memory_percent', 'disk_percent', 'response_time' (seconds, or
            None when the retrieval probe failed) and 'retrieval_success'.
        """
        import psutil

        # System metrics
        # NOTE: per the psutil documentation, cpu_percent() without an interval
        # is non-blocking and its very first call returns a meaningless 0.0.
        cpu_percent = psutil.cpu_percent()
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        # Test response time; any failure (import or call) marks the probe as
        # unsuccessful rather than aborting the sample.
        try:
            from pynucleus.rag.engine import retrieve
            started = time.perf_counter()
            retrieve("test", top_k=1)
            response_time = time.perf_counter() - started
            retrieval_success = True
        except Exception:
            response_time = None
            retrieval_success = False

        return {
            'timestamp': datetime.now().isoformat(),
            'cpu_percent': cpu_percent,
            'memory_percent': memory.percent,
            'disk_percent': (disk.used / disk.total) * 100,
            'response_time': response_time,
            'retrieval_success': retrieval_success
        }

    def _check_alerts(self, metrics: Dict[str, Any]) -> None:
        """Compare one metrics sample against the alert thresholds.

        Args:
            metrics: A sample shaped like the return value of
                ``_collect_metrics``.

        Every triggered alert is stamped with the sample's timestamp and
        appended to ``self.alerts``.
        """
        alerts = []

        # CPU alert
        if metrics['cpu_percent'] > 80:
            alerts.append({
                'type': 'high_cpu',
                'message': f"High CPU usage: {metrics['cpu_percent']:.1f}%",
                'severity': 'medium' if metrics['cpu_percent'] < 90 else 'high'
            })

        # Memory alert
        if metrics['memory_percent'] > 85:
            alerts.append({
                'type': 'high_memory',
                'message': f"High memory usage: {metrics['memory_percent']:.1f}%",
                'severity': 'medium' if metrics['memory_percent'] < 95 else 'high'
            })

        # Disk alert
        if metrics['disk_percent'] > 90:
            alerts.append({
                'type': 'high_disk',
                'message': f"High disk usage: {metrics['disk_percent']:.1f}%",
                'severity': 'high'
            })

        # Response time alert (None means the probe failed; handled below)
        response_time = metrics['response_time']
        if response_time is not None and response_time > 5.0:
            alerts.append({
                'type': 'slow_response',
                'message': f"Slow response time: {response_time:.2f}s",
                'severity': 'medium'
            })

        # Service availability alert
        if not metrics['retrieval_success']:
            alerts.append({
                'type': 'service_down',
                'message': "Retrieval service not responding",
                'severity': 'high'
            })

        # Add alerts with timestamp
        for alert in alerts:
            alert['timestamp'] = metrics['timestamp']
            self.alerts.append(alert)

    def get_monitoring_dashboard(self) -> Dict[str, Any]:
        """Print the latest metrics and recent alerts and return the raw data.

        Returns:
            Dict with 'metrics' (all samples currently held, oldest first),
            'alerts' (the live alert list) and 'monitoring_active'.
        """
        print("\n📊 Real-time Monitoring Dashboard:")

        # Take every sample out of the queue, then put them back so the
        # history stays available for the next call.
        recent_metrics = []
        while True:
            try:
                recent_metrics.append(self.metrics_queue.get_nowait())
            except queue.Empty:
                break
        for metric in recent_metrics:
            self.metrics_queue.put(metric)

        if recent_metrics:
            latest = recent_metrics[-1]

            print(f"   🖥️ System Status (Last Update: {latest['timestamp']}):")
            print(f"      • CPU Usage: {latest['cpu_percent']:.1f}%")
            print(f"      • Memory Usage: {latest['memory_percent']:.1f}%")
            print(f"      • Disk Usage: {latest['disk_percent']:.1f}%")

            # Only None means "no measurement"; 0.0 is a valid reading.
            if latest['response_time'] is not None:
                print(f"      • Response Time: {latest['response_time']:.3f}s")

            status = "🟢 HEALTHY" if latest['retrieval_success'] else "🔴 ISSUES"
            print(f"      • Service Status: {status}")
        else:
            print("   ⚠️ No monitoring data available")

        # Recent alerts are shown even without metrics: when sampling itself
        # fails, the 'monitoring_error' alerts are the only signal there is.
        # Timestamps are ISO strings produced by datetime.isoformat(), which
        # order correctly under plain string comparison.
        cutoff = (datetime.now() - timedelta(minutes=30)).isoformat()
        recent_alerts = [a for a in self.alerts if a['timestamp'] > cutoff]

        print(f"\n   🚨 Recent Alerts ({len(recent_alerts)} in last 30min):")
        if recent_alerts:
            severity_icons = {"low": "🟡", "medium": "🟠", "high": "🔴"}
            for alert in recent_alerts[-5:]:  # Show last 5 alerts
                severity_icon = severity_icons.get(alert['severity'], "⚪")
                print(f"      {severity_icon} {alert['message']} ({alert['timestamp'][:19]})")
        else:
            print("      ✅ No recent alerts")

        return {
            'metrics': recent_metrics,
            'alerts': self.alerts,
            'monitoring_active': self.monitoring_active
        }

    def run_health_check(self) -> Dict[str, Any]:
        """Comprehensive health check for production readiness.

        Runs five checks (core service, vector database, AI model, resources,
        data files), prints a summary and returns the details.

        Returns:
            Dict with 'overall_health' ('healthy', 'degraded' or 'unhealthy'),
            'checks' (name -> {'status', 'message'}) and 'recommendations'.
        """
        print("\n🏥 Production Health Check:")

        health_status = {
            'overall_health': 'unknown',
            'checks': {},
            'recommendations': []
        }

        # Check 1: Core services
        try:
            from pynucleus.rag.engine import ask
            result = ask("test question")
            service_healthy = bool(result and result.get('answer'))
            health_status['checks']['core_service'] = {
                'status': 'pass' if service_healthy else 'fail',
                'message': 'Core Q&A service responding' if service_healthy else 'Core service not responding'
            }
        except Exception as e:
            health_status['checks']['core_service'] = {
                'status': 'fail',
                'message': f'Core service error: {e}'
            }

        # Check 2: Vector database
        try:
            from pynucleus.rag.engine import retrieve
            docs = retrieve("test", top_k=1)
            db_healthy = docs is not None
            health_status['checks']['vector_database'] = {
                'status': 'pass' if db_healthy else 'fail',
                'message': f'Vector DB responding ({len(docs) if docs else 0} docs)' if db_healthy else 'Vector DB not responding'
            }
        except Exception as e:
            health_status['checks']['vector_database'] = {
                'status': 'fail',
                'message': f'Vector DB error: {e}'
            }

        # Check 3: Model availability
        try:
            from pynucleus.llm.qwen_loader import generate
            response = generate("test", max_tokens=5)
            model_healthy = bool(response)
            health_status['checks']['ai_model'] = {
                'status': 'pass' if model_healthy else 'fail',
                'message': 'AI model responding' if model_healthy else 'AI model not responding'
            }
        except Exception as e:
            health_status['checks']['ai_model'] = {
                'status': 'fail',
                'message': f'AI model error: {e}'
            }

        # Check 4: Resource availability (a breach is a warning, not a failure)
        import psutil
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')
        disk_percent = (disk.used / disk.total) * 100

        resource_issues = []
        if memory.percent > 90:
            resource_issues.append(f"High memory usage: {memory.percent:.1f}%")
        if disk_percent > 95:
            resource_issues.append(f"High disk usage: {disk_percent:.1f}%")

        health_status['checks']['resources'] = {
            'status': 'pass' if not resource_issues else 'warn',
            'message': 'Resources available' if not resource_issues else '; '.join(resource_issues)
        }

        # Check 5: Data integrity (paths are relative to the working directory)
        golden_dataset_path = Path("data/validation/golden_dataset.csv")
        vector_db_path = Path("data/03_intermediate/vector_db")

        data_issues = []
        if not golden_dataset_path.exists():
            data_issues.append("Golden dataset missing")
        if not vector_db_path.exists():
            data_issues.append("Vector database missing")

        health_status['checks']['data_integrity'] = {
            'status': 'pass' if not data_issues else 'fail',
            'message': 'Data files present' if not data_issues else '; '.join(data_issues)
        }

        # Calculate overall health: any failure -> unhealthy, otherwise any
        # non-pass (i.e. a warning) -> degraded.
        statuses = [check['status'] for check in health_status['checks'].values()]
        if all(s == 'pass' for s in statuses):
            health_status['overall_health'] = 'healthy'
        elif any(s == 'fail' for s in statuses):
            health_status['overall_health'] = 'unhealthy'
        else:
            health_status['overall_health'] = 'degraded'

        # Generate recommendations; name only the categories that actually
        # occurred so a warn-only result does not list "failed checks: <empty>".
        if health_status['overall_health'] != 'healthy':
            checks = health_status['checks']
            failed_checks = [name for name, check in checks.items() if check['status'] == 'fail']
            warned_checks = [name for name, check in checks.items() if check['status'] == 'warn']

            recommendations = []
            if failed_checks:
                recommendations.append(f"Address failed checks: {', '.join(failed_checks)}")
            if warned_checks:
                recommendations.append(f"Review warnings: {', '.join(warned_checks)}")
            recommendations.extend([
                "Check system logs for detailed error information",
                "Verify all dependencies are properly installed",
                "Ensure sufficient system resources are available"
            ])
            health_status['recommendations'] = recommendations

        # Display results
        overall_icon = {
            'healthy': '🟢',
            'degraded': '🟡',
            'unhealthy': '🔴'
        }.get(health_status['overall_health'], '⚪')

        print(f"   {overall_icon} Overall Health: {health_status['overall_health'].upper()}")

        check_icons = {'pass': '✅', 'warn': '⚠️', 'fail': '❌'}
        for check_name, check_result in health_status['checks'].items():
            check_icon = check_icons.get(check_result['status'], '⚪')
            print(f"   {check_icon} {check_name.replace('_', ' ').title()}: {check_result['message']}")

        return health_status

    def stop_monitoring(self, timeout: float = 5.0) -> None:
        """Stop real-time monitoring and wait for the background thread.

        Args:
            timeout: Maximum seconds to wait for the monitor thread to exit.
                The thread may still be busy with a sample in progress.
        """
        if not self.monitoring_active:
            print("\n⚠️ Monitoring not active")
            return

        self.monitoring_active = False
        print("\n🛑 Stopping real-time monitoring...")

        # Wake the loop immediately instead of letting it sleep out the
        # remainder of its interval.
        self._stop_event.set()

        worker = self.monitor_thread
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout)

        if worker is not None and worker.is_alive():
            print("   ⏳ Monitor thread is finishing its current sample and will exit shortly")
        else:
            print("   ✅ Monitoring stopped")

# Run production monitoring (only when an earlier cell has produced `debug_data`)
if 'debug_data' in globals():
    # Re-running this cell replaces `monitor`; stop a still-active previous
    # instance first so its background thread is not left running unreachable.
    # Duck-typed on purpose: re-executing the cell also redefines the class, so
    # an isinstance check against the new class would not match the old object.
    if getattr(globals().get('monitor'), 'monitoring_active', False):
        globals()['monitor'].stop_monitoring()

    monitor = ProductionMonitor()

    # Run health check
    health_status = monitor.run_health_check()

    # Start monitoring (optional - will run in background)
    monitor.start_real_time_monitoring(interval=30)

    # Give monitoring a moment to collect initial data
    time.sleep(2)

    # Show dashboard
    dashboard_data = monitor.get_monitoring_dashboard()

    print("\n🎯 Production Monitoring Summary:")
    print(f"   • Overall Health: {health_status['overall_health']}")
    print(f"   • Checks Passed: {sum(1 for c in health_status['checks'].values() if c['status'] == 'pass')}/{len(health_status['checks'])}")
    print(f"   • Real-time Monitoring: {'🟢 Active' if monitor.monitoring_active else '🔴 Inactive'}")

    print("\n💡 Next Steps:")
    print("   • Monitor the dashboard for real-time metrics")
    print("   • Set up automated alerting for production deployment")
    print("   • Review logs regularly for performance optimization")
    print("   • Scale resources based on monitoring data")

    # Store for session
    globals()['monitor'] = monitor
    globals()['health_status'] = health_status
    globals()['dashboard_data'] = dashboard_data

    print("\n✅ Production monitoring system ready!")
    print("🔧 Use 'monitor.stop_monitoring()' to stop background monitoring")

else:
    print("⚠️ Please run previous cells first to gather system data.")
