diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..f0d3cf7 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,64 @@ +name: Deploy MkDocs to GitHub Pages + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Cache dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Build with MkDocs + run: mkdocs build --clean --strict + + - name: Setup Pages + uses: actions/configure-pages@v3 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + with: + path: ./site + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + if: github.ref == 'refs/heads/main' + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1c259e2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# MkDocs +site/ +.cache/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +*.log +logs/ + +# Temporary files +*.tmp +*.temp +.temp/ \ No newline at end of file diff --git a/README.md b/README.md index 2a2dee6..88e6e94 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,92 @@ -# docs -content for docs.bitsec.ai +# Bitsec-AI Documentation + +This repository contains the documentation for [docs.bitsec.ai](https://docs.bitsec.ai), providing comprehensive guides for Bittensor subnet operators including validators and miners. + +## 🚀 Quick Start + +This documentation site is built with [MkDocs](https://www.mkdocs.org/) and the [Material theme](https://squidfunk.github.io/mkdocs-material/). + +### Local Development + +1. **Install dependencies:** + ```bash + pip install -r requirements.txt + ``` + +2. **Serve locally:** + ```bash + mkdocs serve + ``` + The site will be available at `http://localhost:8000` + +3. **Build static site:** + ```bash + mkdocs build + ``` + +## 📚 Documentation Structure + +- **Getting Started**: Basic setup and installation guides +- **For Validators**: Comprehensive validator operation guides +- **For Miners**: Mining setup, optimization, and best practices +- **Subnet Operations**: Architecture, monitoring, and troubleshooting +- **API Reference**: Complete API documentation + +## 🔧 Site Configuration + +The site is configured via `mkdocs.yml` with: + +- **Material theme** with dark/light mode support +- **Navigation tabs** for easy browsing +- **Code highlighting** and copy functionality +- **Search** functionality +- **Mobile responsive** design + +## 🚀 Deployment + +The site automatically deploys to GitHub Pages via GitHub Actions when changes are pushed to the `main` branch. 
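+
+Before pushing, you can run the same strict build the workflow uses, so broken links and configuration errors surface locally rather than in CI:
+
+```bash
+# Mirrors the build step in .github/workflows/deploy.yml;
+# --strict turns MkDocs warnings (such as broken internal links) into build failures
+mkdocs build --clean --strict
+```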
+ +### Manual Deployment + +```bash +mkdocs gh-deploy +``` + +## 🤝 Contributing + +We welcome contributions to improve this documentation: + +1. Fork this repository +2. Create a feature branch +3. Make your changes +4. Test locally with `mkdocs serve` +5. Submit a pull request + +### Writing Guidelines + +- Use clear, concise language +- Include code examples where helpful +- Add screenshots for UI-related content +- Follow the existing structure and style +- Test all links and references + +## 📋 Requirements + +- Python 3.8+ +- MkDocs 1.5.0+ +- MkDocs Material 9.4.0+ +- PyMdown Extensions 10.3.0+ + +## 📝 License + +This documentation is licensed under the MIT License. See [LICENSE](LICENSE) for details. + +## 🔗 Links + +- **Live Site**: [docs.bitsec.ai](https://docs.bitsec.ai) +- **GitHub Repository**: [Bitsec-AI/docs](https://github.com/Bitsec-AI/docs) +- **Bitsec-AI Main**: [Bitsec-AI](https://github.com/Bitsec-AI) + +--- + +*Documentation for the Bitsec-AI subnet on the Bittensor network* diff --git a/docs/api/overview.md b/docs/api/overview.md new file mode 100644 index 0000000..2fab6e2 --- /dev/null +++ b/docs/api/overview.md @@ -0,0 +1,73 @@ +# API Overview + +The Bitsec-AI API provides programmatic access to subnet functionality for advanced users and developers. + +## Authentication + +All API requests require authentication using API keys: + +```bash +curl -H "Authorization: Bearer YOUR_API_KEY" \ + https://api.bitsec.ai/v1/status +``` + +## Base URL + +``` +https://api.bitsec.ai/v1 +``` + +## Endpoints + +### Health Check + +```http +GET /health +``` + +Returns the current status of the API service. + +### Validator Status + +```http +GET /validators/{validator_id}/status +``` + +Get the current status of a specific validator. + +### Miner Status + +```http +GET /miners/{miner_id}/status +``` + +Get the current status of a specific miner. + +### Subnet Metrics + +```http +GET /subnet/metrics +``` + +Retrieve current subnet performance metrics. + +## Rate Limits + +- 1000 requests per hour for authenticated users +- 100 requests per hour for unauthenticated requests + +## SDK + +We provide official SDKs for popular programming languages: + +- Python: `pip install bitsec-sdk` +- JavaScript: `npm install @bitsec/sdk` +- Go: `go get github.com/bitsec-ai/go-sdk` + +## Support + +For API support: + +- Check our [troubleshooting guide](../subnet/troubleshooting.md) +- Visit our GitHub repository +- Contact our developer support team \ No newline at end of file diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..ba7c060 --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,111 @@ +# Installation + +This guide will walk you through installing the necessary components to get started with Bitsec-AI. + +## System Requirements + +### Minimum Requirements +- **OS**: Ubuntu 20.04 LTS or later, macOS 10.15+, or Windows 10+ +- **RAM**: 8GB minimum, 16GB recommended +- **Storage**: 100GB available space +- **Network**: Stable internet connection with good bandwidth + +### Recommended Requirements +- **RAM**: 32GB or more +- **Storage**: 500GB SSD +- **CPU**: Multi-core processor (8+ cores recommended) + +## Installation Steps + +### 1. 
Install Python + +Ensure you have Python 3.8 or higher: + +```bash +python3 --version +``` + +If you need to install Python: + +=== "Ubuntu/Debian" + ```bash + sudo apt update + sudo apt install python3 python3-pip python3-venv + ``` + +=== "macOS" + ```bash + # Using Homebrew + brew install python3 + ``` + +=== "Windows" + Download Python from [python.org](https://python.org) and follow the installation wizard. + +### 2. Install Git + +```bash +git --version +``` + +If Git is not installed: + +=== "Ubuntu/Debian" + ```bash + sudo apt install git + ``` + +=== "macOS" + ```bash + brew install git + ``` + +=== "Windows" + Download Git from [git-scm.com](https://git-scm.com) and install. + +### 3. Clone the Repository + +```bash +git clone https://github.com/Bitsec-AI/subnet +cd subnet +``` + +### 4. Set Up Virtual Environment + +```bash +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` + +### 5. Install Dependencies + +```bash +pip install -r requirements.txt +``` + +## Verification + +To verify your installation: + +```bash +python3 -c "import bittensor; print('Bittensor installed successfully')" +``` + +## Next Steps + +Now that you have everything installed: + +- **Validators**: Proceed to [Validator Introduction](../validators/introduction.md) +- **Miners**: Proceed to [Miner Introduction](../miners/introduction.md) + +## Troubleshooting + +### Common Issues + +**Python version conflicts**: Use `python3` explicitly instead of `python`. + +**Permission errors**: Use virtual environments to avoid system-wide installations. + +**Network issues**: Ensure your firewall allows the necessary connections. + +For more help, see our [troubleshooting guide](../subnet/troubleshooting.md). \ No newline at end of file diff --git a/docs/getting-started/overview.md b/docs/getting-started/overview.md new file mode 100644 index 0000000..168e298 --- /dev/null +++ b/docs/getting-started/overview.md @@ -0,0 +1,36 @@ +# Overview + +Welcome to Bitsec-AI! This guide will help you understand the fundamentals of operating on the Bittensor network with our secure infrastructure solutions. + +## What You'll Learn + +In this getting started section, you'll discover: + +- How Bitsec-AI fits into the Bittensor ecosystem +- Basic concepts and terminology +- How to set up your development environment +- Your first steps toward becoming a subnet operator + +## Prerequisites + +Before you begin, you should have: + +- Basic understanding of blockchain technology +- Familiarity with command-line interfaces +- A computer with internet access +- Python 3.8 or higher installed + +## Next Steps + +1. **[Installation](installation.md)** - Set up your environment +2. **Choose your path:** + - [Validator Setup](../validators/introduction.md) - For those running validator nodes + - [Miner Setup](../miners/introduction.md) - For those running miner nodes + +## Need Help? + +If you get stuck at any point: + +- Check our [troubleshooting guide](../subnet/troubleshooting.md) +- Review the relevant section for your role (validator or miner) +- Visit our GitHub repository for community support \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..e21bfcb --- /dev/null +++ b/docs/index.md @@ -0,0 +1,40 @@ +# Welcome to Bitsec-AI Documentation + +Welcome to the comprehensive documentation for Bitsec-AI, your guide to operating on the Bittensor network. This documentation is designed specifically for subnet operators, validators, and miners. 
+ +## What is Bitsec-AI? + +Bitsec-AI provides secure and reliable infrastructure solutions for the Bittensor ecosystem. Our documentation helps you understand, deploy, and maintain your operations on the network. + +## Quick Navigation + +### 🚀 [Getting Started](getting-started/overview.md) +New to Bitsec-AI? Start here to understand the basics and get your environment set up. + +### ⚡ [For Validators](validators/introduction.md) +Comprehensive guides for setting up and maintaining validator nodes on the Bittensor network. + +### ⛏️ [For Miners](miners/introduction.md) +Detailed instructions for miners, including optimization techniques and best practices. + +### 🔧 [Subnet Operations](subnet/architecture.md) +Advanced topics for subnet operators, including architecture, monitoring, and troubleshooting. + +### 📚 [API Reference](api/overview.md) +Complete API documentation for developers and advanced users. + +## Support + +If you need help or have questions: + +- Check our [troubleshooting guide](subnet/troubleshooting.md) +- Visit our [GitHub repository](https://github.com/Bitsec-AI/docs) +- Join our community discussions + +## Contributing + +We welcome contributions to improve this documentation. Please see our GitHub repository for contribution guidelines. + +--- + +*This documentation is maintained by the Bitsec-AI team and is continuously updated to reflect the latest developments in the Bittensor ecosystem.* \ No newline at end of file diff --git a/docs/miners/introduction.md b/docs/miners/introduction.md new file mode 100644 index 0000000..9904922 --- /dev/null +++ b/docs/miners/introduction.md @@ -0,0 +1,93 @@ +# Miner Introduction + +Miners are the computational backbone of the Bittensor network, providing valuable services and competing to earn TAO tokens through quality work and innovation. + +## What Miners Do + +Miners: + +- **Perform Computational Work**: Execute tasks assigned by validators +- **Compete for Rewards**: Strive to provide the highest quality outputs +- **Innovation**: Develop and deploy advanced AI models and techniques +- **Serve the Network**: Provide valuable services to network users + +## Types of Mining + +### Computational Mining +- Run AI models and algorithms +- Process data and generate insights +- Provide prediction and analysis services + +### Service Mining +- Offer specialized AI services +- Provide infrastructure and hosting +- Support network operations + +## Key Responsibilities + +### Performance Optimization +- Develop efficient algorithms +- Optimize hardware utilization +- Minimize response times + +### Quality Assurance +- Ensure output accuracy and reliability +- Implement error checking and validation +- Maintain consistent service quality + +### Innovation +- Research and develop new approaches +- Implement cutting-edge AI techniques +- Contribute to network advancement + +## Miner Economics + +### Revenue Streams +- Task completion rewards +- Quality bonuses for superior performance +- Long-term staking rewards + +### Investment Considerations +- Hardware and infrastructure costs +- Research and development expenses +- Ongoing operational costs + +## Getting Started + +1. **[Setup Guide](setup.md)** - Configure your miner node +2. **[Optimization](optimization.md)** - Maximize your mining efficiency +3. 
**[Monitoring](../subnet/monitoring.md)** - Track performance and earnings + +## Prerequisites + +Before setting up a miner: + +- Complete the [installation guide](../getting-started/installation.md) +- Have appropriate hardware for your mining strategy +- Understand the competitive landscape +- Plan your resource allocation and strategy + +## Success Strategies + +### Technical Excellence +- Focus on algorithm efficiency +- Optimize for both speed and accuracy +- Stay updated with latest AI developments + +### Resource Management +- Balance performance with cost +- Plan for scalability and growth +- Monitor and optimize resource usage + +### Community Engagement +- Participate in community discussions +- Share knowledge and best practices +- Collaborate on research and development + +## Support + +For miner-specific support: + +- Review our [optimization guide](optimization.md) +- Check the [troubleshooting section](../subnet/troubleshooting.md) +- Join the miner community for tips and strategies \ No newline at end of file diff --git a/docs/miners/optimization.md b/docs/miners/optimization.md new file mode 100644 index 0000000..cc77163 --- /dev/null +++ b/docs/miners/optimization.md @@ -0,0 +1,410 @@ +# Miner Optimization + +This guide provides advanced techniques and strategies to maximize your mining efficiency and profitability on the Bitsec-AI subnet. + +## Performance Optimization + +### Hardware Optimization + +#### GPU Optimization + +**Memory Management:** +```python +# Optimize GPU memory usage +import torch +torch.cuda.set_per_process_memory_fraction(0.8) +torch.backends.cudnn.benchmark = True +torch.backends.cudnn.deterministic = False +``` + +**Power and Clock Settings:** +```bash +# Set optimal power limits (adjust for your GPU) +sudo nvidia-smi -pl 250 # 250W power limit + +# Set memory and core clocks +sudo nvidia-smi -ac 5001,1506 + +# Enable persistence mode +sudo nvidia-smi -pm 1 +``` + +#### CPU Optimization + +```bash +# Set CPU governor to performance mode +echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + +# Set CPU affinity for miner process +taskset -c 0-7 python miner.py + +# Adjust process priority +nice -n -10 python miner.py +``` + +### Software Optimization + +#### Model Optimization + +**Quantization:** +```python +# INT8 quantization for faster inference +model = torch.quantization.quantize_dynamic( + model, {torch.nn.Linear}, dtype=torch.qint8 +) +``` + +**TensorRT Optimization:** +```python +import torch_tensorrt + +# Compile model with TensorRT +optimized_model = torch_tensorrt.compile( + model, + inputs=[torch_tensorrt.Input((1, 3, 224, 224))], + enabled_precisions={torch.float, torch.half} +) +``` + +#### Batch Processing + +Optimize batch sizes for your hardware: + +```yaml +# Configuration optimization +performance: + # GPU memory vs. throughput trade-off + batch_size: 32 # Increase for more GPU memory usage + prefetch_factor: 2 + num_workers: 4 + pin_memory: true +``` + +## Algorithm Optimization + +### Model Selection + +Choose models based on: + +1. **Accuracy vs. Speed Trade-off** +2. **Memory Requirements** +3. **Computational Complexity** +4. 
**Network Validation Criteria** + +### Ensemble Methods + +Improve accuracy with ensemble techniques: + +```python +class EnsembleMiner: + def __init__(self, models): + self.models = models + + def predict(self, inputs): + predictions = [] + for model in self.models: + pred = model(inputs) + predictions.append(pred) + + # Weighted average or voting + return torch.mean(torch.stack(predictions), dim=0) +``` + +### Caching Strategies + +Implement intelligent caching: + +```python +from functools import lru_cache +import hashlib + +class SmartCache: + def __init__(self, max_size=1000): + self.cache = {} + self.max_size = max_size + + def get_hash(self, inputs): + return hashlib.md5(str(inputs).encode()).hexdigest() + + def get(self, inputs): + key = self.get_hash(inputs) + return self.cache.get(key) + + def put(self, inputs, result): + if len(self.cache) >= self.max_size: + # Remove oldest entry + self.cache.pop(next(iter(self.cache))) + key = self.get_hash(inputs) + self.cache[key] = result +``` + +## Network Optimization + +### Connection Management + +Optimize network connections: + +```python +# Connection pooling +import aiohttp +import asyncio + +class NetworkOptimizer: + def __init__(self): + self.connector = aiohttp.TCPConnector( + limit=100, + limit_per_host=10, + keepalive_timeout=60 + ) + self.session = aiohttp.ClientSession( + connector=self.connector, + timeout=aiohttp.ClientTimeout(total=30) + ) +``` + +### Bandwidth Management + +```bash +# Set network buffer sizes +echo 'net.core.rmem_max = 16777216' | sudo tee -a /etc/sysctl.conf +echo 'net.core.wmem_max = 16777216' | sudo tee -a /etc/sysctl.conf +echo 'net.ipv4.tcp_rmem = 4096 65536 16777216' | sudo tee -a /etc/sysctl.conf +echo 'net.ipv4.tcp_wmem = 4096 65536 16777216' | sudo tee -a /etc/sysctl.conf +sudo sysctl -p +``` + +## Economic Optimization + +### Cost Management + +#### Infrastructure Costs + +1. **Cloud vs. Dedicated Hardware** + - Calculate ROI for different setups + - Consider electricity costs + - Factor in maintenance and upgrades + +2. 
**Resource Utilization** + ```python + # Monitor and optimize resource usage + import psutil + + def get_resource_utilization(): + return { + 'cpu_percent': psutil.cpu_percent(), + 'memory_percent': psutil.virtual_memory().percent, + 'gpu_memory': torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated() + } + ``` + +#### Energy Efficiency + +```bash +# Monitor power consumption +sudo apt install powertop +sudo powertop --html=power_report.html + +# Use power-efficient settings during low-reward periods +echo powersave | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor +``` + +### Reward Optimization + +#### Dynamic Strategy Adjustment + +```python +class DynamicMiner: + def __init__(self): + self.performance_history = [] + self.current_strategy = "balanced" + + def adjust_strategy(self, recent_rewards): + if recent_rewards < threshold: + self.current_strategy = "aggressive" + self.increase_compute_allocation() + else: + self.current_strategy = "efficient" + self.optimize_for_efficiency() +``` + +#### Time-based Optimization + +```python +import datetime + +def get_optimal_mining_hours(): + # Analyze historical data to find peak reward times + current_hour = datetime.datetime.now().hour + + peak_hours = [9, 10, 11, 14, 15, 16, 20, 21] # Example + return current_hour in peak_hours +``` + +## Monitoring and Analytics + +### Performance Metrics + +Track key performance indicators: + +```python +class PerformanceMonitor: + def __init__(self): + self.metrics = { + 'requests_per_second': 0, + 'average_response_time': 0, + 'accuracy_score': 0, + 'resource_utilization': {}, + 'earnings_per_hour': 0 + } + + def update_metrics(self, new_data): + # Update metrics with exponential moving average + alpha = 0.1 + for key, value in new_data.items(): + if key in self.metrics: + self.metrics[key] = alpha * value + (1 - alpha) * self.metrics[key] +``` + +### Automated Optimization + +```python +class AutoOptimizer: + def __init__(self): + self.optimization_strategies = [ + self.optimize_batch_size, + self.optimize_model_selection, + self.optimize_resource_allocation + ] + + def run_optimization(self): + for strategy in self.optimization_strategies: + current_performance = self.measure_performance() + strategy() + new_performance = self.measure_performance() + + if new_performance <= current_performance: + self.rollback_changes() +``` + +## Advanced Techniques + +### Multi-Model Mining + +Run multiple specialized models: + +```python +class MultiModelMiner: + def __init__(self): + self.models = { + 'text_classification': TextClassificationModel(), + 'image_generation': ImageGenerationModel(), + 'code_completion': CodeCompletionModel() + } + + def route_request(self, request): + task_type = self.identify_task_type(request) + return self.models[task_type].process(request) +``` + +### Federated Learning + +Participate in collaborative learning: + +```python +class FederatedMiner: + def __init__(self): + self.local_model = Model() + self.global_updates = [] + + def federated_update(self, global_weights): + # Update local model with global knowledge + self.local_model.load_state_dict(global_weights) + + # Train on local data + self.train_local_model() + + # Share updates + return self.local_model.state_dict() +``` + +### Edge Computing Integration + +Optimize for edge deployment: + +```python +class EdgeOptimizedMiner: + def __init__(self): + self.lightweight_models = self.load_compressed_models() + self.edge_cache = EdgeCache() + + def process_with_edge_optimization(self, request): + # Use 
lightweight models for fast processing + # Cache frequently requested computations + # Batch similar requests for efficiency + pass +``` + +## Troubleshooting Performance Issues + +### Common Performance Bottlenecks + +1. **Memory Bottlenecks** + ```bash + # Monitor memory usage + watch -n 1 'free -h && nvidia-smi --query-gpu=memory.used,memory.total --format=csv' + ``` + +2. **CPU Bottlenecks** + ```bash + # Profile CPU usage + python -m cProfile -o profile.stats miner.py + python -c "import pstats; pstats.Stats('profile.stats').sort_stats('cumulative').print_stats(10)" + ``` + +3. **I/O Bottlenecks** + ```bash + # Monitor disk I/O + iostat -x 1 + + # Optimize with SSD caching + sudo apt install bcache-tools + ``` + +### Performance Debugging + +```python +import time +import functools + +def performance_monitor(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + start_time = time.time() + result = func(*args, **kwargs) + end_time = time.time() + + print(f"{func.__name__} took {end_time - start_time:.4f} seconds") + return result + return wrapper + +@performance_monitor +def mining_function(data): + # Your mining logic here + pass +``` + +## Best Practices Summary + +1. **Hardware**: Invest in quality GPUs and fast storage +2. **Software**: Keep models and libraries updated +3. **Monitoring**: Implement comprehensive performance tracking +4. **Economics**: Balance costs with potential rewards +5. **Community**: Stay engaged with latest optimization techniques +6. **Security**: Don't compromise security for performance +7. **Scalability**: Design for future growth and upgrades + +## Conclusion + +Optimization is an ongoing process. Continuously monitor, measure, and improve your mining setup. The Bittensor ecosystem evolves rapidly, so staying informed about new techniques and best practices is crucial for long-term success. + +Remember: The most optimized miner is one that balances performance, cost-efficiency, and reliability while adapting to changing network conditions. \ No newline at end of file diff --git a/docs/miners/setup.md b/docs/miners/setup.md new file mode 100644 index 0000000..8bb92f7 --- /dev/null +++ b/docs/miners/setup.md @@ -0,0 +1,280 @@ +# Miner Setup Guide + +This comprehensive guide will help you set up a miner node for the Bitsec-AI subnet on the Bittensor network. 
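+
+Before diving in, it can help to confirm the core tooling from the [installation guide](../getting-started/installation.md) is available in your environment. A minimal, optional sanity check (assuming you completed that guide):
+
+```bash
+# These should all succeed inside the Python environment you will run the miner from
+python3 -c "import bittensor; print(bittensor.__version__)"
+btcli --help
+git --version
+```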
+ +## Prerequisites + +Before starting: + +- Complete the [installation guide](../getting-started/installation.md) +- Have adequate hardware for mining operations +- Understand the competitive nature of mining +- Have a stable internet connection with good bandwidth + +## Step 1: Environment Setup + +Create a dedicated directory for your miner: + +```bash +mkdir ~/bitsec-miner +cd ~/bitsec-miner +``` + +Set up a Python virtual environment: + +```bash +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` + +## Step 2: Hardware Configuration + +### GPU Setup (Recommended) + +For optimal performance, configure GPU acceleration: + +```bash +# Check GPU availability +nvidia-smi + +# Install CUDA toolkit if needed +sudo apt update +sudo apt install nvidia-cuda-toolkit + +# Verify CUDA installation +nvcc --version +``` + +### CPU Mining Setup + +If using CPU mining: + +```bash +# Set CPU affinity for better performance +taskset -c 0-7 python miner.py +``` + +## Step 3: Configuration + +Create your miner configuration file: + +```bash +cp config/miner.example.yaml config/miner.yaml +``` + +Edit the configuration with your specific settings: + +```yaml +# miner.yaml +miner: + netuid: 1 # Adjust based on your subnet + wallet_name: "my_miner" + hotkey_name: "my_hotkey" + +performance: + batch_size: 16 + max_workers: 4 + timeout: 30 + +hardware: + device: "cuda" # or "cpu" + memory_fraction: 0.8 + +network: + port: 8081 + external_ip: "your.external.ip" + +logging: + level: "INFO" + file: "logs/miner.log" +``` + +## Step 4: Wallet Setup + +Create and configure your miner wallet: + +```bash +btcli wallet new_coldkey --wallet.name my_miner +btcli wallet new_hotkey --wallet.name my_miner --wallet.hotkey my_hotkey +``` + +## Step 5: Registration + +Register your miner on the subnet: + +```bash +btcli subnet register --wallet.name my_miner --wallet.hotkey my_hotkey --netuid 1 +``` + +## Step 6: Install Dependencies + +Install required Python packages: + +```bash +pip install -r requirements.txt +``` + +For GPU acceleration: + +```bash +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +``` + +## Step 7: Start Mining + +Launch your miner: + +```bash +python miner.py --config config/miner.yaml +``` + +For background operation: + +```bash +nohup python miner.py --config config/miner.yaml > logs/miner.log 2>&1 & +``` + +## Step 8: Monitoring + +Monitor your miner's performance: + +```bash +# Check miner status +btcli wallet overview --wallet.name my_miner + +# View real-time logs +tail -f logs/miner.log + +# Monitor system resources +htop +nvidia-smi # For GPU monitoring +``` + +## Performance Optimization + +### GPU Optimization + +For NVIDIA GPUs: + +```bash +# Set GPU power mode +sudo nvidia-smi -pm 1 + +# Set memory and core clocks +sudo nvidia-smi -ac 5001,1506 + +# Monitor GPU utilization +watch -n 1 nvidia-smi +``` + +### Memory Management + +Optimize memory usage: + +```python +# In your configuration +memory_settings: + max_memory_gb: 8 + garbage_collection: true + memory_mapped_files: true +``` + +### Network Optimization + +Improve network performance: + +```bash +# Increase network buffer sizes +echo 'net.core.rmem_default = 262144' | sudo tee -a /etc/sysctl.conf +echo 'net.core.rmem_max = 16777216' | sudo tee -a /etc/sysctl.conf +sudo sysctl -p +``` + +## Troubleshooting + +### Common Issues + +**GPU not detected:** +```bash +# Check CUDA installation +python -c "import torch; print(torch.cuda.is_available())" + +# 
Reinstall GPU drivers if needed +sudo apt purge nvidia* +sudo apt autoremove +sudo apt install nvidia-driver-525 +``` + +**Memory errors:** +```bash +# Reduce batch size in configuration +batch_size: 8 # Instead of 16 + +# Clear GPU memory +python -c "import torch; torch.cuda.empty_cache()" +``` + +**Connection issues:** +```bash +# Check network connectivity +ping -c 4 api.bitsec.ai + +# Verify firewall settings +sudo ufw status +``` + +## Advanced Configuration + +### Model Customization + +Configure custom AI models: + +```yaml +model: + name: "custom_model_v1" + path: "/path/to/model" + precision: "fp16" + optimization: "tensorrt" +``` + +### Load Balancing + +For multiple GPU setup: + +```yaml +multi_gpu: + enabled: true + devices: [0, 1, 2, 3] + strategy: "data_parallel" +``` + +## Security Considerations + +### Wallet Security + +1. **Separate Funds**: Keep mining rewards separate from main holdings +2. **Regular Withdrawals**: Don't accumulate large amounts in hot wallets +3. **Key Management**: Secure backup of all wallet keys + +### System Security + +1. **Access Control**: Limit SSH access to mining systems +2. **Monitoring**: Set up alerts for unusual activity +3. **Updates**: Keep system and mining software updated + +## Next Steps + +After successful setup: + +1. **[Optimization Guide](optimization.md)** - Maximize your mining efficiency +2. **[Monitoring Setup](../subnet/monitoring.md)** - Set up comprehensive monitoring +3. **Community Engagement** - Join miner discussions and share experiences + +## Support + +For miner-specific support: + +- Check the [optimization guide](optimization.md) +- Review [troubleshooting documentation](../subnet/troubleshooting.md) +- Join the miner community forums +- Contact technical support through official channels \ No newline at end of file diff --git a/docs/subnet/architecture.md b/docs/subnet/architecture.md new file mode 100644 index 0000000..37dcd96 --- /dev/null +++ b/docs/subnet/architecture.md @@ -0,0 +1,377 @@ +# Subnet Architecture + +Understanding the Bitsec-AI subnet architecture is crucial for effective operation as a validator or miner. This guide provides a comprehensive overview of the system design and components. + +## Overview + +The Bitsec-AI subnet operates as a specialized layer within the Bittensor network, focusing on secure and reliable AI infrastructure services. The architecture is designed for scalability, security, and decentralization. + +## Core Components + +### Network Layer + +```mermaid +graph TB + A[Bittensor Main Network] --> B[Bitsec-AI Subnet] + B --> C[Validators] + B --> D[Miners] + C --> E[Validation Logic] + D --> F[AI Services] + E --> G[Consensus Mechanism] + F --> H[Service Registry] +``` + +#### Bittensor Integration +- **Netuid**: Unique subnet identifier within Bittensor +- **TAO Integration**: Native token for rewards and staking +- **Consensus**: Participates in Bittensor's consensus mechanism +- **Metagraph**: Maintains network state and participant information + +### Validator Architecture + +#### Core Functions + +1. **Service Validation** + - Quality assessment of miner outputs + - Performance benchmarking + - Security compliance verification + +2. **Network Governance** + - Consensus participation + - Protocol upgrade voting + - Network parameter adjustment + +3. 
**Economic Management** + - Reward distribution + - Stake management + - Slashing condition enforcement + +#### Technical Components + +```python +class ValidatorNode: + def __init__(self): + self.consensus_engine = ConsensusEngine() + self.validation_logic = ValidationLogic() + self.economic_engine = EconomicEngine() + self.network_interface = NetworkInterface() + + def validate_miner_output(self, miner_id, output): + # Implement validation logic + pass + + def participate_in_consensus(self): + # Consensus participation + pass +``` + +### Miner Architecture + +#### Service Categories + +1. **AI Inference Services** + - Text processing and generation + - Image analysis and generation + - Code completion and analysis + - Data processing and analytics + +2. **Infrastructure Services** + - Compute resource provision + - Storage services + - Network services + - Security services + +#### Technical Stack + +```python +class MinerNode: + def __init__(self): + self.service_engine = ServiceEngine() + self.resource_manager = ResourceManager() + self.api_interface = APIInterface() + self.security_layer = SecurityLayer() + + def provide_service(self, request): + # Service provision logic + pass + + def register_capabilities(self): + # Capability registration + pass +``` + +## Data Flow Architecture + +### Request Processing Flow + +1. **Service Request Initiation** + ``` + Client → API Gateway → Load Balancer → Miner Selection + ``` + +2. **Miner Processing** + ``` + Selected Miner → Service Execution → Response Generation + ``` + +3. **Validation Process** + ``` + Miner Response → Validator Evaluation → Quality Assessment → Consensus + ``` + +4. **Reward Distribution** + ``` + Consensus Result → Economic Engine → Reward Calculation → Distribution + ``` + +### Data Storage + +#### On-Chain Data +- Network metadata and parameters +- Participant registrations and stakes +- Consensus results and rewards +- Governance decisions and votes + +#### Off-Chain Data +- Service request/response logs +- Performance metrics and analytics +- Private computation results +- Cached frequently accessed data + +## Security Architecture + +### Multi-Layer Security + +#### Network Security +```yaml +security_layers: + network: + - DDoS protection + - Rate limiting + - IP filtering + - Encrypted communications + + application: + - Authentication and authorization + - Input validation and sanitization + - Output verification + - API security + + consensus: + - Byzantine fault tolerance + - Sybil attack prevention + - Economic security mechanisms + - Slashing conditions +``` + +#### Cryptographic Foundations + +1. **Identity Management** + - Cryptographic key pairs for each participant + - Digital signatures for all transactions + - Zero-knowledge proofs for privacy + +2. **Communication Security** + - TLS/SSL for all network communications + - End-to-end encryption for sensitive data + - Message authentication codes + +3. 
**Data Integrity** + - Merkle trees for data verification + - Hash-based integrity checks + - Immutable audit logs + +## Scalability Design + +### Horizontal Scaling + +#### Validator Scaling +- Multiple validator nodes for redundancy +- Load balancing across validators +- Geographic distribution for latency optimization + +#### Miner Scaling +- Dynamic miner registration and deregistration +- Auto-scaling based on demand +- Resource pooling and sharing + +### Vertical Scaling + +#### Performance Optimization +- Efficient algorithms and data structures +- Caching strategies for frequently accessed data +- Database optimization and indexing + +#### Resource Management +```python +class ResourceManager: + def __init__(self): + self.cpu_pool = CPUPool() + self.gpu_pool = GPUPool() + self.memory_pool = MemoryPool() + self.storage_pool = StoragePool() + + def allocate_resources(self, request): + # Resource allocation logic + return self.optimize_allocation(request) + + def monitor_utilization(self): + # Resource monitoring + pass +``` + +## Consensus Mechanism + +### Proof-of-Intelligence (PoI) + +The subnet implements a custom consensus mechanism based on the quality and utility of AI services provided: + +#### Validation Criteria +1. **Accuracy**: Correctness of outputs +2. **Efficiency**: Resource utilization and speed +3. **Innovation**: Novel approaches and improvements +4. **Reliability**: Consistent performance over time + +#### Consensus Algorithm +```python +class ConsensusEngine: + def __init__(self): + self.validators = [] + self.consensus_threshold = 0.67 + + def reach_consensus(self, validation_results): + # Aggregate validator opinions + weighted_scores = self.calculate_weighted_scores(validation_results) + + # Apply consensus algorithm + consensus_score = self.apply_consensus_algorithm(weighted_scores) + + # Determine final result + return consensus_score >= self.consensus_threshold +``` + +## Economic Model + +### Token Economics + +#### Reward Mechanisms +- **Base Rewards**: Fixed rewards for participation +- **Performance Rewards**: Variable rewards based on quality +- **Innovation Bonuses**: Extra rewards for novel contributions +- **Staking Rewards**: Returns on staked tokens + +#### Cost Structure +- **Network Fees**: Transaction and service fees +- **Slashing Penalties**: Penalties for malicious behavior +- **Infrastructure Costs**: Operational expenses + +### Incentive Alignment + +The economic model is designed to align participant incentives with network goals: + +```python +class EconomicEngine: + def calculate_rewards(self, participant_id, performance_metrics): + base_reward = self.get_base_reward() + performance_multiplier = self.calculate_performance_multiplier(performance_metrics) + innovation_bonus = self.calculate_innovation_bonus(participant_id) + + total_reward = base_reward * performance_multiplier + innovation_bonus + return total_reward + + def apply_slashing(self, participant_id, violation_type): + # Slashing logic for violations + pass +``` + +## Network Topology + +### Physical Architecture + +#### Global Distribution +- Multiple geographic regions for redundancy +- Edge nodes for reduced latency +- Content delivery network integration + +#### Network Connectivity +```yaml +network_topology: + regions: + - us-east: + validators: 10 + miners: 100 + - eu-west: + validators: 8 + miners: 80 + - asia-pacific: + validators: 12 + miners: 120 + + connectivity: + inter_region_latency: <100ms + intra_region_latency: <10ms + bandwidth: >1Gbps +``` + +### Logical 
Architecture + +#### Service Mesh +- Microservices architecture for modularity +- Service discovery and registration +- Load balancing and failover +- Circuit breakers for resilience + +## Monitoring and Observability + +### System Metrics + +#### Network-Level Metrics +- Transaction throughput and latency +- Consensus timing and participation +- Network topology and connectivity +- Security incident detection + +#### Service-Level Metrics +- Request/response patterns +- Service quality scores +- Resource utilization +- Error rates and types + +### Alerting and Automation + +```python +class MonitoringSystem: + def __init__(self): + self.metrics_collector = MetricsCollector() + self.alert_manager = AlertManager() + self.auto_scaler = AutoScaler() + + def monitor_network_health(self): + metrics = self.metrics_collector.collect_all() + + if self.detect_anomaly(metrics): + self.alert_manager.send_alert() + self.auto_scaler.adjust_capacity() +``` + +## Future Architecture Evolution + +### Planned Enhancements + +1. **Layer 2 Integration**: State channels and sidechains +2. **Interoperability**: Cross-chain communication protocols +3. **AI Model Registry**: Decentralized model sharing and versioning +4. **Privacy Enhancements**: Zero-knowledge proofs and federated learning + +### Research Directions + +- Quantum-resistant cryptography +- Advanced consensus mechanisms +- AI-driven network optimization +- Sustainable computing practices + +## Conclusion + +The Bitsec-AI subnet architecture provides a robust, scalable, and secure foundation for decentralized AI services. Understanding these architectural principles is essential for effective participation as either a validator or miner. + +The architecture continues to evolve based on network needs, technological advances, and community feedback. Stay engaged with the development community to understand upcoming changes and improvements. \ No newline at end of file diff --git a/docs/subnet/monitoring.md b/docs/subnet/monitoring.md new file mode 100644 index 0000000..2e4d748 --- /dev/null +++ b/docs/subnet/monitoring.md @@ -0,0 +1,633 @@ +# Monitoring and Observability + +Comprehensive monitoring is essential for maintaining a healthy and efficient operation in the Bitsec-AI subnet. This guide covers monitoring strategies, tools, and best practices for both validators and miners. 
+ +## Overview + +Effective monitoring provides: + +- **Real-time visibility** into system performance +- **Early warning** of potential issues +- **Performance optimization** insights +- **Compliance verification** with network requirements +- **Economic tracking** of earnings and costs + +## Monitoring Architecture + +### Three-Tier Monitoring Model + +```mermaid +graph TB + A[Infrastructure Monitoring] --> D[Centralized Dashboard] + B[Application Monitoring] --> D + C[Business Monitoring] --> D + D --> E[Alerting System] + D --> F[Analytics Engine] + E --> G[Incident Response] + F --> H[Optimization Decisions] +``` + +## Infrastructure Monitoring + +### System Metrics + +#### Core System Metrics + +```yaml +system_metrics: + cpu: + - utilization_percent + - load_average + - core_temperature + - frequency_scaling + + memory: + - usage_percent + - available_memory + - swap_usage + - memory_leaks + + disk: + - usage_percent + - io_operations_per_second + - read_write_latency + - disk_health_status + + network: + - bandwidth_utilization + - packet_loss_rate + - connection_count + - latency_metrics +``` + +#### GPU Monitoring (For Miners) + +```bash +# NVIDIA GPU monitoring script +#!/bin/bash + +while true; do + nvidia-smi --query-gpu=timestamp,name,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv,noheader,nounits >> gpu_metrics.log + sleep 10 +done +``` + +### Infrastructure Monitoring Tools + +#### System Monitoring with Prometheus + +```yaml +# prometheus.yml +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'node-exporter' + static_configs: + - targets: ['localhost:9100'] + + - job_name: 'gpu-exporter' + static_configs: + - targets: ['localhost:9101'] + + - job_name: 'validator-metrics' + static_configs: + - targets: ['localhost:8080'] +``` + +#### Grafana Dashboard Configuration + +```json +{ + "dashboard": { + "title": "Bitsec-AI Node Monitoring", + "panels": [ + { + "title": "CPU Usage", + "type": "graph", + "targets": [ + { + "expr": "100 - (avg(irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)" + } + ] + }, + { + "title": "Memory Usage", + "type": "graph", + "targets": [ + { + "expr": "(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100" + } + ] + } + ] + } +} +``` + +## Application Monitoring + +### Validator Monitoring + +#### Key Validator Metrics + +```python +class ValidatorMetrics: + def __init__(self): + self.metrics = { + 'validation_requests_per_second': 0, + 'average_validation_time': 0, + 'consensus_participation_rate': 0, + 'reward_rate': 0, + 'network_connectivity_score': 0, + 'validator_rank': 0, + 'uptime_percentage': 0 + } + + def collect_metrics(self): + # Collect and update metrics + self.update_validation_metrics() + self.update_economic_metrics() + self.update_network_metrics() + + return self.metrics +``` + +#### Validator Health Checks + +```python +import asyncio +import aiohttp +from datetime import datetime + +class ValidatorHealthMonitor: + def __init__(self, validator_endpoints): + self.endpoints = validator_endpoints + self.health_status = {} + + async def health_check(self, endpoint): + try: + async with aiohttp.ClientSession() as session: + async with session.get(f"{endpoint}/health", timeout=5) as response: + if response.status == 200: + return {"status": "healthy", "timestamp": datetime.now()} + else: + return {"status": "unhealthy", "timestamp": datetime.now()} + except Exception as e: + return {"status": "error", "error": str(e), 
"timestamp": datetime.now()} + + async def monitor_all_validators(self): + tasks = [self.health_check(endpoint) for endpoint in self.endpoints] + results = await asyncio.gather(*tasks) + + for i, result in enumerate(results): + self.health_status[self.endpoints[i]] = result + + return self.health_status +``` + +### Miner Monitoring + +#### Key Miner Metrics + +```python +class MinerMetrics: + def __init__(self): + self.metrics = { + 'requests_processed_per_hour': 0, + 'average_response_time': 0, + 'accuracy_score': 0, + 'resource_utilization': { + 'cpu': 0, + 'memory': 0, + 'gpu': 0, + 'disk': 0 + }, + 'earnings_per_hour': 0, + 'rejection_rate': 0, + 'service_availability': 0 + } + + def collect_metrics(self): + # Implementation details + pass +``` + +#### Service Quality Monitoring + +```python +import time +from functools import wraps + +def monitor_service_quality(func): + @wraps(func) + async def wrapper(*args, **kwargs): + start_time = time.time() + + try: + result = await func(*args, **kwargs) + + # Calculate metrics + response_time = time.time() - start_time + success = True + + # Log metrics + await log_service_metrics({ + 'function': func.__name__, + 'response_time': response_time, + 'success': success, + 'timestamp': time.time() + }) + + return result + + except Exception as e: + # Log error metrics + await log_service_metrics({ + 'function': func.__name__, + 'response_time': time.time() - start_time, + 'success': False, + 'error': str(e), + 'timestamp': time.time() + }) + raise + + return wrapper +``` + +## Business Monitoring + +### Economic Metrics + +#### Revenue Tracking + +```python +class EconomicMonitor: + def __init__(self): + self.economic_metrics = { + 'daily_earnings': 0, + 'monthly_earnings': 0, + 'total_earnings': 0, + 'daily_costs': 0, + 'monthly_costs': 0, + 'profit_margin': 0, + 'roi_percentage': 0 + } + + def track_earnings(self, amount, timestamp): + # Track earnings over time + pass + + def track_costs(self, cost_type, amount, timestamp): + # Track various cost types + pass + + def calculate_profitability(self): + # Calculate profitability metrics + pass +``` + +#### Performance Benchmarking + +```python +class PerformanceBenchmark: + def __init__(self): + self.benchmarks = { + 'network_average_response_time': 0, + 'network_average_accuracy': 0, + 'network_average_uptime': 0, + 'top_performer_metrics': {}, + 'percentile_rankings': {} + } + + def compare_performance(self, my_metrics): + # Compare against network averages + performance_score = self.calculate_relative_performance(my_metrics) + return performance_score +``` + +## Alerting System + +### Alert Configuration + +```yaml +# alerting_rules.yml +groups: +- name: infrastructure_alerts + rules: + - alert: HighCPUUsage + expr: cpu_usage_percent > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "High CPU usage detected" + description: "CPU usage is above 80% for more than 5 minutes" + + - alert: LowDiskSpace + expr: disk_free_percent < 10 + for: 2m + labels: + severity: critical + annotations: + summary: "Low disk space" + description: "Disk space is below 10%" + +- name: application_alerts + rules: + - alert: ValidatorOffline + expr: validator_uptime_percent < 95 + for: 1m + labels: + severity: critical + annotations: + summary: "Validator offline" + description: "Validator uptime is below 95%" + + - alert: LowMiningRewards + expr: hourly_mining_rewards < mining_reward_threshold + for: 30m + labels: + severity: warning + annotations: + summary: "Low mining rewards" + description: 
"Mining rewards are below expected threshold" +``` + +### Alert Channels + +```python +class AlertManager: + def __init__(self): + self.channels = { + 'email': EmailChannel(), + 'slack': SlackChannel(), + 'telegram': TelegramChannel(), + 'sms': SMSChannel() + } + + async def send_alert(self, alert, severity): + message = self.format_alert_message(alert, severity) + + # Send to appropriate channels based on severity + if severity == 'critical': + await self.send_to_all_channels(message) + elif severity == 'warning': + await self.send_to_primary_channels(message) + else: + await self.send_to_log_channel(message) +``` + +## Logging and Observability + +### Structured Logging + +```python +import logging +import json +from datetime import datetime + +class StructuredLogger: + def __init__(self, service_name): + self.service_name = service_name + self.logger = logging.getLogger(service_name) + + # Configure JSON formatter + handler = logging.StreamHandler() + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + self.logger.addHandler(handler) + self.logger.setLevel(logging.INFO) + + def log(self, level, message, **kwargs): + log_entry = { + 'timestamp': datetime.utcnow().isoformat(), + 'service': self.service_name, + 'level': level, + 'message': message, + **kwargs + } + + if level == 'error': + self.logger.error(json.dumps(log_entry)) + elif level == 'warning': + self.logger.warning(json.dumps(log_entry)) + else: + self.logger.info(json.dumps(log_entry)) +``` + +### Distributed Tracing + +```python +from opentelemetry import trace +from opentelemetry.exporter.jaeger.thrift import JaegerExporter +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + +class DistributedTracing: + def __init__(self): + # Set up Jaeger exporter + jaeger_exporter = JaegerExporter( + agent_host_name="jaeger-agent", + agent_port=6831, + ) + + # Set up tracer provider + trace.set_tracer_provider(TracerProvider()) + tracer_provider = trace.get_tracer_provider() + + # Add span processor + span_processor = BatchSpanProcessor(jaeger_exporter) + tracer_provider.add_span_processor(span_processor) + + self.tracer = trace.get_tracer(__name__) + + def trace_operation(self, operation_name): + def decorator(func): + def wrapper(*args, **kwargs): + with self.tracer.start_as_current_span(operation_name): + return func(*args, **kwargs) + return wrapper + return decorator +``` + +## Monitoring Tools and Setup + +### Tool Stack Recommendations + +#### Open Source Stack +```yaml +monitoring_stack: + metrics_collection: + - prometheus + - node_exporter + - gpu_exporter + + visualization: + - grafana + - kibana + + logging: + - elasticsearch + - logstash + - fluentd + + alerting: + - alertmanager + - pushover + + tracing: + - jaeger + - zipkin +``` + +#### Commercial Alternatives +- DataDog +- New Relic +- Splunk +- Elastic Cloud + +### Installation Scripts + +#### Docker Compose Setup + +```yaml +# docker-compose.monitoring.yml +version: '3.8' + +services: + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + - ./alerting_rules.yml:/etc/prometheus/alerting_rules.yml + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + volumes: + - grafana-storage:/var/lib/grafana + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + + node-exporter: + image: prom/node-exporter:latest + ports: + - "9100:9100" + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + 
- /:/rootfs:ro + + alertmanager: + image: prom/alertmanager:latest + ports: + - "9093:9093" + volumes: + - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml + +volumes: + grafana-storage: +``` + +### Automated Setup Script + +```bash +#!/bin/bash +# setup_monitoring.sh + +echo "Setting up Bitsec-AI monitoring stack..." + +# Install Docker and Docker Compose +sudo apt update +sudo apt install -y docker.io docker-compose + +# Create monitoring directory +mkdir -p ~/bitsec-monitoring +cd ~/bitsec-monitoring + +# Download configuration files +wget https://raw.githubusercontent.com/Bitsec-AI/docs/main/monitoring/docker-compose.yml +wget https://raw.githubusercontent.com/Bitsec-AI/docs/main/monitoring/prometheus.yml +wget https://raw.githubusercontent.com/Bitsec-AI/docs/main/monitoring/alerting_rules.yml + +# Start monitoring stack +docker-compose up -d + +echo "Monitoring stack started!" +echo "Grafana: http://localhost:3000 (admin/admin)" +echo "Prometheus: http://localhost:9090" +echo "Alertmanager: http://localhost:9093" +``` + +## Best Practices + +### Monitoring Best Practices + +1. **Monitor What Matters** + - Focus on user-facing metrics + - Monitor business KPIs + - Track resource utilization + - Measure service quality + +2. **Set Appropriate Thresholds** + - Use historical data to set baselines + - Account for normal variations + - Implement graduated alerting + +3. **Reduce Alert Fatigue** + - Implement smart alerting rules + - Use alert correlation + - Provide actionable information + +4. **Regular Review and Optimization** + - Review metrics and alerts monthly + - Update thresholds based on trends + - Remove obsolete monitoring + +### Security Considerations + +1. **Secure Monitoring Infrastructure** + - Use HTTPS for all communications + - Implement authentication and authorization + - Regularly update monitoring tools + +2. **Data Privacy** + - Avoid logging sensitive information + - Implement data retention policies + - Anonymize personal data + +3. **Access Control** + - Implement role-based access + - Use principle of least privilege + - Regular access review + +## Troubleshooting Monitoring Issues + +### Common Monitoring Problems + +1. **High Cardinality Metrics** + ```bash + # Check metric cardinality + curl -s 'http://localhost:9090/api/v1/label/__name__/values' | jq '.data | length' + ``` + +2. **Missing Metrics** + ```bash + # Check if exporters are running + curl -s http://localhost:9100/metrics | head -10 + ``` + +3. **Alert Fatigue** + - Review alert frequency + - Implement alert suppression + - Use smarter alert rules + +## Conclusion + +Effective monitoring is crucial for successful operation in the Bitsec-AI subnet. Implement comprehensive monitoring covering infrastructure, applications, and business metrics. Use appropriate tools and follow best practices to maintain high availability and optimal performance. + +Regular review and optimization of your monitoring setup ensures it continues to provide value and helps you stay competitive in the dynamic Bittensor ecosystem. \ No newline at end of file diff --git a/docs/subnet/troubleshooting.md b/docs/subnet/troubleshooting.md new file mode 100644 index 0000000..5798445 --- /dev/null +++ b/docs/subnet/troubleshooting.md @@ -0,0 +1,737 @@ +# Troubleshooting Guide + +This comprehensive troubleshooting guide helps resolve common issues encountered when operating on the Bitsec-AI subnet, covering both validator and miner operations. + +## General Troubleshooting Approach + +### Systematic Problem Resolution + +1. 
**Identify the Problem** + - Gather symptoms and error messages + - Determine scope (single node vs. network-wide) + - Check timing (when did it start?) + +2. **Gather Information** + - Check logs and metrics + - Verify system resources + - Test network connectivity + +3. **Isolate the Cause** + - Rule out common causes + - Test components individually + - Use diagnostic tools + +4. **Implement Solution** + - Apply targeted fixes + - Monitor for resolution + - Document the solution + +## Common Issues and Solutions + +### Connection and Network Issues + +#### Issue: Cannot Connect to Subnet + +**Symptoms:** +```bash +ERROR: Connection refused to subnet endpoint +ERROR: Timeout connecting to validator +``` + +**Diagnosis:** +```bash +# Check network connectivity +ping api.bitsec.ai + +# Test specific ports +telnet api.bitsec.ai 8080 + +# Check DNS resolution +nslookup api.bitsec.ai + +# Verify firewall settings +sudo ufw status verbose +``` + +**Solutions:** + +1. **Firewall Configuration** + ```bash + # Open required ports + sudo ufw allow 8080/tcp + sudo ufw allow 9946/tcp + sudo ufw reload + ``` + +2. **Network Configuration** + ```bash + # Check network interface + ip addr show + + # Test routing + traceroute api.bitsec.ai + + # Check for proxy settings + echo $http_proxy + echo $https_proxy + ``` + +3. **DNS Issues** + ```bash + # Use alternative DNS + echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf + + # Flush DNS cache + sudo systemctl flush-dns + ``` + +#### Issue: High Network Latency + +**Symptoms:** +- Slow response times +- Timeout errors +- Poor performance metrics + +**Diagnosis:** +```bash +# Measure latency +ping -c 10 api.bitsec.ai + +# Check network quality +mtr api.bitsec.ai + +# Monitor bandwidth usage +iftop +``` + +**Solutions:** + +1. **Network Optimization** + ```bash + # Optimize TCP settings + echo 'net.core.rmem_max = 16777216' | sudo tee -a /etc/sysctl.conf + echo 'net.core.wmem_max = 16777216' | sudo tee -a /etc/sysctl.conf + echo 'net.ipv4.tcp_rmem = 4096 87380 16777216' | sudo tee -a /etc/sysctl.conf + sudo sysctl -p + ``` + +2. **Use Closer Endpoints** + ```yaml + # Update configuration to use regional endpoints + network: + primary_endpoint: "us-east.api.bitsec.ai" + fallback_endpoints: + - "us-west.api.bitsec.ai" + - "eu.api.bitsec.ai" + ``` + +### Validator-Specific Issues + +#### Issue: Validator Not Participating in Consensus + +**Symptoms:** +```bash +WARNING: Validator not included in consensus round +ERROR: Consensus participation rate below threshold +``` + +**Diagnosis:** +```bash +# Check validator registration +btcli subnet list --netuid 1 + +# Verify validator status +btcli wallet overview --wallet.name my_validator + +# Check validator logs +tail -f logs/validator.log + +# Monitor consensus participation +grep "consensus" logs/validator.log | tail -20 +``` + +**Solutions:** + +1. **Re-register Validator** + ```bash + btcli subnet register --wallet.name my_validator --wallet.hotkey my_hotkey --netuid 1 + ``` + +2. **Check Stake Requirements** + ```bash + # Verify minimum stake + btcli wallet balance --wallet.name my_validator + + # Check stake on network + btcli wallet overview --wallet.name my_validator + ``` + +3. 
**Synchronization Issues** + ```bash + # Restart validator + sudo systemctl restart validator + + # Force resync + python validator.py --resync --config config/validator.yaml + ``` + +#### Issue: Low Validation Accuracy + +**Symptoms:** +- Poor performance metrics +- Low rewards +- Negative feedback from network + +**Diagnosis:** +```python +# Check validation logic +def diagnose_validation_accuracy(): + # Review recent validations + recent_validations = get_recent_validations(limit=100) + + # Calculate accuracy metrics + accuracy_metrics = calculate_accuracy_metrics(recent_validations) + + # Compare with network average + network_average = get_network_average_accuracy() + + return { + 'current_accuracy': accuracy_metrics, + 'network_average': network_average, + 'improvement_needed': network_average - accuracy_metrics + } +``` + +**Solutions:** + +1. **Update Validation Logic** + ```python + # Implement improved validation algorithms + import numpy as np + + class ImprovedValidator: + def __init__(self): + self.validation_models = load_latest_models() + self.benchmark_data = load_benchmark_data() + + def validate_output(self, miner_output): + # Use multiple validation methods + scores = [] + for model in self.validation_models: + score = model.evaluate(miner_output) + scores.append(score) + + return np.mean(scores) + ``` + +2. **Calibrate Thresholds** + ```yaml + # Update validation thresholds + validation: + quality_threshold: 0.85 + performance_threshold: 0.80 + innovation_threshold: 0.75 + ``` + +### Miner-Specific Issues + +#### Issue: Low Mining Rewards + +**Symptoms:** +- Earnings below expectations +- Few successful validations +- Poor ranking in subnet + +**Diagnosis:** +```bash +# Check mining performance +python -c " +import json +with open('logs/miner_metrics.json', 'r') as f: + metrics = json.load(f) + print(f'Success rate: {metrics[\"success_rate\"]}') + print(f'Average response time: {metrics[\"avg_response_time\"]}') + print(f'Quality score: {metrics[\"quality_score\"]}') +" + +# Compare with network stats +btcli subnet list --netuid 1 | grep -A 5 -B 5 $(btcli wallet overview --wallet.name my_miner | grep "Hotkey" | cut -d: -f2) +``` + +**Solutions:** + +1. **Optimize Performance** + ```python + # Implement performance optimizations + from cachetools import LRUCache # assumes the cachetools package is installed + + class OptimizedMiner: + def __init__(self): + self.model = load_optimized_model() + self.cache = LRUCache(maxsize=1000) + + def process_request(self, request): + # Check cache first + cache_key = hash(str(request)) + if cache_key in self.cache: + return self.cache[cache_key] + + # Process and cache result + result = self.model.process(request) + self.cache[cache_key] = result + return result + ``` + +2. **Improve Model Quality** + ```bash + # Update to latest models + pip install --upgrade your-ai-models + + # Retrain with recent data + python train_model.py --data recent_training_data.json + ``` + +#### Issue: GPU/Hardware Issues + +**Symptoms:** +```bash +ERROR: CUDA out of memory +ERROR: GPU not detected +WARNING: High GPU temperature +``` + +**Diagnosis:** +```bash +# Check GPU status +nvidia-smi + +# Monitor GPU temperature +watch -n 1 nvidia-smi + +# Check CUDA installation +python -c "import torch; print(torch.cuda.is_available())" + +# Check memory usage +nvidia-smi --query-gpu=memory.used,memory.total --format=csv +```
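
For a programmatic view of the same numbers, the following minimal PyTorch sketch (assuming `torch` is already installed for your miner) reports per-device memory headroom before you tune batch sizes:

```python
# gpu_check.py - sketch: report CUDA availability and per-device memory headroom (assumes PyTorch)
import torch

def report_gpu_memory() -> None:
    if not torch.cuda.is_available():
        print("CUDA not available - check drivers and the CUDA toolkit")
        return

    for device in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(device)
        total = props.total_memory
        reserved = torch.cuda.memory_reserved(device)
        allocated = torch.cuda.memory_allocated(device)
        print(
            f"GPU {device} ({props.name}): "
            f"total={total / 1e9:.1f} GB, reserved={reserved / 1e9:.1f} GB, "
            f"allocated={allocated / 1e9:.1f} GB, approx free={(total - reserved) / 1e9:.1f} GB"
        )

if __name__ == "__main__":
    report_gpu_memory()
```

**Solutions:**

1. 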
**Memory Management** + ```python + # Implement memory optimization + import torch + + def optimize_gpu_memory(): + # Clear cache + torch.cuda.empty_cache() + + # Set memory fraction + torch.cuda.set_per_process_memory_fraction(0.8) + + # For very large models, consider gradient checkpointing, e.g. + # torch.utils.checkpoint.checkpoint_sequential(model, segments, input) + ``` + +2. **Temperature Management** + ```bash + # Check cooling system + sensors + + # Reduce GPU power limit + sudo nvidia-smi -pl 200 # 200W limit + + # Improve ventilation + # Ensure proper airflow in server room + ``` + +3. **Driver Issues** + ```bash + # Update NVIDIA drivers + sudo apt update + sudo apt install nvidia-driver-525 + sudo reboot + + # Reinstall CUDA toolkit + sudo apt install nvidia-cuda-toolkit + ``` + +### Authentication and Wallet Issues + +#### Issue: Wallet Authentication Failures + +**Symptoms:** +```bash +ERROR: Invalid wallet credentials +ERROR: Hotkey not found +ERROR: Insufficient permissions +``` + +**Diagnosis:** +```bash +# Check wallet files +ls -la ~/.bittensor/wallets/ + +# Verify wallet integrity +btcli wallet check --wallet.name my_wallet + +# Test wallet access +btcli wallet balance --wallet.name my_wallet +``` + +**Solutions:** + +1. **Wallet Recovery** + ```bash + # Restore from mnemonic + btcli wallet regen_coldkey --wallet.name my_wallet --mnemonic "your mnemonic phrase" + + # Restore hotkey + btcli wallet regen_hotkey --wallet.name my_wallet --wallet.hotkey my_hotkey + ``` + +2. **Permission Issues** + ```bash + # Fix file permissions + chmod 600 ~/.bittensor/wallets/my_wallet/coldkey + chmod 600 ~/.bittensor/wallets/my_wallet/hotkeys/my_hotkey + + # Check ownership + chown -R $USER:$USER ~/.bittensor/wallets/ + ``` + +### Performance and Resource Issues + +#### Issue: High CPU/Memory Usage + +**Symptoms:** +- System slowdown +- Out of memory errors +- High load averages + +**Diagnosis:** +```bash +# Monitor resource usage +top -p $(pgrep -f "validator\|miner") + +# Check memory usage +free -h + +# Analyze process memory +ps aux --sort=-%mem | head + +# Check for memory leaks +valgrind --tool=memcheck --leak-check=full python miner.py +``` + +**Solutions:** + +1. **Resource Optimization** + ```python + # Implement resource monitoring + import psutil + import gc + import torch + + class ResourceMonitor: + def __init__(self, memory_limit_gb=8): + self.memory_limit = memory_limit_gb * 1024 * 1024 * 1024 + + def check_resources(self): + # Check memory usage + memory_usage = psutil.virtual_memory().used + if memory_usage > self.memory_limit: + self.cleanup_memory() + + # Check CPU usage + cpu_usage = psutil.cpu_percent(interval=1) + if cpu_usage > 90: + self.reduce_load() + + def cleanup_memory(self): + gc.collect() + torch.cuda.empty_cache() + + def reduce_load(self): + # Back off, e.g. shrink batch sizes or pause new work until load drops + pass + ``` + +2. **Configuration Tuning** + ```yaml + # Optimize configuration + performance: + batch_size: 16 # Reduce if memory issues + max_workers: 4 # Limit concurrent operations + memory_limit_gb: 8 + + logging: + level: "WARNING" # Reduce logging overhead + max_file_size: "100MB" + ``` + +### Database and Storage Issues + +#### Issue: Database Corruption + +**Symptoms:** +```bash +ERROR: Database connection failed +ERROR: Corrupted index file +WARNING: Inconsistent data detected +``` + +**Diagnosis:** +```bash +# Check database integrity +sqlite3 database.db "PRAGMA integrity_check;" + +# Check disk space +df -h + +# Check file system errors +sudo fsck /dev/sda1 + +# Monitor I/O operations +iotop +```
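
The same integrity check can be scripted so it runs automatically before the node starts. Here is a minimal sketch using only the Python standard library (the database path is a placeholder for your deployment):

```python
# db_check.py - sketch: run SQLite quick_check/integrity_check before starting the node
import sqlite3
import sys

DB_PATH = "database.db"  # placeholder: adjust to your deployment

def database_is_healthy(db_path: str) -> bool:
    try:
        with sqlite3.connect(db_path) as conn:
            # quick_check is faster; fall back to the full integrity_check if it reports problems
            result = conn.execute("PRAGMA quick_check;").fetchone()[0]
            if result != "ok":
                result = conn.execute("PRAGMA integrity_check;").fetchone()[0]
            return result == "ok"
    except sqlite3.DatabaseError as exc:
        print(f"Database error: {exc}", file=sys.stderr)
        return False

if __name__ == "__main__":
    sys.exit(0 if database_is_healthy(DB_PATH) else 1)
```

**Solutions:**

1. 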
**Database Recovery** + ```bash + # Backup current database + cp database.db database.db.backup + + # Attempt repair + sqlite3 database.db ".recover" | sqlite3 database_recovered.db + + # Replace with recovered version + mv database_recovered.db database.db + ``` + +2. **Prevent Future Corruption** + ```python + # Implement database health monitoring + import sqlite3 + + class DatabaseMonitor: + def __init__(self, db_path): + self.db_path = db_path + + def health_check(self): + try: + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + cursor.execute("PRAGMA integrity_check;") + result = cursor.fetchone() + conn.close() + + return result[0] == "ok" + except Exception as e: + return False + + def backup_database(self): + # Implement regular backups + pass + ``` + +## Diagnostic Tools and Scripts + +### Comprehensive Health Check Script + +```bash +#!/bin/bash +# health_check.sh + +echo "=== Bitsec-AI Node Health Check ===" + +# System health +echo "1. System Resources:" +echo " CPU: $(top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | cut -d'%' -f1)% used" +echo " Memory: $(free | grep Mem | awk '{printf("%.1f%%", $3/$2 * 100.0)}')" +echo " Disk: $(df -h / | awk 'NR==2{printf "%s", $5}')" + +# Network connectivity +echo "2. Network Connectivity:" +if ping -c 1 api.bitsec.ai &> /dev/null; then + echo " ✓ Internet connectivity: OK" +else + echo " ✗ Internet connectivity: FAILED" +fi + +# Service status +echo "3. Service Status:" +if pgrep -f "validator\|miner" > /dev/null; then + echo " ✓ Node process: Running" +else + echo " ✗ Node process: Not running" +fi + +# Wallet status +echo "4. Wallet Status:" +if btcli wallet balance --wallet.name my_wallet &> /dev/null; then + echo " ✓ Wallet access: OK" +else + echo " ✗ Wallet access: FAILED" +fi + +echo "=== Health Check Complete ===" +``` + +### Log Analysis Script + +```python +#!/usr/bin/env python3 +# analyze_logs.py + +import re +import json +from datetime import datetime, timedelta +from collections import defaultdict + +class LogAnalyzer: + def __init__(self, log_file): + self.log_file = log_file + self.error_patterns = { + 'connection_errors': r'Connection.*failed|Timeout|Connection refused', + 'memory_errors': r'out of memory|MemoryError|OOM', + 'gpu_errors': r'CUDA.*error|GPU.*error', + 'wallet_errors': r'wallet.*error|authentication.*failed' + } + + def analyze(self): + errors = defaultdict(int) + recent_errors = [] + + with open(self.log_file, 'r') as f: + for line in f: + for error_type, pattern in self.error_patterns.items(): + if re.search(pattern, line, re.IGNORECASE): + errors[error_type] += 1 + + # Extract timestamp and add to recent errors + if 'ERROR' in line: + recent_errors.append({ + 'timestamp': self.extract_timestamp(line), + 'type': error_type, + 'message': line.strip() + }) + + return { + 'error_summary': dict(errors), + 'recent_errors': recent_errors[-10:], # Last 10 errors + 'recommendations': self.get_recommendations(errors) + } + + def extract_timestamp(self, line): + # Best-effort parse of a leading ISO-style timestamp; returns None if absent + match = re.match(r'(\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2})', line) + return match.group(1) if match else None + + def get_recommendations(self, errors): + recommendations = [] + + if errors['connection_errors'] > 10: + recommendations.append("Check network connectivity and firewall settings") + + if errors['memory_errors'] > 5: + recommendations.append("Consider reducing batch size or adding more RAM") + + if errors['gpu_errors'] > 3: + recommendations.append("Check GPU drivers and CUDA installation") + + return recommendations + +if __name__ == "__main__": + analyzer = LogAnalyzer('logs/node.log') + results = analyzer.analyze() + print(json.dumps(results, indent=2)) +``` + +## Advanced 
Troubleshooting + +### Remote Debugging + +```python +# Enable remote debugging for complex issues +import pdb +import sys + +def enable_remote_debugging(): + """Enable remote debugging capability""" + try: + import remote_pdb + remote_pdb.set_trace() + except ImportError: + print("Install remote-pdb for remote debugging") + pdb.set_trace() + +# Usage in your code +if DEBUG_MODE: + enable_remote_debugging() +``` + +### Performance Profiling + +```python +import cProfile +import pstats +from functools import wraps + +def profile_performance(func): + @wraps(func) + def wrapper(*args, **kwargs): + profiler = cProfile.Profile() + profiler.enable() + + result = func(*args, **kwargs) + + profiler.disable() + stats = pstats.Stats(profiler) + stats.sort_stats('cumulative') + stats.print_stats(10) # Top 10 functions + + return result + return wrapper + +# Usage +@profile_performance +def mining_function(): + # Your mining logic + pass +``` + +## Getting Help + +### Support Channels + +1. **Community Forums** + - Discord: [Bitsec-AI Discord](https://discord.gg/bitsec-ai) + - Telegram: [Bitsec-AI Telegram](https://t.me/bitsec_ai) + +2. **Documentation** + - GitHub: [Bitsec-AI/docs](https://github.com/Bitsec-AI/docs) + - Official Docs: [docs.bitsec.ai](https://docs.bitsec.ai) + +3. **Technical Support** + - Create GitHub issue with: + - Detailed problem description + - Error messages and logs + - System configuration + - Steps to reproduce + +### Reporting Issues + +When reporting issues, include: + +```bash +# System information +uname -a +python --version +pip list | grep -E "(torch|bittensor|numpy)" + +# Hardware information +lscpu +free -h +nvidia-smi # if using GPU + +# Network configuration +ip addr show +netstat -tuln | grep -E "(8080|9946)" + +# Error logs (last 50 lines) +tail -50 logs/node.log +``` + +## Conclusion + +Effective troubleshooting requires a systematic approach and good understanding of the system components. Use the diagnostic tools and scripts provided, and don't hesitate to reach out to the community for help with complex issues. + +Keep your logs detailed, monitor your systems proactively, and maintain good documentation of any custom configurations or modifications you've made. \ No newline at end of file diff --git a/docs/validators/best-practices.md b/docs/validators/best-practices.md new file mode 100644 index 0000000..b35351f --- /dev/null +++ b/docs/validators/best-practices.md @@ -0,0 +1,223 @@ +# Validator Best Practices + +This guide covers best practices for running a successful validator on the Bitsec-AI subnet. + +## Performance Optimization + +### Hardware Recommendations + +**Minimum Setup:** +- 16GB RAM +- 4 CPU cores +- 100GB SSD storage +- 100 Mbps internet connection + +**Recommended Setup:** +- 32GB+ RAM +- 8+ CPU cores +- 500GB+ NVMe SSD +- 1 Gbps internet connection +- Uninterruptible Power Supply (UPS) + +### Network Configuration + +1. **Port Configuration** + ```bash + # Ensure required ports are open + sudo ufw allow 8080/tcp + sudo ufw allow 9946/tcp + ``` + +2. 
**Firewall Settings** + - Allow inbound connections on validator port + - Restrict SSH access to specific IPs + - Enable DDoS protection if available + +## Operational Excellence + +### Monitoring + +Set up comprehensive monitoring for: + +- **System Metrics**: CPU, memory, disk usage +- **Network Metrics**: Bandwidth, latency, packet loss +- **Validator Metrics**: Uptime, validation accuracy, earnings +- **Security Metrics**: Failed login attempts, unusual activity + +### Maintenance Schedule + +**Daily:** +- Check validator status and logs +- Monitor resource usage +- Verify network connectivity + +**Weekly:** +- Review earnings and performance metrics +- Update system packages +- Check for software updates + +**Monthly:** +- Perform full system backup +- Review security configurations +- Analyze long-term performance trends + +## Security Best Practices + +### Wallet Security + +1. **Cold Storage**: Keep the majority of funds in cold storage +2. **Hot Wallet Management**: Use minimal funds in active validator wallets +3. **Key Rotation**: Regularly rotate hotkeys when possible +4. **Backup Strategy**: Maintain secure, offline backups of all keys + +### System Security + +1. **Access Control** + ```bash + # Disable root login + sudo sed -i 's/PermitRootLogin yes/PermitRootLogin no/' /etc/ssh/sshd_config + + # Use key-based authentication only + sudo sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config + ``` + +2. **System Updates** + ```bash + # Enable automatic security updates + sudo apt install unattended-upgrades + sudo dpkg-reconfigure -plow unattended-upgrades + ``` + +3. **Firewall Configuration** + ```bash + # Basic UFW setup + sudo ufw default deny incoming + sudo ufw default allow outgoing + sudo ufw allow ssh + sudo ufw allow 8080/tcp + sudo ufw enable + ``` + +## Performance Optimization + +### Validator Configuration + +Optimize your validator configuration: + +```yaml +# validator.yaml +performance: + batch_size: 32 + max_concurrent_requests: 10 + timeout_seconds: 30 + +network: + max_connections: 100 + keepalive_timeout: 60 + +logging: + level: "INFO" + rotation: "daily" + max_size: "100MB" +``` + +### Resource Management + +1. **CPU Optimization** + - Set CPU affinity for validator process + - Use appropriate nice levels + - Monitor CPU thermal throttling + +2. **Memory Management** + - Configure swap appropriately + - Monitor memory leaks + - Use memory-mapped files when possible + +3. **Disk I/O** + - Use SSDs for database storage + - Implement proper log rotation + - Monitor disk health regularly + +## Troubleshooting Common Issues + +### Connection Problems + +If experiencing network issues: + +```bash +# Check network connectivity +ping -c 4 8.8.8.8 + +# Verify port accessibility +telnet your-validator-ip 8080 + +# Check firewall status +sudo ufw status verbose +``` + +### Performance Issues + +For performance problems: + +```bash +# Monitor system resources +htop +iotop -o + +# Check validator logs +tail -f logs/validator.log + +# Analyze network latency +mtr google.com +``` + +### Synchronization Issues + +If validator falls out of sync: + +```bash +# Restart validator service +sudo systemctl restart validator + +# Check blockchain sync status +btcli subnet list + +# Verify wallet registration +btcli wallet overview --wallet.name my_validator +``` + +## Economic Optimization + +### Staking Strategy + +1. **Stake Management**: Maintain optimal stake levels +2. **Risk Assessment**: Understand slashing conditions +3. 
**Reward Optimization**: Maximize validation efficiency + +### Cost Management + +1. **Infrastructure Costs**: Optimize hardware and hosting expenses +2. **Operational Costs**: Automate routine tasks +3. **Tax Considerations**: Maintain proper records for tax purposes + +## Community Engagement + +### Best Practices + +1. **Stay Informed**: Follow official announcements and updates +2. **Participate**: Engage in community discussions and governance +3. **Share Knowledge**: Contribute to documentation and help others +4. **Report Issues**: Help improve the network by reporting bugs + +### Resources + +- Official Discord/Telegram channels +- GitHub repositories and issue trackers +- Community forums and discussion boards +- Technical documentation and guides + +## Conclusion + +Following these best practices will help ensure your validator operates efficiently, securely, and profitably. Regular monitoring, proper security measures, and active community participation are key to long-term success. + +Remember: The Bittensor network evolves rapidly. Stay updated with the latest developments and adapt your practices accordingly. \ No newline at end of file diff --git a/docs/validators/introduction.md b/docs/validators/introduction.md new file mode 100644 index 0000000..6907a2e --- /dev/null +++ b/docs/validators/introduction.md @@ -0,0 +1,64 @@ +# Validator Introduction + +Validators play a crucial role in the Bittensor network by evaluating and validating the work performed by miners. As a validator operator, you're responsible for maintaining network integrity and ensuring quality standards. + +## What Validators Do + +Validators: + +- **Evaluate Miners**: Assess the quality and accuracy of miner outputs +- **Maintain Consensus**: Participate in network consensus mechanisms +- **Secure the Network**: Help prevent malicious activities and ensure network stability +- **Earn Rewards**: Receive TAO tokens for honest validation work + +## Key Responsibilities + +### Network Participation +- Run validator nodes with high uptime +- Participate in validation rounds +- Maintain updated software versions + +### Quality Assurance +- Implement fair and consistent evaluation criteria +- Monitor miner performance objectively +- Report suspicious or malicious behavior + +### Resource Management +- Ensure adequate computational resources +- Manage network bandwidth efficiently +- Monitor system health and performance + +## Validator Economics + +### Rewards +- Base rewards for participation +- Performance bonuses for consistent uptime +- Additional rewards for high-quality validations + +### Costs +- Hardware and infrastructure costs +- Network bandwidth requirements +- Electricity and maintenance expenses + +## Getting Started + +1. **[Setup Guide](setup.md)** - Configure your validator node +2. **[Best Practices](best-practices.md)** - Learn optimization techniques +3. 
**[Monitoring](../subnet/monitoring.md)** - Set up monitoring and alerts + +## Prerequisites + +Before setting up a validator: + +- Complete the [installation guide](../getting-started/installation.md) +- Have sufficient TAO tokens for staking +- Ensure reliable internet connection and hardware +- Understand the economic model and risks involved + +## Support + +For validator-specific support: + +- Review our [troubleshooting guide](../subnet/troubleshooting.md) +- Check the [best practices](best-practices.md) section +- Join the validator community discussions \ No newline at end of file diff --git a/docs/validators/setup.md b/docs/validators/setup.md new file mode 100644 index 0000000..629b323 --- /dev/null +++ b/docs/validators/setup.md @@ -0,0 +1,90 @@ +# Validator Setup Guide + +This guide will walk you through setting up a validator node for the Bitsec-AI subnet on the Bittensor network. + +## Prerequisites + +Before starting: + +- Complete the [installation guide](../getting-started/installation.md) +- Have at least 1000 TAO tokens for staking (recommended) +- Ensure your system meets the hardware requirements +- Have a stable internet connection + +## Step 1: Environment Setup + +Create a dedicated directory for your validator: + +```bash +mkdir ~/bitsec-validator +cd ~/bitsec-validator +``` + +## Step 2: Configuration + +Create your validator configuration file: + +```bash +cp config/validator.example.yaml config/validator.yaml +``` + +Edit the configuration file with your specific settings: + +```yaml +# validator.yaml +validator: + netuid: 1 # Adjust based on your subnet + wallet_name: "my_validator" + hotkey_name: "my_hotkey" + +network: + port: 8080 + external_ip: "your.external.ip" + +logging: + level: "INFO" + file: "logs/validator.log" +``` + +## Step 3: Wallet Setup + +Create and configure your validator wallet: + +```bash +btcli wallet new_coldkey --wallet.name my_validator +btcli wallet new_hotkey --wallet.name my_validator --wallet.hotkey my_hotkey +``` + +## Step 4: Registration + +Register your validator on the network: + +```bash +btcli subnet register --wallet.name my_validator --wallet.hotkey my_hotkey --netuid 1 +``` + +## Step 5: Start Validator + +Launch your validator node: + +```bash +python validator.py --config config/validator.yaml +``` + +## Monitoring + +Monitor your validator performance: + +```bash +# Check validator status +btcli wallet overview --wallet.name my_validator + +# View logs +tail -f logs/validator.log +``` + +## Next Steps + +- Set up [monitoring and alerts](../subnet/monitoring.md) +- Review [best practices](best-practices.md) +- Join the validator community for support \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..b076be1 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,75 @@ +site_name: Bitsec-AI Documentation +site_url: https://docs.bitsec.ai +site_description: Documentation for Bittensor subnet operators including validators and miners +site_author: Bitsec-AI + +repo_name: Bitsec-AI/docs +repo_url: https://github.com/Bitsec-AI/docs + +theme: + name: material + palette: + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: blue + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: blue + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + features: + - navigation.tabs + - 
navigation.sections + - navigation.expand + - navigation.path + - navigation.top + - search.highlight + - search.share + - content.code.copy + - content.code.annotate + +nav: + - Home: index.md + - Getting Started: + - Overview: getting-started/overview.md + - Installation: getting-started/installation.md + - For Validators: + - Introduction: validators/introduction.md + - Setup Guide: validators/setup.md + - Best Practices: validators/best-practices.md + - For Miners: + - Introduction: miners/introduction.md + - Setup Guide: miners/setup.md + - Optimization: miners/optimization.md + - Subnet Operations: + - Architecture: subnet/architecture.md + - Monitoring: subnet/monitoring.md + - Troubleshooting: subnet/troubleshooting.md + - API Reference: + - Overview: api/overview.md + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.tabbed: + alternate_style: true + - attr_list + - md_in_html + +plugins: + - search \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..19c9b95 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +mkdocs>=1.5.0 +mkdocs-material>=9.4.0 +pymdown-extensions>=10.3.0 \ No newline at end of file