# 🚀 Heady Colab Protocol - GPU Optimized

This notebook leverages Google Colab's GPU resources with **intelligent optimization** to run the Heady Admin Console and its AI capabilities efficiently.

## 🎯 Features
- ✅ **Automatic GPU Detection & Optimization**
- ✅ **Memory Management & Cleanup**
- ✅ **Real-time GPU Monitoring**
- ✅ **Smart Model Loading**
- ✅ **Ngrok Tunneling for Remote Access**

## 🔧 Step 1: Verify GPU Availability

In [None]:
# Check GPU availability and detailed info
#
# Reports the Python and PyTorch versions, then - when CUDA is available -
# the active device's name and memory counters from torch.cuda, followed by
# optional NVML readings (temperature, power, utilization) when pynvml can be
# imported. Host RAM figures from psutil are printed last in every case.
import torch
import psutil
import sys
from datetime import datetime

print("üîç Heady GPU Detection & Analysis")
print("=" * 50)
print(f"‚è∞ Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"üêç Python: {sys.version}")
print(f"üî• PyTorch: {torch.__version__}")
print()

# GPU Detection
if torch.cuda.is_available():
    device_count = torch.cuda.device_count()
    current_device = torch.cuda.current_device()
    
    print(f"‚úÖ GPU Available: {device_count} device(s)")
    print(f"üéØ Current Device: {current_device}")
    print(f"üìõ GPU Name: {torch.cuda.get_device_name(current_device)}")
    
    # Memory info
    total_memory = torch.cuda.get_device_properties(current_device).total_memory
    allocated_memory = torch.cuda.memory_allocated(current_device)
    cached_memory = torch.cuda.memory_reserved(current_device)
    # "Free" is total minus memory held by live tensors; memory that PyTorch
    # has reserved but not handed out (reported as "Cached") counts as free.
    free_memory = total_memory - allocated_memory
    
    print(f"üíæ Total Memory: {total_memory / 1024**3:.2f} GB")
    print(f"üìä Allocated: {allocated_memory / 1024**3:.2f} GB")
    print(f"üóÇÔ∏è Cached: {cached_memory / 1024**3:.2f} GB")
    print(f"‚ú® Free: {free_memory / 1024**3:.2f} GB")
    print(f"üìà Usage: {(allocated_memory / total_memory * 100):.1f}%")
    
    # Try to get more detailed GPU info
    try:
        import pynvml
        pynvml.nvmlInit()
        # Every successful nvmlInit() must be balanced by nvmlShutdown(), even
        # when one of the queries below raises, so the NVML handle is released.
        try:
            # NOTE(review): assumes the torch device index matches the NVML
            # index; this may not hold if CUDA_VISIBLE_DEVICES remaps devices.
            handle = pynvml.nvmlDeviceGetHandleByIndex(current_device)
            
            # Temperature
            temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
            print(f"üå°Ô∏è Temperature: {temp}¬∞C")
            
            # Power usage (scaled by 1/1000 so it is displayed in watts)
            power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0
            print(f"‚ö° Power Usage: {power:.1f}W")
            
            # Utilization
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            print(f"üìä GPU Utilization: {util.gpu}%")
            print(f"üíæ Memory Utilization: {util.memory}%")
        finally:
            pynvml.nvmlShutdown()
        
    except ImportError:
        print("üí° Install pynvml for detailed GPU metrics: !pip install pynvml")
    except Exception as e:
        # Best effort: detailed metrics are optional, so report and carry on.
        print(f"‚ö†Ô∏è Could not get detailed GPU info: {e}")
        
else:
    print("‚ùå GPU not available")
    print("üí° Enable GPU: Runtime > Change runtime type > GPU")

# System Memory
system_memory = psutil.virtual_memory()
print(f"\nüíª System Memory: {system_memory.total / 1024**3:.1f} GB")
print(f"üìä System Memory Usage: {system_memory.percent:.1f}%")
print(f"‚ú® Available: {system_memory.available / 1024**3:.1f} GB")

## 🔧 Step 2: Install Dependencies & GPU Optimizer

In [None]:
# Install required packages with GPU optimizations
print("üì¶ Installing Heady dependencies...")

# Core dependencies
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -q
!pip install transformers accelerate psutil -q

# Optional: Enhanced GPU monitoring
!pip install pynvml -q

# Heady dependencies
!pip install flask flask-cors requests -q

print("‚úÖ Dependencies installed successfully!")

# Clone Heady repository
import os
if not os.path.exists('HeadyMonorepo'):
    !git clone https://github.com/HeadySystems/HeadyMonorepo.git
    %cd HeadyMonorepo
else:
    %cd HeadyMonorepo
    !git pull

print("üìÅ Heady repository ready!")

## 🧠 Step 3: Initialize GPU Optimizer

In [None]:
# Initialize the GPU optimizer
#
# Makes the repository's Python worker package importable, imports the shared
# `gpu_optimizer` object, and prints its status, efficiency score and any
# optimization suggestions.
import os
import sys

# Resolve the package directory to an absolute path before registering it:
# a relative sys.path entry silently stops working if the working directory
# changes later, and the membership test keeps re-runs of this cell from
# appending the same entry again. Expects the current directory to be the
# repository root (Step 2 changes into it).
worker_path = os.path.abspath('backend/python_worker')
if worker_path not in sys.path:
    sys.path.append(worker_path)

from heady_project.gpu_optimizer import gpu_optimizer
import time

print("üöÄ Initializing Heady GPU Optimizer...")
print("=" * 50)

# Get initial GPU status
gpu_optimizer.print_gpu_status()

# Get efficiency score
efficiency = gpu_optimizer.get_memory_efficiency_score()
print(f"\nüìà Initial Efficiency Score: {efficiency['overall_score']:.1f}/100")

# Get suggestions
suggestions = gpu_optimizer.suggest_optimizations()
if suggestions:
    print(f"\nüí° {len(suggestions)} Optimization Suggestions:")
    for i, suggestion in enumerate(suggestions, 1):
        print(f"   {i}. {suggestion}")
else:
    print("\n‚úÖ GPU configuration looks good!")

## 🔧 Step 4: Setup Ngrok Tunnel

In [None]:
# Install and setup ngrok for tunneling
#
# Installs pyngrok, asks for the user's ngrok authtoken, opens a tunnel to
# local port 3300 and exports the resulting public URL as the PUBLIC_URL
# environment variable, which later cells read back with os.getenv().
!pip install pyngrok -q

from pyngrok import ngrok, conf
import getpass

print("üåê Setting up Ngrok tunnel...")

# Get ngrok authtoken (secure input)
# getpass keeps the token from being echoed into the notebook output.
authtoken = getpass.getpass("Enter your Ngrok authtoken (from https://dashboard.ngrok.com/auth): ")

# Configure ngrok
conf.get_default().auth_token = authtoken

# Start ngrok tunnel
# NOTE(review): 3300 is presumably the port the Heady backend listens on -
# confirm against the server started in Step 5.
# NOTE(review): each run of this cell calls ngrok.connect() again; whether an
# earlier tunnel is reused or a second one is opened is not visible here.
public_url = ngrok.connect(3300).public_url
print(f"\n‚úÖ Ngrok tunnel established!")
print(f"üîó Public URL: {public_url}")
print(f"üîí This URL will give you access to the Heady Admin Console")

# Store URL for later use
# IPython expands {public_url} before the %env magic sets the variable.
%env PUBLIC_URL={public_url}

## 🚀 Step 5: Start Heady Admin Console

In [None]:
# Start the Heady Admin Console with GPU optimization
#
# Prepares the environment for the server (admin token and GPU memory limit),
# prints the access details, and runs an aggressive GPU memory clean-up.
# Relies on `gpu_optimizer` from the "Initialize GPU Optimizer" cell.
import os
import subprocess
import threading
import time
from datetime import datetime

# Set environment variables
# NOTE(security): 'colab_secure_token' is a publicly known default and the
# console is exposed on a public ngrok URL. Set HEADY_API_KEY to a private
# value before running this cell; a token that is already set is kept rather
# than overwritten with the default.
if not os.environ.get('HEADY_API_KEY'):
    os.environ['HEADY_API_KEY'] = 'colab_secure_token'
os.environ['GPU_MEMORY_LIMIT'] = '10240'  # 10GB limit

print("üöÄ Starting Heady Admin Console...")
print("=" * 50)
print(f"‚è∞ Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"üîó Access URL: {os.getenv('PUBLIC_URL')}")
# Show the token that is actually configured instead of a second literal copy.
print(f"üîë Admin Token: {os.environ['HEADY_API_KEY']}")
print()

# Optimize GPU before starting
print("üßπ Optimizing GPU memory before startup...")
result = gpu_optimizer.optimize_gpu_memory(aggressive=True)
print(f"‚úÖ {result.message}")
# Start the server
def start_server():
    """Launch the Node.js backend and relay its output until it exits.

    Runs ``node index.js`` with ``backend`` as the child's working directory,
    merges stderr into stdout, and prints every non-blank output line as it
    arrives. The call blocks until the child closes its output, so it is
    meant to be run on a background thread.

    Any exception (for example a missing ``node`` binary or ``backend``
    directory) is reported with a printed message instead of being raised.
    """
    try:
        # Hand the directory to the child through ``cwd`` instead of calling
        # os.chdir(): chdir changes the working directory of the whole
        # notebook process (from a background thread), which breaks every
        # relative path used by later cells.
        # Using Popen as a context manager closes the pipe and waits for the
        # child on exit, so the process is always reaped.
        with subprocess.Popen(
            ['node', 'index.js'],
            cwd='backend',
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True
        ) as process:
            # Print output in real-time
            for line in process.stdout:
                if line.strip():
                    print(f"üì° {line.strip()}")

    except Exception as e:
        print(f"‚ùå Server error: {e}")

# Start server in background thread
# daemon=True so the thread never blocks the notebook kernel from exiting.
server_thread = threading.Thread(target=start_server, daemon=True)
server_thread.start()

# Give server time to start
time.sleep(5)

# start_server() only returns once the node process has exited or could not
# be launched, so a finished thread at this point means the server is not
# running. Report that instead of announcing a successful start.
if server_thread.is_alive():
    print("\n‚úÖ Heady Admin Console is starting up!")
    print(f"üåê Access it at: {os.getenv('PUBLIC_URL')}")
    # Read the configured token rather than repeating the literal default.
    print(f"üîë Use token: {os.getenv('HEADY_API_KEY', 'colab_secure_token')}")
    print("\n‚è≥ Waiting for server to fully initialize...")
else:
    print("\n‚ùå Heady Admin Console failed to start - check the server output above")

## 📊 Step 6: Real-time GPU Monitoring

In [None]:
# Monitor GPU usage in real-time
#
# Samples `gpu_optimizer` twelve times, ten seconds apart, redrawing the cell
# output on each sample and collecting the readings in `monitoring_data`.
# Interrupting the cell stops the loop early. Relies on `gpu_optimizer` from
# the "Initialize GPU Optimizer" cell.
import matplotlib.pyplot as plt
import IPython.display as display
import time
# Imported here so the cell does not depend on an earlier cell's import.
from datetime import datetime

print("üìä Starting real-time GPU monitoring...")
print("üîÑ This will update every 10 seconds for 2 minutes")
print("‚èπÔ∏è Interrupt the cell to stop monitoring")

# Monitor for 2 minutes (12 intervals of 10 seconds)
total_updates = 12
monitoring_data = []

try:
    for i in range(total_updates):
        # Clear previous output
        display.clear_output(wait=True)
        
        # Get current metrics
        metrics = gpu_optimizer.get_gpu_metrics()
        efficiency = gpu_optimizer.get_memory_efficiency_score()
        
        # Store data
        data_point = {
            'time': datetime.now().strftime('%H:%M:%S'),
            'gpu_memory_gb': metrics.allocated_memory_gb,
            'gpu_utilization': metrics.utilization_percent,
            'efficiency_score': efficiency['overall_score'],
            'temperature': metrics.temperature_celsius
        }
        monitoring_data.append(data_point)
        
        # Display current status
        print(f"üìä GPU Monitor - Update {i+1}/{total_updates}")
        print("=" * 40)
        print(f"‚è∞ Time: {data_point['time']}")
        print(f"üíæ GPU Memory: {data_point['gpu_memory_gb']:.2f} GB")
        print(f"üìä GPU Utilization: {data_point['gpu_utilization']:.1f}%")
        print(f"üìà Efficiency Score: {data_point['efficiency_score']:.1f}/100")
        # A non-positive temperature is not shown.
        # NOTE(review): presumably 0 means "no reading available" - confirm
        # against gpu_optimizer.get_gpu_metrics().
        if data_point['temperature'] > 0:
            print(f"üå°Ô∏è Temperature: {data_point['temperature']:.1f}¬∞C")
        
        # Auto-optimize if needed
        optimization_result = gpu_optimizer.auto_optimize_if_needed(memory_threshold_percent=85.0)
        if optimization_result and optimization_result.success:
            print(f"üßπ Auto-optimized: {optimization_result.message}")
        
        # Show suggestions if efficiency is low
        if efficiency['overall_score'] < 70:
            suggestions = gpu_optimizer.suggest_optimizations()[:3]  # Top 3
            if suggestions:
                print("\nüí° Quick Suggestions:")
                for suggestion in suggestions:
                    print(f"   ‚Ä¢ {suggestion}")
        
        # Only announce and wait when another update will actually follow;
        # after the final sample there is nothing left to wait for.
        if i < total_updates - 1:
            print("\n‚è≥ Next update in 10 seconds...")
            
            # Wait for next update
            time.sleep(10)
        
except KeyboardInterrupt:
    print("\n‚èπÔ∏è Monitoring stopped by user")

print(f"\n‚úÖ Monitoring complete! Collected {len(monitoring_data)} data points")

## 🧹 Step 7: Manual GPU Optimization

In [None]:
# Interactive GPU clean-up menu
#
# Prints the current GPU status, offers three actions (light clean-up,
# aggressive clean-up, or a 30-second monitoring run), executes the one the
# user picks and prints the GPU status again afterwards. Relies on
# `gpu_optimizer` from the "Initialize GPU Optimizer" cell.
print("üßπ Heady GPU Optimization Tools")
print("=" * 40)

# Status before any action is taken
print("\nüìä Current GPU Status:")
gpu_optimizer.print_gpu_status()

# Menu text, one entry per output line
for menu_line in (
    "\nüîß Optimization Options:",
    "1. Light optimization (fast, minimal cleanup)",
    "2. Aggressive optimization (slower, thorough cleanup)",
    "3. Monitor GPU for 30 seconds",
):
    print(menu_line)

# Options 1 and 2 differ only in the announcement and the `aggressive` flag
optimization_modes = {
    "1": ("\nüßπ Running light optimization...", False),
    "2": ("\nüßπ Running aggressive optimization...", True),
}

choice = input("\nSelect option (1/2/3): ").strip()

if choice in optimization_modes:
    announcement, aggressive = optimization_modes[choice]
    print(announcement)
    result = gpu_optimizer.optimize_gpu_memory(aggressive=aggressive)
    print(f"‚úÖ {result.message}")

elif choice == "3":
    print("\nüìä Monitoring GPU for 30 seconds...")
    data = gpu_optimizer.monitor_gpu_usage(duration_seconds=30, interval_seconds=5)

    if not data:
        # An empty result is reported as "no GPU"
        print("‚ùå No GPU available for monitoring")
    else:
        print("\nüìà Monitoring Results:")
        for position, point in enumerate(data, start=1):
            memory_gb = point['gpu_memory_allocated_gb']
            utilization = point['gpu_utilization_percent']
            print(f"   {position}. Memory: {memory_gb:.2f}GB, Utilization: {utilization:.1f}%")

else:
    print("‚ùå Invalid choice")

# Status after the chosen action
print("\nüìä Final GPU Status:")
gpu_optimizer.print_gpu_status()

## 🎯 Step 8: Test NLP Services with GPU


In [None]:
# Test NLP services to verify GPU usage
#
# Runs one summarization and one text-generation request through
# `nlp_service`, timing each call and printing the GPU status it reports
# afterwards.
import os
import sys

# Register the worker package with an absolute path, and only once, so that
# re-running the cell does not keep appending the same entry. Expects the
# current directory to be the repository root.
worker_path = os.path.abspath('backend/python_worker')
if worker_path not in sys.path:
    sys.path.append(worker_path)

from heady_project.nlp_service import nlp_service
import time

print("üß† Testing NLP Services with GPU")
print("=" * 40)

# Test text
test_text = """
The Heady Systems ecosystem represents a comprehensive approach to AI-driven infrastructure management. 
It integrates multiple specialized nodes including LENS for monitoring, MEMORY for persistent storage, 
BRAIN for intelligent processing, and CONDUCTOR for orchestration. This sacred geometry approach 
ensures optimal resource utilization and system harmony.
"""

print("\nüìù Testing Text Summarization...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
summary = nlp_service.summarize_text(test_text, max_length=50, min_length=20)
summarization_time = time.perf_counter() - start_time

print(f"‚úÖ Summary: {summary}")
print(f"‚è±Ô∏è Time taken: {summarization_time:.2f} seconds")

# Get GPU status after summarization
gpu_status = nlp_service.get_gpu_status()
print(f"\nüìä GPU Status after summarization:")
print(f"   Available: {gpu_status['gpu_available']}")
print(f"   Memory Used: {gpu_status['allocated_memory_gb']:.2f}GB")
print(f"   Efficiency: {gpu_status['efficiency_score']:.1f}/100")

print("\nüí¨ Testing Text Generation...")
start_time = time.perf_counter()
prompt = "The future of AI infrastructure is"
response = nlp_service.generate_response(prompt, max_length=50)
generation_time = time.perf_counter() - start_time

print(f"‚úÖ Generated: {response}")
print(f"‚è±Ô∏è Time taken: {generation_time:.2f} seconds")

# Final GPU status
final_status = nlp_service.get_gpu_status()
print(f"\nüìä Final GPU Status:")
print(f"   Memory Used: {final_status['allocated_memory_gb']:.2f}GB")
print(f"   Efficiency: {final_status['efficiency_score']:.1f}/100")

# Show at most the first three suggestions
if final_status['suggestions']:
    print(f"\nüí° Suggestions:")
    for suggestion in final_status['suggestions'][:3]:
        print(f"   ‚Ä¢ {suggestion}")

print("\n‚úÖ NLP services test complete!")

## 📋 Summary & Access Information

In [None]:
# Final summary and access information
#
# Prints the console URL and admin token, the final GPU metrics, the last
# three recorded optimizations and a few usage tips. Relies on
# `gpu_optimizer` from the "Initialize GPU Optimizer" cell.
# Imported here so the cell does not depend on an earlier cell's import.
import os

print("üéâ Heady Colab Setup Complete!")
print("=" * 50)

# Access information
public_url = os.getenv('PUBLIC_URL', 'Not set')
# Report the token that is actually configured; the fallback is the default
# the server-start cell uses when HEADY_API_KEY has not been set.
admin_token = os.getenv('HEADY_API_KEY', 'colab_secure_token')
print(f"\nüåê Admin Console URL: {public_url}")
print(f"üîë Admin Token: {admin_token}")

# Final GPU status
print(f"\nüìä Final GPU Status:")
final_metrics = gpu_optimizer.get_gpu_metrics()
final_efficiency = gpu_optimizer.get_memory_efficiency_score()

if final_metrics.gpu_available:
    print(f"   GPU: {final_metrics.gpu_name}")
    print(f"   Memory Used: {final_metrics.allocated_memory_gb:.2f}GB / {final_metrics.total_memory_gb:.2f}GB")
    print(f"   Efficiency Score: {final_efficiency['overall_score']:.1f}/100")
    print(f"   Utilization: {final_metrics.utilization_percent:.1f}%")
else:
    print("   GPU: Not available")

# Optimization history
print(f"\nüßπ Optimization History: {len(gpu_optimizer.optimization_history)} optimizations performed")
for i, opt in enumerate(gpu_optimizer.optimization_history[-3:], 1):  # Last 3
    print(f"   {i}. {opt.message}")

print(f"\nüí° Pro Tips:")
print("   ‚Ä¢ Run the optimization cell periodically if GPU memory gets high")
print("   ‚Ä¢ Monitor the efficiency score - keep it above 70 for best performance")
print("   ‚Ä¢ The system auto-optimizes, but manual optimization can help")
print("   ‚Ä¢ Check temperature - if >80¬∞C, consider reducing workload")

print(f"\n‚ú® Your Heady Admin Console is ready at: {public_url}")
print(f"üîë Use the token: {admin_token}")