# 🧠 HeadyBuddy Brain — A100 GPU Intelligence Engine

This notebook runs HeadyBuddy's AI brain on Colab's A100 GPU.
It serves a Flask chat endpoint (`/api/chat`) that the HeadyBuddy widget calls.

### Setup:
1. **Runtime → Change runtime type → A100 GPU**
2. Run all cells
3. Copy the tunnel URL printed by the last cell → set `BRAIN_URL` in `headybuddy-widget.js` to `<tunnel URL>/api/chat` so HeadyBuddy connects

### Architecture:
```
HeadyBuddy Widget (any site) → API call → This Colab (A100 GPU)
                                              ↓
     User sees response      ←  LLM generates intelligent reply
```

In [None]:
# Cell 1: Install dependencies
# transformers + torch load and run the model (Cell 2), flask + flask-cors
# serve the HTTP API (Cell 4), and pyngrok provides the optional ngrok tunnel
# (Cell 5).
!pip install -q transformers accelerate torch flask flask-cors pyngrok
# NOTE(review): bitsandbytes, sentencepiece and protobuf are not referenced
# directly by the visible cells - presumably tokenizer/quantization support
# for the model; confirm before removing.
!pip install -q bitsandbytes sentencepiece protobuf

# Optional: set your ngrok auth token for persistent tunnel.
# Cell 5 opens an ngrok tunnel when this is non-empty and falls back to a
# Cloudflare quick tunnel when it is left as "".
NGROK_AUTH_TOKEN = ""  # Paste your token here for stable URL

print("‚úÖ Dependencies installed")

In [None]:
# Cell 2: Load the LLM
# Loads the tokenizer and model for MODEL_ID, wraps them in a text-generation
# pipeline (`pipe`) used by generate_response(), and prints hardware/model info.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_ID = "microsoft/Phi-3.5-mini-instruct"  # Fast, smart, fits A100

print(f"üîß Loading {MODEL_ID}...")
print(f"üñ•Ô∏è GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
# The CUDA device-properties attribute is `total_memory`; `total_mem` does not
# exist and raised AttributeError on every GPU runtime.
print(f"üíæ VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" if torch.cuda.is_available() else "")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Half-precision weights; device_map="auto" lets the library place the model
# on the available device(s).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Create pipeline
# Sampling settings here are the defaults for every pipe(...) call.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1
)

print(f"\n‚úÖ Model loaded on {model.device}")
print(f"üìä Parameters: {sum(p.numel() for p in model.parameters()) / 1e9:.1f}B")

In [None]:
# Cell 3: HeadyBuddy System Prompt
# Persona and product knowledge sent as the "system" message on every request.
# generate_response() appends the caller's current service to this text, so
# edits here change the model's behavior for all endpoints.
HEADY_SYSTEM_PROMPT = """You are HeadyBuddy, the AI assistant for the Heady ecosystem. You are helpful, friendly, knowledgeable, and slightly cosmic in personality.

The Heady ecosystem consists of 6 interconnected services built on Sacred Geometry principles:

1. HeadySystems (headysystems.com) ‚Äî Infrastructure backbone, Metatron's Cube architecture, HCFP policy engine, system orchestration
2. HeadyMe (headyme.com) ‚Äî Personal AI companion, Flower of Life pattern, preference management, privacy-first
3. HeadyConnection (headyconnection.org) ‚Äî Social intelligence layer, Sri Yantra pattern, knowledge graph, collaboration
4. HeadyIO (headyio.com) ‚Äî Data orchestration gateway, Torus pattern, API gateway, real-time streaming, webhooks
5. HeadyBuddy (headybuddy.org) ‚Äî That's you! AI assistant, Seed of Life pattern, context-aware help on every site
6. HeadyMCP (headymcp.com) ‚Äî Model Context Protocol hub, Vesica Piscis, 20+ AI tools for IDEs

Key technologies:
- HCFP (Heady Core Functionality Platform) ‚Äî auto-success policy engine, zero violations
- HeadyBattle interceptor ‚Äî security engine
- Cloudflare Workers + WARP tunnel ‚Äî zero-trust deployment
- Sacred Geometry theming ‚Äî cosmic rainbow aesthetic across all sites
- HeadyLens ‚Äî real-time system monitoring via WebSocket

You can help with: system status, service details, architecture questions, troubleshooting, navigation between services, explaining features, and general AI assistance.

Keep responses concise but thorough. Use emoji sparingly. Be warm and knowledgeable."""

def generate_response(user_message, context=None):
    """Generate a HeadyBuddy reply to ``user_message`` with the loaded LLM.

    Args:
        user_message: The user's chat message.
        context: Optional mapping supplied by the caller. Only its
            ``'service'`` entry is used: when truthy it is appended to the
            system prompt so the model knows which site the user is on.
            Values that are not a dict (for example a JSON string or list)
            are ignored instead of raising ``AttributeError``.

    Returns:
        The generated text with leading/trailing whitespace stripped.
    """
    # `context` is taken straight from the request body by the chat endpoint,
    # so it can be any JSON type, not just an object.
    service = context.get('service') if isinstance(context, dict) else None

    service_context = ""
    if service:
        # NOTE(review): `service` is client-controlled text that ends up in the
        # system prompt; consider restricting it to a known set of site names.
        service_context = f"\nThe user is currently on: {service}"

    messages = [
        {"role": "system", "content": HEADY_SYSTEM_PROMPT + service_context},
        {"role": "user", "content": user_message}
    ]

    # Render the chat as a single prompt string ending with the assistant
    # turn marker, then ask the pipeline for the continuation only.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    result = pipe(prompt, return_full_text=False)
    response = result[0]['generated_text'].strip()

    return response

# Smoke test: run one generation end-to-end before the API server is started
test = generate_response("What is the Heady ecosystem?")
print("Test response:\n" + test)

In [None]:
# Cell 4: Start the API server
import os, threading, time, json
from flask import Flask, request, jsonify
from flask_cors import CORS

# Flask application with CORS enabled so the widget can call it cross-origin.
app = Flask(__name__)
CORS(app)

# Identity and counters reported by the endpoints below.
node_id = os.urandom(4).hex()  # random 8-hex-char id for this server instance
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "CPU"
start_time = time.time()  # reference point for uptime
request_count = 0  # incremented by the chat endpoint

@app.route('/health', methods=['GET'])
def health():
    """Return a JSON status document: node id, hardware, model, uptime and request count."""
    uptime_seconds = round(time.time() - start_time, 1)
    payload = {
        "status": "OPTIMAL",
        "service": "HeadyBuddy Brain",
        "node_id": node_id,
        "hardware": gpu_name,
        "model": MODEL_ID,
        "uptime": uptime_seconds,
        "requests_served": request_count,
    }
    return jsonify(payload)

@app.route('/api/chat', methods=['POST'])
def chat():
    """Answer one chat message from the HeadyBuddy widget.

    Expects a JSON object body with a non-empty string ``message`` and an
    optional ``context`` object, which is forwarded to ``generate_response``.

    Returns:
        200 with ``response``, ``model``, ``hardware`` and ``node_id``;
        400 with an ``error`` when the body is not a JSON object or carries no
        usable ``message``;
        500 with the exception text as ``error`` when generation fails.
    """
    global request_count
    request_count += 1

    # `request.json` aborts with a non-JSON error page on a wrong Content-Type
    # or malformed body, so the old `or {}` fallback never ran in those cases.
    # get_json(silent=True) returns None instead, letting us answer in JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    message = data.get('message', '')
    context = data.get('context', {})

    if not isinstance(message, str) or not message.strip():
        return jsonify({"error": "No message provided"}), 400

    # The context is optional metadata; ignore it when it is not an object
    # rather than failing the whole request.
    if not isinstance(context, dict):
        context = {}

    try:
        response = generate_response(message, context)
        return jsonify({
            "response": response,
            "model": MODEL_ID,
            "hardware": gpu_name,
            "node_id": node_id
        })
    except Exception as e:
        # Endpoint boundary: record the traceback server-side and keep the
        # existing JSON error contract for the client.
        app.logger.exception("Chat generation failed")
        return jsonify({"error": str(e)}), 500

@app.route('/api/cloud-process', methods=['POST'])
def process():
    """Legacy endpoint for backward compatibility.

    Performs no real work: it echoes the request's ``type`` field (default
    ``'task'``) in a fixed success payload together with the node id and
    hardware name.
    """
    # `request.json` aborts on a wrong Content-Type or malformed body, and a
    # non-object JSON body has no `.get`; treat all of those as "no fields"
    # so this best-effort endpoint always answers.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    return jsonify({
        "status": "success",
        "node_id": node_id,
        "hardware": gpu_name,
        "result": f"Processed {data.get('type', 'task')} on {gpu_name}",
        "heady_optimized": True
    })

# Start server
# Flask's blocking run() is moved to a background thread so the notebook can
# continue to the tunnel cell; the reloader is disabled.
server_thread = threading.Thread(
    target=app.run,
    kwargs={"host": '0.0.0.0', "port": 5000, "debug": False, "use_reloader": False},
)
server_thread.start()

startup_lines = [
    f"\nüöÄ HeadyBuddy Brain API running on port 5000",
    f"   GPU: {gpu_name}",
    f"   Model: {MODEL_ID}",
    f"   Endpoints: /health, /api/chat, /api/cloud-process",
]
for startup_line in startup_lines:
    print(startup_line)

In [None]:
# Cell 5: Create public tunnel
import re, subprocess, time

print("\n" + "="*50)
print("üîó ESTABLISHING TUNNEL TO HEADY ECOSYSTEM...")
print("="*50 + "\n")

# Public base URL of the tunnel, or None when it could not be determined.
public_url = None

if NGROK_AUTH_TOKEN:
    # Authenticated ngrok tunnel to the Flask port.
    from pyngrok import ngrok, conf
    conf.get_default().auth_token = NGROK_AUTH_TOKEN
    public_url = ngrok.connect(5000).public_url
    print(f"‚úÖ NGROK TUNNEL: {public_url}\n")
else:
    import select
    import threading

    print("Using Cloudflare Quick Tunnel (free, no auth needed)...")
    # Best-effort download and install of cloudflared; failures are not fatal
    # here and surface when the binary is launched below.
    subprocess.run(['wget', '-q', 'https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb'], check=False)
    subprocess.run(['dpkg', '-i', 'cloudflared-linux-amd64.deb'], check=False, capture_output=True)

    # stdout is discarded: nothing ever read it, and an unread PIPE can fill
    # up and block the child process.
    p = subprocess.Popen(
        ['cloudflared', 'tunnel', '--url', 'http://127.0.0.1:5000'],
        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE
    )

    # Scan cloudflared's stderr log for the quick-tunnel URL, for up to 20 s.
    url_pattern = re.compile(r'https://[a-zA-Z0-9-]+\.trycloudflare\.com')
    deadline = time.time() + 20
    collected = ""
    while time.time() < deadline:
        ready, _, _ = select.select([p.stderr], [], [], 1)
        if not ready:
            continue
        raw = p.stderr.read1(4096)
        if not raw:
            # EOF: cloudflared exited. select() would keep reporting the pipe
            # as readable, so stop instead of busy-spinning until the deadline.
            break
        collected += raw.decode('utf-8', errors='replace')
        match = url_pattern.search(collected)
        if match:
            public_url = match.group(0)
            break

    def _drain_tunnel_log(stream):
        """Discard remaining cloudflared log output so its stderr pipe never fills."""
        while stream.read1(4096):
            pass

    # cloudflared keeps logging for the life of the tunnel; without a reader
    # the pipe buffer eventually fills and the tunnel process blocks on write.
    threading.Thread(target=_drain_tunnel_log, args=(p.stderr,), daemon=True).start()

    if public_url:
        print(f"‚úÖ CLOUDFLARE TUNNEL: {public_url}\n")
    else:
        print("‚ö†Ô∏è Could not auto-detect URL. Check output:\n")
        print(collected)

# Print connection instructions once a public URL is known.
if public_url:
    rule = "="*60
    banner_lines = [
        rule,
        f"",
        f"  üß† HeadyBuddy Brain is LIVE!",
        f"",
        f"  üìã Your endpoint URL:",
        f"  üëâ {public_url}",
        f"",
        f"  Test it:",
        f"  curl {public_url}/health",
        f"",
        f"  Chat:",
        f"  curl -X POST {public_url}/api/chat \\",
        f"    -H 'Content-Type: application/json' \\",
        f"    -d '{{\"message\": \"What is HeadyBuddy?\"}}'",
        f"",
        f"  To connect to HeadyBuddy widget:",
        f"  Set BRAIN_URL in headybuddy-widget.js to:",
        f"  {public_url}/api/chat",
        f"",
        rule,
    ]
    for banner_line in banner_lines:
        print(banner_line)

# Keep alive
# Blocks this cell with a once-a-minute heartbeat until it is interrupted.
print("\n‚è≥ Keeping notebook alive... (Ctrl+C to stop)")
try:
    while True:
        time.sleep(60)
        print(f"üíì Heartbeat ‚Äî {request_count} requests served, uptime: {round(time.time() - start_time)}s")
except KeyboardInterrupt:
    # The prompt above advertises Ctrl+C as the way to stop, so end the cell
    # cleanly instead of surfacing a traceback. Only the heartbeat loop stops;
    # this cell does not shut down the API server thread or the tunnel.
    print("\nHeartbeat stopped.")