# PYTHIA GPU Node — Reasoning Chains & HuggingFace Inference

```
╔══════════════════════════════════════════════════════════════════╗
║  ∞ SACRED GEOMETRY ∞  Organic Systems · Breathing Interfaces    ║
║  NODE: PYTHIA Colab Pro+ GPU                                    ║
║  PURPOSE: Text generation, reasoning chains, prediction         ║
╚══════════════════════════════════════════════════════════════════╝
```

**Capabilities:**
- Text generation & completion via HuggingFace models
- Multi-step reasoning chains with chain-of-thought
- Prediction & inference pipelines
- Semantic analysis & summarization

**Branded domains only:** headysystems.com | headycloud.com | headyconnection.com

In [None]:
# Cell 1: Install dependencies + verify GPU
!pip install -q transformers accelerate sentencepiece fastapi uvicorn pyngrok httpx torch

import torch
# Report the runtime so later cells can be debugged against a known environment.
print(f'PyTorch: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    # The device-properties field is `total_memory` (bytes); `total_mem` does
    # not exist and raised AttributeError on every GPU runtime.
    print(f'GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')
else:
    print('WARNING: No GPU — will run on CPU (slower)')

In [None]:
# Cell 2: Configuration
import os

# Base URL of each cloud layer, keyed by layer name.
_CLOUD_LAYERS = {
    'headysystems': 'https://headysystems.com',
    'headyme': 'https://headycloud.com',
    'headyconnection': 'https://headyconnection.com',
}

# Static settings for this node: identity, listening port, the cloud layers it
# talks to, and the capabilities it advertises when registering.
HEADY_CONFIG = dict(
    node_id='pythia-gpu',
    node_role='reasoning-engine',
    port=5001,
    cloud_layers=_CLOUD_LAYERS,
    # One registration URL per cloud layer, in the same order as the layers.
    registration_endpoints=[
        f'{base_url}/api/nodes/register' for base_url in _CLOUD_LAYERS.values()
    ],
    # Read from the environment; empty string when HF_TOKEN is not set.
    hf_token=os.environ.get('HF_TOKEN', ''),
    capabilities=[
        'generate_text',
        'reasoning_chain',
        'huggingface',
        'predict',
        'infer',
        'summarize',
    ],
)

# Task name -> human-readable description of each task type this node handles.
SUPPORTED_TASKS = dict(
    generate_text='Text generation and completion',
    reasoning_chain='Multi-step chain-of-thought reasoning',
    predict='Prediction from input features',
    infer='Inference on structured/unstructured data',
    summarize='Text summarization',
    sentiment='Sentiment analysis',
    classify='Text classification',
)

# Confirm the configuration cell ran and show what this node advertises.
print('PYTHIA Node configured')
print('Capabilities: {}'.format(HEADY_CONFIG["capabilities"]))

In [None]:
# Cell 3: Load models on GPU
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
import json
from datetime import datetime

# Resolve the target device once; every pipeline below shares it.
# `device` is the pipeline device index (0 = first GPU, -1 = CPU) and
# `device_name` is the label reported in API responses.
_has_cuda = torch.cuda.is_available()
device, device_name = (0, 'cuda') if _has_cuda else (-1, 'cpu')

# Load pipelines
print('Loading text generation model...')
text_generator = pipeline(
    'text-generation',
    model='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    device=device,
    # Half precision on GPU, full precision on CPU.
    torch_dtype=torch.float16 if _has_cuda else torch.float32,
)
print('Text generation ready')

print('Loading summarization model...')
summarizer = pipeline(
    'summarization',
    model='facebook/bart-large-cnn',
    device=device,
)
print('Summarization ready')

print('Loading sentiment model...')
sentiment_analyzer = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    device=device,
)
print('Sentiment analysis ready')

print('Loading zero-shot classifier...')
classifier = pipeline(
    'zero-shot-classification',
    model='facebook/bart-large-mnli',
    device=device,
)
print('Zero-shot classification ready')

print(f'\nAll PYTHIA models loaded on {device_name}')
if _has_cuda:
    print(f'GPU Memory used: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB')

In [None]:
# Cell 4: Core PYTHIA functions
import time

def generate_text(prompt, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a completion for ``prompt`` with the TinyLlama chat pipeline.

    The prompt is wrapped as a single user message and rendered through the
    tokenizer's chat template before generation.

    Args:
        prompt: User message text.
        max_length: Upper bound on newly generated tokens (passed to the
            pipeline as ``max_new_tokens``).
        temperature: Sampling temperature. A value that is ``None`` or not
            strictly positive selects greedy decoding instead of sampling,
            because sampling requires a positive temperature.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        dict with ``text`` (the completion with the rendered prompt removed),
        ``model``, ``tokens`` (whitespace-separated word count of the
        completion, not a tokenizer token count), ``latency_ms`` and
        ``device``.
    """
    start = time.time()
    messages = [{'role': 'user', 'content': prompt}]
    formatted = text_generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Callers (including the HTTP API) may pass temperature=0 to ask for
    # deterministic output; sampling with a non-positive temperature is
    # rejected by the generation backend, so decode greedily in that case.
    if temperature is not None and temperature > 0:
        sampling = {'do_sample': True, 'temperature': temperature, 'top_p': top_p}
    else:
        sampling = {'do_sample': False}

    result = text_generator(formatted, max_new_tokens=max_length, **sampling)
    # The pipeline returns prompt + completion; drop the rendered prompt.
    generated = result[0]['generated_text'][len(formatted):].strip()
    elapsed = time.time() - start
    return {
        'text': generated,
        'model': 'TinyLlama-1.1B-Chat',
        'tokens': len(generated.split()),
        'latency_ms': round(elapsed * 1000),
        'device': device_name,
    }

def reasoning_chain(question, steps=3):
    """Run a multi-step chain-of-thought over ``question`` and synthesize an answer.

    Each step prompts the generator with the question plus all previous
    thoughts; a final prompt asks for an answer based on the whole chain.

    Args:
        question: The question to reason about.
        steps: Number of intermediate reasoning steps to generate.

    Returns:
        dict with ``question``, ``chain`` (list of ``{'step', 'thought',
        'latency_ms'}``), ``conclusion``, ``steps`` (number of chain entries),
        ``total_latency_ms`` and ``device``.
    """
    start = time.time()
    chain = []

    for i in range(steps):
        # Earlier thoughts are fed back as context; the first step has none.
        previous = ' '.join(s['thought'] for s in chain) if chain else 'None'
        step_prompt = f"""You are a reasoning engine. Think step by step.
Question: {question}
Previous reasoning: {previous}
Step {i+1}/{steps}: Think about the next logical step."""
        result = generate_text(step_prompt, max_length=200)
        chain.append({
            'step': i + 1,
            # Cap each stored thought at 500 characters so the prompt for
            # later steps does not grow without bound.
            'thought': result['text'][:500],
            'latency_ms': result['latency_ms'],
        })

    # Final synthesis
    thoughts_json = json.dumps([s['thought'] for s in chain])
    synthesis_prompt = f"""Based on this reasoning chain, provide a final answer.
Question: {question}
Reasoning: {thoughts_json}
Final answer:"""
    final = generate_text(synthesis_prompt, max_length=300)
    elapsed = time.time() - start

    return {
        'question': question,
        'chain': chain,
        'conclusion': final['text'],
        'steps': len(chain),
        'total_latency_ms': round(elapsed * 1000),
        'device': device_name,
    }

def summarize_text(text, max_length=150, min_length=30):
    """Summarize ``text`` with the BART summarization pipeline.

    Only the first 1024 characters of ``text`` are sent to the model, and
    decoding is deterministic (``do_sample=False``).

    Returns:
        dict with ``summary``, ``model``, ``input_length`` (length of the
        full, untruncated input), ``latency_ms`` and ``device``.
    """
    started_at = time.time()
    clipped = text[:1024]
    outputs = summarizer(clipped, max_length=max_length, min_length=min_length, do_sample=False)
    latency_ms = round((time.time() - started_at) * 1000)

    response = {}
    response['summary'] = outputs[0]['summary_text']
    response['model'] = 'bart-large-cnn'
    response['input_length'] = len(text)
    response['latency_ms'] = latency_ms
    response['device'] = device_name
    return response

def analyze_sentiment(text):
    """Classify the sentiment of ``text`` with the DistilBERT SST-2 pipeline.

    Only the first 512 characters of ``text`` are analyzed.

    Returns:
        dict with ``label``, ``score`` (rounded to 4 decimals), ``model``,
        ``latency_ms`` and ``device``.
    """
    started_at = time.time()
    predictions = sentiment_analyzer(text[:512])
    latency_ms = round((time.time() - started_at) * 1000)

    best = predictions[0]
    response = {}
    response['label'] = best['label']
    response['score'] = round(best['score'], 4)
    response['model'] = 'distilbert-sst2'
    response['latency_ms'] = latency_ms
    response['device'] = device_name
    return response

def classify_text(text, labels):
    """Score ``text`` against ``labels`` with the zero-shot classification pipeline.

    Only the first 512 characters of ``text`` are classified.

    Returns:
        dict with ``labels`` and ``scores`` as returned by the pipeline
        (scores rounded to 4 decimals), ``top_label`` / ``top_score`` taken
        from the first entry, plus ``model``, ``latency_ms`` and ``device``.
    """
    started_at = time.time()
    outcome = classifier(text[:512], candidate_labels=labels)
    latency_ms = round((time.time() - started_at) * 1000)

    ranked_labels = outcome['labels']
    raw_scores = outcome['scores']
    response = {}
    response['labels'] = ranked_labels
    response['scores'] = [round(score, 4) for score in raw_scores]
    response['top_label'] = ranked_labels[0]
    response['top_score'] = round(raw_scores[0], 4)
    response['model'] = 'bart-large-mnli'
    response['latency_ms'] = latency_ms
    response['device'] = device_name
    return response

def predict(input_data):
    """Route ``input_data`` to a pipeline based on its ``task`` key.

    Recognized tasks are ``summarize``, ``sentiment``, ``classify`` and
    ``reason``; any other value (or a missing key) falls through to text
    generation.
    """
    task_type = input_data.get('task', 'generate')
    text = input_data.get('text', '')

    if task_type == 'summarize':
        return summarize_text(text)

    if task_type == 'sentiment':
        return analyze_sentiment(text)

    if task_type == 'classify':
        labels = input_data.get('labels', ['positive', 'negative', 'neutral'])
        return classify_text(text, labels)

    if task_type == 'reason':
        # 'question' wins over 'text' when both are present.
        question = input_data.get('question', text)
        return reasoning_chain(question, input_data.get('steps', 3))

    # Default: text generation; 'prompt' wins over 'text'.
    prompt = input_data.get('prompt', text)
    return generate_text(prompt, input_data.get('max_length', 512))

# Smoke test: run one sentiment call and one zero-shot classification so a
# broken model load shows up in this cell rather than at request time.
print('=== PYTHIA Quick Test ===')
_sentiment_demo = analyze_sentiment('Heady is building something beautiful for the world')
print('Sentiment:', _sentiment_demo)
_classify_demo = classify_text(
    'Add PPP pricing for nonprofits',
    ['access', 'fairness', 'revenue', 'technical'],
)
print('Classify:', _classify_demo)

In [None]:
# Cell 5: FastAPI server
from fastapi import FastAPI, Request
import uvicorn
import threading
import asyncio
import httpx

# ASGI application that every route in this cell is attached to.
app = FastAPI(
    title='PYTHIA GPU Node',
    version='1.0.0',
)

@app.get('/health')
async def health():
    """GET /health: report node identity, GPU status and loaded models."""
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory_used_gb = round(torch.cuda.memory_allocated(0) / 1e9, 2)
    else:
        # CPU-only runtime: no device name or allocation to report.
        gpu_name = 'cpu'
        gpu_memory_used_gb = 0

    return {
        'status': 'active',
        'node_id': HEADY_CONFIG['node_id'],
        'node_role': HEADY_CONFIG['node_role'],
        'gpu': gpu_name,
        'gpu_memory_used_gb': gpu_memory_used_gb,
        'models': ['TinyLlama-1.1B-Chat', 'bart-large-cnn', 'distilbert-sst2', 'bart-large-mnli'],
        'capabilities': HEADY_CONFIG['capabilities'],
        'supported_tasks': list(SUPPORTED_TASKS.keys()),
    }

@app.post('/api/pythia/generate')
async def api_generate(request: Request):
    """POST /api/pythia/generate: text generation from a JSON body.

    Reads ``prompt`` (default ''), ``max_length`` (default 512) and
    ``temperature`` (default 0.7) from the body.
    """
    body = await request.json()
    prompt = body.get('prompt', '')
    max_length = body.get('max_length', 512)
    temperature = body.get('temperature', 0.7)
    return generate_text(prompt, max_length, temperature)

@app.post('/api/pythia/reason')
async def api_reason(request: Request):
    """POST /api/pythia/reason: multi-step reasoning from a JSON body.

    Reads ``question`` (default '') and ``steps`` (default 3) from the body.
    """
    body = await request.json()
    question = body.get('question', '')
    steps = body.get('steps', 3)
    return reasoning_chain(question, steps)

@app.post('/api/pythia/summarize')
async def api_summarize(request: Request):
    """POST /api/pythia/summarize: summarization from a JSON body.

    Reads ``text`` (default '') and ``max_length`` (default 150) from the body.
    """
    body = await request.json()
    text = body.get('text', '')
    max_length = body.get('max_length', 150)
    return summarize_text(text, max_length)

@app.post('/api/pythia/sentiment')
async def api_sentiment(request: Request):
    """POST /api/pythia/sentiment: sentiment analysis of the body's ``text`` (default '')."""
    body = await request.json()
    text = body.get('text', '')
    return analyze_sentiment(text)

@app.post('/api/pythia/classify')
async def api_classify(request: Request):
    """POST /api/pythia/classify: zero-shot classification from a JSON body.

    Reads ``text`` (default '') and ``labels`` (default positive / negative /
    neutral) from the body.
    """
    body = await request.json()
    text = body.get('text', '')
    labels = body.get('labels', ['positive', 'negative', 'neutral'])
    return classify_text(text, labels)

@app.post('/api/pythia/predict')
async def api_predict(request: Request):
    """POST /api/pythia/predict: hand the whole JSON body to ``predict`` for routing."""
    body = await request.json()
    outcome = predict(body)
    return outcome

@app.post('/api/tasks/execute')
async def execute_task(request: Request):
    """Universal task executor — compatible with HeadyCloud task routing.

    The JSON body carries ``type`` (default ``'generate_text'``) and an
    optional ``payload``; when ``payload`` is absent the body itself is used
    as the payload.

    Returns:
        ``{'success': True, 'node_id', 'task_type', 'result'}`` on success, or
        ``{'success': False, 'error', 'node_id'}`` if the task raised.
    """
    data = await request.json()
    task_type = data.get('type', 'generate_text')
    payload = data.get('payload', data)
    try:
        if task_type in ('generate_text', 'generate'):
            result = generate_text(payload.get('prompt', payload.get('text', '')), payload.get('max_length', 512))
        elif task_type == 'reasoning_chain':
            result = reasoning_chain(payload.get('question', payload.get('text', '')), payload.get('steps', 3))
        elif task_type == 'summarize':
            result = summarize_text(payload.get('text', ''))
        elif task_type == 'sentiment':
            result = analyze_sentiment(payload.get('text', ''))
        elif task_type == 'classify':
            # predict() routes on payload['task'], not on the envelope type,
            # and defaults to text generation. Supply 'classify' as the
            # default so a classify task is actually classified; an explicit
            # 'task' in the payload still takes precedence.
            result = predict({'task': 'classify', **payload})
        elif task_type in ('predict', 'infer'):
            result = predict(payload)
        else:
            # Unknown type: generate from the first 500 chars of the payload JSON.
            result = generate_text(json.dumps(payload)[:500])
        return {'success': True, 'node_id': HEADY_CONFIG['node_id'], 'task_type': task_type, 'result': result}
    except Exception as e:
        # Boundary handler: report the failure to the caller instead of a 500.
        return {'success': False, 'error': str(e), 'node_id': HEADY_CONFIG['node_id']}

# List every route registered in this cell.
_ROUTES = (
    '/health',
    '/api/pythia/generate',
    '/api/pythia/reason',
    '/api/pythia/summarize',
    '/api/pythia/sentiment',
    '/api/pythia/classify',
    '/api/pythia/predict',
    '/api/tasks/execute',
)
print('PYTHIA FastAPI ready: ' + ', '.join(_ROUTES))

In [None]:
# Cell 6: ngrok tunnel + auto-register with HeadyCloud
from pyngrok import ngrok, conf

# Authenticate ngrok.
# SECURITY: the auth token is a credential and must never be committed in the
# notebook. It is read from the NGROK_AUTHTOKEN environment variable, with an
# interactive prompt as a fallback. Any token that has ever been committed
# should be revoked and replaced in the ngrok dashboard.
import getpass
import os

_ngrok_token = os.environ.get('NGROK_AUTHTOKEN') or getpass.getpass('ngrok auth token: ')
conf.get_default().auth_token = _ngrok_token

# Open a public tunnel to the local API port.
public_url = ngrok.connect(HEADY_CONFIG['port']).public_url
print(f'PYTHIA GPU Node live at: {public_url}')

# Module-level alias used by the registration and heartbeat payloads.
PUBLIC_URL = public_url

async def register_with_clouds():
    """Announce this node to every configured registration endpoint.

    Posts the node id, public URL, role, capabilities and GPU name to each
    endpoint in turn. Failures are printed and do not stop the remaining
    registrations.
    """
    endpoints = HEADY_CONFIG['registration_endpoints']
    async with httpx.AsyncClient(timeout=15) as client:
        for endpoint in endpoints:
            try:
                gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu'
                registration = {
                    'node_id': HEADY_CONFIG['node_id'],
                    'url': PUBLIC_URL,
                    'role': HEADY_CONFIG['node_role'],
                    'capabilities': HEADY_CONFIG['capabilities'],
                    'gpu': gpu_name,
                }
                resp = await client.post(endpoint, json=registration)
                print(f'  Registered with {endpoint}: {resp.status_code}')
            except Exception as e:
                # Best effort: report and move on to the next endpoint.
                print(f'  Pending: {endpoint} ({e})')

async def heartbeat_loop():
    """Send a heartbeat to every cloud layer every 30 seconds, forever.

    Each heartbeat carries the node id, status, public URL and basic GPU
    metrics. Delivery is best effort: a failed post is printed and the loop
    continues with the next layer.
    """
    while True:
        await asyncio.sleep(30)
        async with httpx.AsyncClient(timeout=10) as client:
            for url in HEADY_CONFIG['cloud_layers'].values():
                try:
                    await client.post(f'{url}/api/nodes/heartbeat', json={
                        'node_id': HEADY_CONFIG['node_id'],
                        'status': 'active',
                        'url': PUBLIC_URL,
                        'metrics': {
                            'gpu': torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu',
                            'gpu_memory_used': round(torch.cuda.memory_allocated(0) / 1e9, 2) if torch.cuda.is_available() else 0,
                            'models_loaded': 4,
                        }
                    })
                except Exception as e:
                    # Catch Exception rather than using a bare `except:` so
                    # task cancellation and KeyboardInterrupt still propagate,
                    # and report the failure instead of hiding it.
                    print(f'  Heartbeat failed: {url} ({e})')

def _run_on_worker_thread(coro_fn):
    """Run ``coro_fn()`` to completion on a fresh thread with its own event loop."""
    worker = threading.Thread(target=lambda: asyncio.run(coro_fn()))
    worker.start()
    worker.join()


# asyncio.run() raises RuntimeError when the calling thread already has a
# running event loop, which notebook kernels typically do. Running the
# coroutines and the server on worker threads works both in a notebook and in
# a plain script.
_run_on_worker_thread(register_with_clouds)
threading.Thread(target=lambda: asyncio.run(heartbeat_loop()), daemon=True).start()
print(f'PYTHIA GPU node live at {PUBLIC_URL} — accepting inference requests')
# 0.0.0.0 is the Colab VM bind address for uvicorn, NOT a local service
_server = uvicorn.Server(uvicorn.Config(app, host='0.0.0.0', port=HEADY_CONFIG['port']))
_server_thread = threading.Thread(target=_server.run, daemon=True)
_server_thread.start()
try:
    # Keep blocking here, as a direct uvicorn.run() call would.
    _server_thread.join()
except KeyboardInterrupt:
    # Interrupting the cell asks uvicorn to shut down cleanly and free the port.
    _server.should_exit = True
    _server_thread.join()