# 🚀 Advanced Processing Server for AI Platform

This notebook provides GPU-accelerated processing capabilities for your AI platform.

## Features:
- Multi-modal AI processing (text, image, video)
- GPU-accelerated embeddings
- Neural search capabilities
- Style transfer and image generation
- Batch processing for efficiency

## 1. Environment Setup

In [None]:
# Check GPU availability
import torch
import tensorflow as tf

print("🔍 Checking available resources...\n")

# PyTorch GPU: report the name and total memory of device 0 when CUDA is usable.
if torch.cuda.is_available():
    print(f"✅ PyTorch GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("❌ No PyTorch GPU found")

# TensorFlow GPU: list every physical GPU device TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    print(f"\n✅ TensorFlow GPUs: {len(gpus)}")
    for gpu in gpus:
        print(f"   {gpu}")
else:
    print("\n❌ No TensorFlow GPU found")

# Check TPU (via the devices JAX reports). This is a best-effort probe, so any
# ordinary failure (missing package, backend initialisation error) is
# reported and skipped. `except Exception` is used instead of a bare `except`
# so that KeyboardInterrupt/SystemExit still propagate.
try:
    import jax
    print(f"\n🔧 JAX devices: {jax.devices()}")
except Exception:
    print("\nℹ️ JAX not available (TPU check skipped)")

In [None]:
# Install required packages
!pip install -q transformers diffusers accelerate
!pip install -q sentence-transformers faiss-gpu
!pip install -q opencv-python-headless pillow
!pip install -q fastapi uvicorn pyngrok
!pip install -q einops xformers
!pip install -q torchvision torchaudio

print("✅ All dependencies installed")

## 2. Advanced Processing Engine

In [None]:
import torch
import numpy as np
from typing import Dict, List, Any, Optional, Union
import base64
import io
from PIL import Image
import json
from dataclasses import dataclass
from datetime import datetime
import logging
import asyncio
from concurrent.futures import ThreadPoolExecutor
import hashlib

# Configure logging
# Ask for INFO-level output on the root logger and create the module-level
# logger that the processor class and the API handlers below log through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class AdvancedProcessor:
    """Main processing engine with GPU acceleration.

    Holds lazily-loaded text-generation, image-generation and embedding
    models on a single torch device (CUDA when available, otherwise CPU),
    exposes one async entry point per task type, and keeps simple counters
    in ``self.stats``.
    """

    # Prompt suffix appended by generate_image() for each known style name.
    STYLE_PROMPTS = {
        'anime': ', anime style, manga, japanese animation',
        'realistic': ', photorealistic, high detail, professional photography',
        'artistic': ', artistic, oil painting, masterpiece',
        'cyberpunk': ', cyberpunk style, neon, futuristic',
        'fantasy': ', fantasy art, magical, ethereal'
    }

    # Vocabulary of the keyword-based sentiment heuristic.
    POSITIVE_WORDS = frozenset(
        {'good', 'great', 'excellent', 'amazing', 'wonderful', 'love'}
    )
    NEGATIVE_WORDS = frozenset(
        {'bad', 'terrible', 'awful', 'hate', 'poor', 'worst'}
    )

    # Punctuation trimmed from both ends of a token before keyword lookup.
    _TOKEN_STRIP_CHARS = '.,!?;:"\'()[]{}'

    # Processor names accepted by batch_process().
    _BATCH_PROCESSORS = ("embeddings", "text_analysis")

    def __init__(self):
        """Pick the torch device and set up empty model slots and counters."""
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Processor initialized on {self.device}")

        # Model cache
        self.models = {}
        # Held while any model is being loaded so concurrent coroutines do
        # not start the same load twice.
        self.model_loading_lock = asyncio.Lock()

        # Initialize models lazily
        self.text_model = None
        self.image_model = None
        self.embedding_model = None
        self.style_transfer_model = None

        # Processing stats
        self.stats = {
            'tasks_processed': 0,
            'total_processing_time': 0,
            'errors': 0
        }

    async def initialize_text_model(self):
        """Load the text generation model and tokenizer on first use.

        Does nothing when ``self.text_model`` is already set. The model and
        tokenizer are stored on ``self`` only after every setup step has
        succeeded, so a failed load is retried by the next call instead of
        leaving a half-configured model behind.
        """
        if self.text_model is not None:
            return
        async with self.model_loading_lock:
            if self.text_model is not None:  # Loaded while waiting for the lock
                return
            logger.info("Loading text generation model...")
            from transformers import AutoModelForCausalLM, AutoTokenizer

            model_name = "microsoft/DialoGPT-medium"
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(model_name)
            model.to(self.device)
            model.eval()

            # Add padding token (reuse the end-of-sequence token)
            tokenizer.pad_token = tokenizer.eos_token

            self.text_tokenizer = tokenizer
            self.text_model = model
            logger.info("Text model loaded successfully")

    async def initialize_image_model(self):
        """Load the Stable Diffusion pipeline on first use.

        Does nothing when ``self.image_model`` is already set. Half-precision
        weights are requested only when running on CUDA; on CPU the
        pipeline's default weights are loaded instead.
        """
        if self.image_model is not None:
            return
        async with self.model_loading_lock:
            if self.image_model is not None:  # Loaded while waiting for the lock
                return
            logger.info("Loading image generation model...")
            from diffusers import StableDiffusionPipeline

            # Use smaller model for Colab
            model_id = "CompVis/stable-diffusion-v1-4"
            load_kwargs = {'use_auth_token': False}
            if self.device.type == 'cuda':
                # float16 inference is generally not usable on CPU, so the
                # fp16 weights are requested only for GPU runs.
                load_kwargs['torch_dtype'] = torch.float16
                load_kwargs['revision'] = "fp16"
            pipeline = StableDiffusionPipeline.from_pretrained(
                model_id, **load_kwargs
            )
            pipeline = pipeline.to(self.device)

            # Enable memory efficient attention (optional optimisation)
            try:
                pipeline.enable_xformers_memory_efficient_attention()
            except Exception:
                logger.warning("xformers not available, using default attention")

            self.image_model = pipeline
            logger.info("Image model loaded successfully")

    async def initialize_embedding_model(self):
        """Load the sentence-embedding model on first use.

        Does nothing when ``self.embedding_model`` is already set.
        """
        if self.embedding_model is not None:
            return
        async with self.model_loading_lock:
            if self.embedding_model is not None:  # Loaded while waiting for the lock
                return
            logger.info("Loading embedding model...")
            from sentence_transformers import SentenceTransformer

            embedder = SentenceTransformer('all-MiniLM-L6-v2')
            embedder.to(self.device)

            self.embedding_model = embedder
            logger.info("Embedding model loaded successfully")

    async def process_text(self, text: str, model: str = "default",
                          parameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Process text generation request.

        Args:
            text: Prompt to continue.
            model: Label echoed back in the result; it does not select a
                different model.
            parameters: Optional sampling settings. Recognised keys are
                ``max_length`` (default 100), ``temperature`` (default 0.7)
                and ``top_p`` (default 0.9).

        Returns:
            Dict with ``generated_text``, ``model`` and the ``parameters``
            that were supplied.
        """
        await self.initialize_text_model()

        parameters = parameters or {}
        max_length = parameters.get('max_length', 100)
        temperature = parameters.get('temperature', 0.7)
        top_p = parameters.get('top_p', 0.9)

        # Tokenize input
        inputs = self.text_tokenizer.encode(text, return_tensors='pt').to(self.device)

        # Generate
        with torch.no_grad():
            outputs = self.text_model.generate(
                inputs,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=self.text_tokenizer.eos_token_id
            )

        # Decode
        generated_text = self.text_tokenizer.decode(outputs[0], skip_special_tokens=True)

        return {
            'generated_text': generated_text,
            'model': model,
            'parameters': parameters
        }

    async def generate_image(self, prompt: str, style: Optional[str] = None,
                           size: str = "512x512", **kwargs) -> Dict[str, Any]:
        """Generate image from text prompt.

        Args:
            prompt: Text description of the image.
            style: Optional key of ``STYLE_PROMPTS``; its suffix is appended
                to the prompt. Unknown styles leave the prompt unchanged.
            size: ``"<width>x<height>"`` in pixels.
            **kwargs: ``steps`` (default 50) and ``guidance_scale``
                (default 7.5) are forwarded to the pipeline; other keys are
                ignored.

        Returns:
            Dict with the base64-encoded PNG under ``image``, the final
            ``prompt`` (including any style suffix), ``size`` and ``style``.

        Raises:
            ValueError: If ``size`` is not two integers separated by ``x``.
        """
        await self.initialize_image_model()

        # Parse size
        width, height = map(int, size.split('x'))

        # Apply style if provided
        if style:
            prompt += self.STYLE_PROMPTS.get(style, '')

        # Generate image
        with torch.no_grad():
            image = self.image_model(
                prompt,
                height=height,
                width=width,
                num_inference_steps=kwargs.get('steps', 50),
                guidance_scale=kwargs.get('guidance_scale', 7.5)
            ).images[0]

        # Convert to base64
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode()

        return {
            'image': img_base64,
            'prompt': prompt,
            'size': size,
            'style': style
        }

    async def generate_embeddings(self, texts: List[str],
                                model: str = "sentence-transformers") -> np.ndarray:
        """Generate embeddings for texts.

        Args:
            texts: Strings to embed.
            model: Accepted for interface symmetry; not used to select a
                model.

        Returns:
            The encoder output moved to CPU and converted to a NumPy array.
        """
        await self.initialize_embedding_model()

        # Generate embeddings
        with torch.no_grad():
            embeddings = self.embedding_model.encode(
                texts,
                convert_to_tensor=True,
                device=self.device,
                show_progress_bar=False
            )

        return embeddings.cpu().numpy()

    async def neural_search(self, query: str, corpus: List[str],
                          top_k: int = 10) -> List[Dict[str, Any]]:
        """Perform neural search.

        Embeds the query and every corpus entry, ranks the corpus by cosine
        similarity to the query and returns the best ``top_k`` entries.

        Args:
            query: Search text.
            corpus: Candidate texts to rank.
            top_k: Maximum number of results.

        Returns:
            List of dicts with ``text``, ``score`` and ``index`` (position in
            ``corpus``), best match first. Empty when ``corpus`` is empty or
            ``top_k`` is not positive.
        """
        # Nothing to rank: avoid embedding an empty batch and slicing with a
        # non-positive bound.
        if not corpus or top_k <= 0:
            return []

        # Generate embeddings (this also loads the model on first use)
        query_embedding = await self.generate_embeddings([query])
        corpus_embeddings = await self.generate_embeddings(corpus)

        # Calculate similarities
        from sklearn.metrics.pairwise import cosine_similarity
        similarities = cosine_similarity(query_embedding, corpus_embeddings)[0]

        # Get top k results
        top_indices = np.argsort(similarities)[::-1][:top_k]

        return [
            {
                'text': corpus[idx],
                'score': float(similarities[idx]),
                'index': int(idx)
            }
            for idx in top_indices
        ]

    async def batch_process(self, items: List[Dict], processor: str,
                          batch_size: int = 32) -> List[Dict]:
        """Process items in batch.

        Args:
            items: Dicts with an optional ``id`` and a ``text`` field.
            processor: ``"embeddings"`` or ``"text_analysis"``.
            batch_size: Number of items handled per chunk.

        Returns:
            One result dict per item, in input order. ``item_id`` is the
            item's ``id`` or, when absent, its position in ``items``.

        Raises:
            ValueError: If ``processor`` is not recognised or ``batch_size``
                is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        if processor not in self._BATCH_PROCESSORS:
            raise ValueError(f"Unknown batch processor: {processor!r}")

        results = []

        for start in range(0, len(items), batch_size):
            batch = items[start:start + batch_size]

            if processor == "embeddings":
                texts = [item.get('text', '') for item in batch]
                embeddings = await self.generate_embeddings(texts)

                for offset, (item, embedding) in enumerate(zip(batch, embeddings)):
                    results.append({
                        'item_id': item.get('id', start + offset),
                        'embedding': embedding.tolist()
                    })

            else:  # "text_analysis"
                for offset, item in enumerate(batch):
                    text = item.get('text', '')
                    # Simple analysis - can be extended
                    results.append({
                        'item_id': item.get('id', start + offset),
                        'length': len(text.split()),
                        'sentiment': self._analyze_sentiment(text)
                    })

        return results

    def _analyze_sentiment(self, text: str) -> str:
        """Simple sentiment analysis.

        Counts how many distinct positive and negative keywords occur in
        ``text`` as whole, case-insensitive words and returns ``'positive'``,
        ``'negative'`` or ``'neutral'`` (a tie, including no matches).
        """
        # Match whole words only: substring matching would count "hate"
        # inside "whatever" or "bad" inside "badge".
        tokens = {
            raw.strip(self._TOKEN_STRIP_CHARS)
            for raw in text.lower().split()
        }
        pos_count = len(tokens & self.POSITIVE_WORDS)
        neg_count = len(tokens & self.NEGATIVE_WORDS)

        if pos_count > neg_count:
            return 'positive'
        if neg_count > pos_count:
            return 'negative'
        return 'neutral'

    def get_stats(self) -> Dict[str, Any]:
        """Get processing statistics.

        Returns:
            Dict with the device name, a flag per model saying whether it is
            loaded, and a snapshot copy of the counters in ``self.stats``.
        """
        return {
            'device': str(self.device),
            'models_loaded': {
                'text': self.text_model is not None,
                'image': self.image_model is not None,
                'embedding': self.embedding_model is not None
            },
            # Copy so callers cannot mutate the live counters.
            'stats': dict(self.stats)
        }

# Initialize processor
# Single module-level instance; the API handlers and the monitoring cells
# further down read and update this object directly.
processor = AdvancedProcessor()
print("✅ Advanced processor initialized")

## 3. FastAPI Server

In [None]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
import uvicorn
from pyngrok import ngrok
import nest_asyncio
import threading
import time

# Allow nested asyncio
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop while the server cells start their own — confirm before removing.
nest_asyncio.apply()

# API Models
class TextProcessRequest(BaseModel):
    """Request body for the ``/process`` endpoint (used for every task type)."""

    # Caller-chosen identifier; echoed back unchanged in the response.
    task_id: str
    # Selects the handler, e.g. "text_generation", "image_generation",
    # "embeddings", "neural_search" or "batch_processing".
    task_type: str
    # Task-specific arguments; the keys read depend on task_type.
    payload: Dict[str, Any]
    # Accepted but not read by any handler in this notebook.
    priority: int = 5

class HealthResponse(BaseModel):
    """Response body of the ``/health`` endpoint."""

    status: str = "healthy"
    # Load estimate as a fraction (the monitor cell formats it as a percentage).
    load: float
    # Available memory in units of 1e9 bytes.
    memory_available_gb: float
    # Keys 'text', 'image', 'embedding' -> whether that model is loaded.
    models_loaded: Dict[str, bool]

# Create FastAPI app
# The route decorators below register their handlers on this instance, and
# start_server() hands it to uvicorn.
app = FastAPI(title="Colab Advanced Processing Server", version="1.0.0")

@app.get("/")
async def root():
    """Describe the service: banner message, supported task types, GPU flag."""
    supported_tasks = [
        "text_generation",
        "image_generation",
        "embeddings",
        "neural_search",
        "batch_processing",
    ]
    service_info = {"message": "Colab Advanced Processing Server"}
    service_info["capabilities"] = supported_tasks
    # Evaluated per request so the flag reflects the current runtime.
    service_info["gpu_available"] = torch.cuda.is_available()
    return service_info

@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint.

    Returns:
        HealthResponse with ``load`` as a fraction (GPU utilisation when CUDA
        is available, otherwise the 1-minute system load average per CPU
        capped at 1.0), ``memory_available_gb`` in units of 1e9 bytes (free
        GPU memory, or available system RAM without a GPU) and the loaded
        state of each model.
    """
    if torch.cuda.is_available():
        # utilization() relies on NVML bindings that may not be installed;
        # a health probe should degrade gracefully rather than return 500.
        try:
            load = torch.cuda.utilization() / 100.0
        except Exception as exc:
            logger.warning("GPU utilization unavailable: %s", exc)
            load = 0.0
        # Free memory as reported by the driver. Subtracting
        # memory_allocated() from the total would ignore cached blocks and
        # memory held by anything other than this process's tensors.
        free_bytes, _total_bytes = torch.cuda.mem_get_info()
        memory_available = free_bytes / 1e9
    else:
        import os
        import psutil

        try:
            load = min(1.0, os.getloadavg()[0] / (os.cpu_count() or 1))
        except (AttributeError, OSError):
            # Load average is not obtainable on this platform.
            load = 0.0
        memory_available = psutil.virtual_memory().available / 1e9

    return HealthResponse(
        load=load,
        memory_available_gb=memory_available,
        models_loaded={
            'text': processor.text_model is not None,
            'image': processor.image_model is not None,
            'embedding': processor.embedding_model is not None
        }
    )

@app.post("/process")
async def process_task(request: TextProcessRequest):
    """Main processing endpoint.

    Dispatches on ``request.task_type`` to the matching processor method,
    reading that task's arguments from ``request.payload``.

    Returns:
        Dict with the echoed ``task_id``, ``status`` set to ``'completed'``
        and the task-specific ``result``.

    Raises:
        HTTPException: 400 for an unknown task type; 500 when the selected
            handler raises.
    """
    task_type = request.task_type
    payload = request.payload
    started = time.perf_counter()

    try:
        result = None

        if task_type == "text_generation":
            result = await processor.process_text(
                payload.get('text', ''),
                payload.get('model', 'default'),
                payload.get('parameters', {})
            )

        elif task_type == "image_generation":
            # Every payload key other than the three positional ones is
            # forwarded as a keyword option (e.g. steps, guidance_scale).
            result = await processor.generate_image(
                payload.get('prompt', ''),
                payload.get('style'),
                payload.get('size', '512x512'),
                **{k: v for k, v in payload.items() if k not in ['prompt', 'style', 'size']}
            )

        elif task_type == "embeddings":
            embeddings = await processor.generate_embeddings(
                payload.get('texts', []),
                payload.get('model', 'sentence-transformers')
            )
            result = {'embeddings': embeddings.tolist()}

        elif task_type == "neural_search":
            results = await processor.neural_search(
                payload.get('query', ''),
                payload.get('corpus', []),
                payload.get('top_k', 10)
            )
            result = {'results': results}

        elif task_type == "batch_processing":
            processed = await processor.batch_process(
                payload.get('items', []),
                payload.get('processor', ''),
                payload.get('batch_size', 32)
            )
            result = {'processed_items': processed}

        else:
            # A bad task type is the client's mistake, not a server fault.
            raise HTTPException(
                status_code=400,
                detail=f"Unknown task type: {task_type}"
            )

        # Update stats
        processor.stats['tasks_processed'] += 1

        return {
            'task_id': request.task_id,
            'status': 'completed',
            'result': result
        }

    except HTTPException:
        # Already carries the intended status code; count it and pass it on.
        processor.stats['errors'] += 1
        raise
    except Exception as e:
        processor.stats['errors'] += 1
        logger.exception("Processing error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Wall-clock seconds spent in this handler, for failed requests too.
        processor.stats['total_processing_time'] += time.perf_counter() - started

@app.get("/stats")
async def get_stats():
    """Get processing statistics"""
    return processor.get_stats()

# Start server function
def start_server(port=8080):
    """Serve the FastAPI app with uvicorn on every interface at ``port``."""
    listen_options = {"host": "0.0.0.0", "port": port}
    uvicorn.run(app, **listen_options)

print("✅ FastAPI server configured")

## 4. Deploy with ngrok

In [None]:
# Start server in background thread
# daemon=True so the thread does not keep the interpreter alive on shutdown.
server_thread = threading.Thread(target=start_server, args=(8080,), daemon=True)
server_thread.start()

# Give server time to start
time.sleep(3)

# Create ngrok tunnel
# Recent pyngrok versions return an NgrokTunnel object whose string form is a
# description, not a URL, so read .public_url explicitly. The getattr
# fallback keeps older versions that returned a plain string working.
tunnel = ngrok.connect(8080)
public_url = getattr(tunnel, "public_url", tunnel)
print(f"\n🌐 Processing Server URL: {public_url}")
print(f"\n📋 Add this to your Flask app as a Colab resource:")
print(f"\ninstance_id: 'colab_gpu_1'")
print(f"url: '{public_url}'")
print(f"capabilities: ['text_generation', 'image_generation', 'embeddings', 'neural_search']")
print(f"gpu_type: '{torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}'")

# Test the server
import requests
# A timeout stops the cell hanging forever if the tunnel or server is not up.
test_response = requests.get(f"{public_url}/health", timeout=30)
print(f"\n🧪 Health Check: {test_response.json()}")

## 5. Test Processing Capabilities

In [None]:
# Test various processing capabilities
import requests
import json

print("🧪 Testing Processing Capabilities\n")

# Test 1: Text Generation — POST a prompt and show the start of the output.
print("1️⃣ Testing Text Generation...")
text_request = dict(
    task_id="test_text_001",
    task_type="text_generation",
    payload={
        "text": "The future of AI is",
        "parameters": {"max_length": 50, "temperature": 0.8},
    },
)

response = requests.post(f"{public_url}/process", json=text_request)
if response.status_code != 200:
    print(f"❌ Error: {response.text}\n")
else:
    result = response.json()
    generated = result['result']['generated_text']
    print(f"✅ Generated: {generated[:100]}...\n")

# Test 2: Embeddings — embed three sentences and report count and width.
print("2️⃣ Testing Embeddings...")
embedding_request = dict(
    task_id="test_embed_001",
    task_type="embeddings",
    payload={
        "texts": [
            "Machine learning is amazing",
            "AI will transform the world",
            "Deep learning models are powerful",
        ],
    },
)

response = requests.post(f"{public_url}/process", json=embedding_request)
if response.status_code != 200:
    print(f"❌ Error: {response.text}\n")
else:
    result = response.json()
    embeddings = result['result']['embeddings']
    print(f"✅ Generated {len(embeddings)} embeddings")
    print(f"   Embedding shape: {len(embeddings[0])} dimensions\n")

# Test 3: Neural Search — rank a five-sentence corpus against one query.
print("3️⃣ Testing Neural Search...")
search_request = dict(
    task_id="test_search_001",
    task_type="neural_search",
    payload={
        "query": "artificial intelligence applications",
        "corpus": [
            "AI is used in healthcare for diagnosis",
            "Machine learning powers recommendation systems",
            "Computer vision enables autonomous vehicles",
            "Natural language processing helps chatbots",
            "Deep learning improves image recognition",
        ],
        "top_k": 3,
    },
)

response = requests.post(f"{public_url}/process", json=search_request)
if response.status_code != 200:
    print(f"❌ Error: {response.text}")
else:
    result = response.json()
    search_results = result['result']['results']
    print("✅ Search Results:")
    for rank, hit in enumerate(search_results, start=1):
        print(f"   {rank}. {hit['text']} (score: {hit['score']:.3f})")

print("\n✅ All tests completed!")

## 6. Resource Monitoring

In [None]:
import matplotlib.pyplot as plt
import psutil
import GPUtil

def plot_resource_usage():
    """Draw a 2x2 dashboard: per-core CPU, RAM split, GPU usage, task counters."""
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    fig.suptitle('Colab Resource Usage', fontsize=16)
    (cpu_ax, mem_ax), (gpu_ax, stats_ax) = axes

    # Top-left: one bar per CPU core, sampled over a one-second interval.
    per_core = psutil.cpu_percent(interval=1, percpu=True)
    cpu_ax.bar(range(len(per_core)), per_core)
    cpu_ax.set_title('CPU Usage by Core')
    cpu_ax.set_ylabel('Usage %')
    cpu_ax.set_xlabel('CPU Core')

    # Top-right: used vs. available system memory, in units of 1e9 bytes.
    vm = psutil.virtual_memory()
    mem_ax.pie(
        [vm.used / 1e9, vm.available / 1e9],
        labels=['Used', 'Available'],
        autopct='%1.1f%%',
    )
    mem_ax.set_title(f'Memory Usage (Total: {vm.total / 1e9:.1f} GB)')

    # Bottom-left: load and memory of the first GPU, or a placeholder.
    if not torch.cuda.is_available():
        gpu_ax.text(0.5, 0.5, 'No GPU Available', ha='center', va='center')
        gpu_ax.set_title('GPU Usage')
    else:
        first_gpu = GPUtil.getGPUs()[0]
        gpu_percentages = {}
        gpu_percentages['GPU Load'] = first_gpu.load * 100
        gpu_percentages['Memory Used'] = (first_gpu.memoryUsed / first_gpu.memoryTotal) * 100
        gpu_ax.bar(gpu_percentages.keys(), gpu_percentages.values())
        gpu_ax.set_title(f'GPU Usage ({first_gpu.name})')
        gpu_ax.set_ylabel('Usage %')

    # Bottom-right: the processor's counters, one bar per entry.
    counters = processor.get_stats()['stats']
    stats_ax.bar(list(counters), list(counters.values()))
    stats_ax.set_title('Processing Statistics')
    stats_ax.set_ylabel('Count')

    plt.tight_layout()
    plt.show()

# Plot current usage
plot_resource_usage()

# Show current stats
# get_stats() returns only strings, booleans and numbers nested in dicts, so
# it can be passed to json.dumps as-is.
print("\n📊 Current Processing Stats:")
stats = processor.get_stats()
print(json.dumps(stats, indent=2))

## 7. Keep Server Running

In [None]:
# Keep the server running
print("🚀 Server is running!")
print(f"\n📋 Server URL: {public_url}")
print("\n⚠️  Keep this cell running to maintain the server")
print("\n💡 The server will automatically handle requests from your Flask app")

# Monitor loop: poll /health through the public URL once a minute and rewrite
# a single status line. Interrupting the cell ends the loop cleanly.
try:
    while True:
        time.sleep(60)  # Check every minute

        # Check health
        try:
            health = requests.get(f"{public_url}/health", timeout=5)
            if health.status_code == 200:
                data = health.json()
                print(f"\r✅ Server healthy | Load: {data['load']:.1%} | Memory: {data['memory_available_gb']:.1f}GB | Tasks: {processor.stats['tasks_processed']}", end='')
            else:
                # Reachable but unhealthy: report it instead of staying silent.
                print(f"\r⚠️ Server health check returned HTTP {health.status_code}", end='')
        except Exception:
            # `except Exception` rather than a bare `except`, so that a
            # KeyboardInterrupt raised during the request is not swallowed.
            print("\r❌ Server health check failed", end='')
except KeyboardInterrupt:
    print("\n🛑 Monitoring stopped")