# 🚀 Jarvis 2.0 - Free GPU Stable Diffusion Server

This notebook runs your optimized Stable Diffusion server on Google Colab's free GPU (Tesla T4 - 16GB VRAM).

**Benefits:**
- ✅ 16GB VRAM (vs your 4GB local GPU)
- ✅ Much faster generation
- ✅ No memory issues
- ✅ Completely free

**Setup:**
1. Runtime → Change runtime type → GPU
2. Run all cells (you will be prompted for your free ngrok authtoken)
3. Copy the ngrok public URL printed by the last cell
4. Update your local app to use the Colab URL

In [None]:
# Install dependencies
# PyTorch wheels are pulled from the CUDA 11.8 (cu118) wheel index.
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# diffusers provides the Stable Diffusion pipeline; xformers backs the
# memory-efficient attention the server enables at startup.
# NOTE(review): xformers from PyPI may require a different torch build than the
# cu118 one installed above and replace it - confirm the versions are compatible.
!pip install diffusers transformers accelerate xformers
# Flask + flask-cors serve the HTTP API of the generated server script.
!pip install flask flask-cors
# pyngrok exposes the local Flask port through a public ngrok tunnel.
!pip install pyngrok

In [None]:
# Setup ngrok for public URL
from pyngrok import ngrok
import getpass

# Get your ngrok token from https://dashboard.ngrok.com/get-started/your-authtoken
print("Get your free ngrok token from: https://dashboard.ngrok.com/get-started/your-authtoken")

# getpass keeps the token out of the notebook output. Pasted tokens often carry
# stray whitespace or a trailing newline, so strip it before registering.
ngrok_token = getpass.getpass("Enter your ngrok token: ").strip()
if not ngrok_token:
    # Fail here with a clear message instead of later when the tunnel is opened.
    raise ValueError("No ngrok token entered; re-run this cell and paste your authtoken.")
ngrok.set_auth_token(ngrok_token)

In [None]:
# Create optimized Stable Diffusion server
# Source of the standalone Flask server, kept as a string so a later cell can
# write it to disk and launch it in its own process.
# The server loads Stable Diffusion v1.5 once at import time and exposes:
#   GET  /health   -> status, device and GPU name
#   POST /generate -> JSON in, base64-encoded PNG (data URL) out
server_code = '''
import torch
from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler
from flask import Flask, request, jsonify
from flask_cors import CORS
import base64
import io
import threading
from PIL import Image
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)

# Load pipeline with optimizations
device = "cuda"
logger.info(f"Loading Stable Diffusion on {device}...")

# NOTE(review): confirm this model id is still published on the Hugging Face Hub.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,  # Use float16 on powerful GPU
    safety_checker=None,
    requires_safety_checker=False
)

pipe = pipe.to(device)

# Enable all optimizations
pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.enable_attention_slicing()
pipe.enable_vae_slicing()
try:
    pipe.enable_xformers_memory_efficient_attention()
except Exception as e:
    # xformers is only an optimization: a missing or incompatible build must
    # not prevent the server from starting.
    logger.warning(f"xformers attention unavailable, continuing without it: {e}")

logger.info("✅ Pipeline loaded and optimized!")

# The Flask development server handles requests on several threads, but the
# pipeline (and its scheduler state) is shared, so generations are serialized.
generation_lock = threading.Lock()

DEFAULT_NEGATIVE_PROMPT = "ugly, deformed, disfigured, poor details, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, disgusting, blurry, amputation, extra fingers, fewer fingers, extra hands, bad hands, sketches, lowres, normal quality, worstquality, signature, watermark, username, blurry, bad feet, cropped, poorly drawn hands, poorly drawn face, mutation, deformed, worst quality, low quality, jpeg artifacts, extra fingers, fewer digits, extra limbs, extra arms, extra legs, malformed limbs, fused fingers, too many fingers, long neck, mutated hands, bad body, bad proportions, gross proportions, text, error, missing fingers, missing arms, missing legs, extra digit, extra arms, extra leg, extra foot, bad face, asymmetric eyes, cross-eyed, uneven eyes, bad teeth, bad lips, bad nose, bad ears, bad hair, bad skin, scars, moles, wrinkles, old, elderly"


def parse_int(data, key, default):
    """Return data[key] as an int, or default when the key is absent or null.

    Raises ValueError with a client-facing message when the value is not an integer.
    """
    value = data.get(key)
    if value is None:
        return default
    if isinstance(value, bool):
        # bool is an int subclass; reject it rather than treating true as 1.
        raise ValueError(f"{key} must be an integer")
    try:
        return int(value)
    except (TypeError, ValueError):
        raise ValueError(f"{key} must be an integer") from None


@app.route("/health", methods=["GET"])
def health():
    """Report that the server is up, plus the device and GPU name in use."""
    return jsonify({"status": "healthy", "device": device, "gpu": torch.cuda.get_device_name(0)})

@app.route("/generate", methods=["POST"])
def generate():
    """Generate one image from a JSON request and return it as a base64 PNG data URL.

    Accepted JSON keys: prompt, negative_prompt, width, height,
    num_inference_steps, guidance_scale, seed.
    Returns 400 for a malformed request and 500 when generation itself fails.
    """
    # silent=True yields None for a missing or non-JSON body instead of raising,
    # so a bad request is reported as 400 rather than falling into the 500 path.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"success": False, "error": "Request body must be a JSON object"}), 400

    try:
        prompt = data.get("prompt", "")
        negative_prompt = data.get("negative_prompt", DEFAULT_NEGATIVE_PROMPT)
        width = parse_int(data, "width", 768)  # Higher default resolution
        height = parse_int(data, "height", 768)  # Higher default resolution
        steps = parse_int(data, "num_inference_steps", 30)  # Higher quality default
        guidance = float(data.get("guidance_scale", 8.0))  # Better prompt adherence
        seed = parse_int(data, "seed", None)

        # The pipeline rejects sizes that are not multiples of 8; check up front
        # so the client gets a clear 400 instead of a 500 from inside the model.
        if width <= 0 or height <= 0 or width % 8 or height % 8:
            raise ValueError("width and height must be positive multiples of 8")
        if steps <= 0:
            raise ValueError("num_inference_steps must be a positive integer")
    except (TypeError, ValueError) as e:
        return jsonify({"success": False, "error": str(e)}), 400

    try:
        generator = None
        if seed is not None:
            generator = torch.Generator(device=device).manual_seed(seed)

        logger.info(f"Generating: {prompt} ({width}x{height}, {steps} steps)")

        with generation_lock, torch.no_grad():
            result = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                width=width,
                height=height,
                num_inference_steps=steps,
                guidance_scale=guidance,
                generator=generator
            )

        image = result.images[0]

        # Convert to base64
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        img_str = base64.b64encode(buffer.getvalue()).decode()

        logger.info(f"✅ Generated successfully! Size: {len(img_str)} chars")

        return jsonify({
            "success": True,
            "image": f"data:image/png;base64,{img_str}",
            "prompt": prompt,
            "seed": seed,
            "device": device
        })

    except Exception as e:
        # Top-level boundary for the request: log the traceback and report 500.
        logger.exception(f"Generation failed: {e}")
        return jsonify({"success": False, "error": str(e)}), 500

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
'''

# Write the server source to disk so it can be launched as a separate process.
# The source contains non-ASCII characters (emoji in its log messages), so the
# encoding is pinned instead of relying on the platform default.
with open('colab_sd_server.py', 'w', encoding='utf-8') as f:
    f.write(server_code)

print("✅ Server code created!")

In [None]:
# Start the server with ngrok tunnel
import threading
import time
import subprocess

# Extra standard-library modules used only by this cell.
import sys
import urllib.request

SERVER_PORT = 5000  # must match app.run(port=...) in colab_sd_server.py
STARTUP_TIMEOUT_S = 900  # the model is loaded before Flask starts listening
HEALTH_URL = f"http://127.0.0.1:{SERVER_PORT}/health"


def wait_for_server(process, timeout_s=STARTUP_TIMEOUT_S):
    """Block until the server answers its health check.

    Args:
        process: the subprocess.Popen handle of the server.
        timeout_s: maximum number of seconds to wait.

    Raises:
        RuntimeError: the server process exited before becoming ready.
        TimeoutError: the server did not become ready within timeout_s.
    """
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        exit_code = process.poll()
        if exit_code is not None:
            raise RuntimeError(f"Server process exited early with code {exit_code}")
        try:
            with urllib.request.urlopen(HEALTH_URL, timeout=5) as response:
                if response.status == 200:
                    return
        except OSError:
            # Connection refused / timed out: the server is not listening yet.
            pass
        time.sleep(2)
    raise TimeoutError(f"Server did not become ready within {timeout_s} seconds")


# Start server in background.
# sys.executable guarantees the interpreter that has the installed packages,
# and keeping the Popen handle lets this cell stop the server again.
server_process = subprocess.Popen([sys.executable, "colab_sd_server.py"])
public_url = None

try:
    # Wait for the server to actually answer instead of sleeping a fixed time;
    # loading the model can take far longer than a few seconds.
    print("⏳ Waiting for the server to finish loading the model...")
    wait_for_server(server_process)

    # Create ngrok tunnel. Newer pyngrok versions return a tunnel object whose
    # URL is in .public_url; older versions returned the URL string itself.
    tunnel = ngrok.connect(SERVER_PORT)
    public_url = getattr(tunnel, "public_url", tunnel)

    print(f"\n🎉 Your Colab Stable Diffusion server is running!")
    print(f"📍 Public URL: {public_url}")
    print(f"🏥 Health check: {public_url}/health")
    print(f"🖼️ Generate endpoint: {public_url}/generate")
    print(f"\n📋 Copy this URL and update your local app!")

    # Keep the cell (and therefore the tunnel) alive while the server runs.
    while server_process.poll() is None:
        time.sleep(60)
        print(f"⏰ Server still running: {public_url}")
    print(f"\n🛑 Server process exited with code {server_process.returncode}")
except KeyboardInterrupt:
    print("\n🛑 Server stopped")
finally:
    # Always release the tunnel and the port so the cell can be re-run.
    if public_url is not None:
        ngrok.disconnect(public_url)
    if server_process.poll() is None:
        server_process.terminate()
        try:
            server_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            server_process.kill()