# Serving a Stable Diffusion Model

This example runs a Stable Diffusion application with Ray Serve.

It uses the [stabilityai/stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2) model and [FastAPI](https://fastapi.tiangolo.com/) to build the application.

In [None]:
# Install the dependencies used by this notebook (-q keeps pip's output quiet).

# Ray itself, pinned to 2.6.1.
!pip install "ray==2.6.1" -q
# PyTorch, installed with pip's cache disabled.
!pip install torch --no-cache-dir -q
# Ray Serve extras plus the HTTP client, model, and web-framework libraries; FastAPI is pinned to 0.96.
!pip install "ray[serve]" requests diffusers transformers fastapi==0.96 -q

In [1]:
import ray

# Connect to the remote Ray cluster (ray:// address) and request the pip
# packages below as this session's runtime environment.
_cluster_address = "ray://example-cluster-kuberay-head-svc:10001"

# Packages requested for the runtime environment; version pins are kept
# exactly as specified.
_pip_requirements = [
    "IPython",
    "boto3==1.26",
    "botocore==1.29",
    "datasets",
    "diffusers",
    "fastapi==0.96",
    "accelerate>=0.16.0",
    "transformers>=4.26.0",
    "numpy<1.24",
    "torch",
]

# Kept as the last expression of the cell so the notebook still displays the
# connection summary returned by ray.init().
ray.init(address=_cluster_address, runtime_env={"pip": _pip_requirements})

0,1
Python version:,3.10.8
Ray version:,2.6.1
Dashboard:,http://10.24.128.195:8265


In [None]:
from io import BytesIO

import torch
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from PIL import Image
from ray import serve

app = FastAPI()


@serve.deployment(num_replicas=1, route_prefix="/")
@serve.ingress(app)
class APIIngress:
    """HTTP front end that forwards image requests to the diffusion deployment.

    Exposes two routes on ``app``:

    * ``GET /imagine`` -- generate an image from a prompt with fixed settings.
    * ``POST /generate_advanced`` -- generate an image with caller-supplied
      guidance scale, step count and seed.

    Both routes return the generated image encoded as PNG.
    """

    def __init__(self, diffusion_model_handle) -> None:
        """Store the handle used to call the diffusion model deployment.

        Args:
            diffusion_model_handle: Handle whose ``generate`` and
                ``generate_advanced`` methods are invoked via ``.remote(...)``.
        """
        self.handle = diffusion_model_handle

    @staticmethod
    def _require_prompt(prompt: str) -> None:
        """Reject an empty prompt with an HTTP 400 response.

        An explicit exception is used instead of ``assert`` so the check still
        runs under ``python -O`` and the client receives a 4xx status rather
        than an internal server error.

        Raises:
            HTTPException: With status 400 when ``prompt`` is empty.
        """
        if not prompt:
            raise HTTPException(
                status_code=400, detail="prompt parameter cannot be empty"
            )

    @staticmethod
    def _png_response(image) -> Response:
        """Encode ``image`` as PNG in memory and wrap it in a ``Response``.

        Args:
            image: Object exposing ``save(stream, "PNG")`` (the value returned
                by the diffusion deployment).
        """
        file_stream = BytesIO()
        image.save(file_stream, "PNG")
        return Response(content=file_stream.getvalue(), media_type="image/png")

    @app.get(
        "/imagine",
        responses={200: {"content": {"image/png": {}}}},
        response_class=Response,
    )
    async def generate(self, prompt: str, img_size: int = 512):
        """Generate an image for ``prompt`` and return it as PNG.

        Args:
            prompt: Text prompt; must not be empty.
            img_size: Image size forwarded to the diffusion deployment.

        Raises:
            HTTPException: With status 400 when ``prompt`` is empty.
        """
        self._require_prompt(prompt)

        # The handle call is awaited to obtain a reference to the pending
        # result, and that reference is awaited to obtain the image itself.
        image_ref = await self.handle.generate.remote(prompt, img_size=img_size)
        image = await image_ref
        return self._png_response(image)

    @app.post(
        "/generate_advanced",
        responses={200: {"content": {"image/png": {}}}},
        response_class=Response,
    )
    async def generate_advanced(
        self,
        prompt: str,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 15,
        seed: int = 1024,
        img_size: int = 512,
    ):
        """Generate an image with caller-controlled sampling settings.

        Args:
            prompt: Text prompt; must not be empty.
            guidance_scale: Guidance scale forwarded to the diffusion deployment.
            num_inference_steps: Number of inference steps forwarded to the
                diffusion deployment.
            seed: Seed forwarded to the diffusion deployment.
            img_size: Image size forwarded to the diffusion deployment.

        Raises:
            HTTPException: With status 400 when ``prompt`` is empty.
        """
        self._require_prompt(prompt)

        # Same two-step await as in ``generate``: first the reference, then
        # the image it resolves to.
        image_ref = await self.handle.generate_advanced.remote(
            prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            seed=seed,
            img_size=img_size,
        )
        image = await image_ref
        return self._png_response(image)
    
@serve.deployment(
    ray_actor_options={"num_gpus": 1},
    autoscaling_config={"min_replicas": 0, "max_replicas": 2},
)
class StableDiffusionV2:
    """Deployment wrapping the ``stabilityai/stable-diffusion-2`` pipeline.

    Each replica requests one GPU and loads the pipeline in float16 onto
    ``cuda`` when it is constructed.
    """

    def __init__(self):
        """Load the scheduler and pipeline and move the pipeline to the GPU."""
        # Imported here rather than at module level so that diffusers is only
        # needed where the replica is constructed.
        from diffusers import EulerDiscreteScheduler, StableDiffusionPipeline

        model_id = "stabilityai/stable-diffusion-2"

        scheduler = EulerDiscreteScheduler.from_pretrained(
            model_id, subfolder="scheduler"
        )
        self.pipe = StableDiffusionPipeline.from_pretrained(
            model_id, scheduler=scheduler, revision="fp16", torch_dtype=torch.float16
        )
        self.pipe = self.pipe.to("cuda")

    @staticmethod
    def image_grid(imgs, rows, cols):
        """Paste ``imgs`` row by row into a single ``rows`` x ``cols`` image.

        Declared as a static method because it takes no ``self``; without the
        decorator, calling it on an instance would bind the instance to
        ``imgs``.

        Args:
            imgs: Sequence of images; every cell is sized from ``imgs[0].size``.
            rows: Number of rows in the grid.
            cols: Number of columns in the grid.

        Returns:
            A new RGB image of size ``(cols * w, rows * h)``.

        Raises:
            ValueError: If ``len(imgs)`` is not ``rows * cols``.
        """
        if len(imgs) != rows * cols:
            raise ValueError(
                f"expected {rows * cols} images for a {rows}x{cols} grid, "
                f"got {len(imgs)}"
            )

        w, h = imgs[0].size
        grid = Image.new("RGB", size=(cols * w, rows * h))

        for i, img in enumerate(imgs):
            # Column index is i % cols, row index is i // cols.
            grid.paste(img, box=(i % cols * w, i // cols * h))
        return grid

    def generate(self, prompt: str, img_size: int = 512):
        """Generate one image for ``prompt`` with the fixed default settings.

        Uses guidance scale 7.5, 15 inference steps and seed 1024.

        Args:
            prompt: Text prompt; must not be empty.
            img_size: Height and width requested from the pipeline.

        Returns:
            The first image produced by the pipeline.

        Raises:
            ValueError: If ``prompt`` is empty.
        """
        # The settings are passed explicitly so this method keeps its own
        # fixed values even if the defaults of generate_advanced change.
        return self.generate_advanced(
            prompt,
            guidance_scale=7.5,
            num_inference_steps=15,
            seed=1024,
            img_size=img_size,
        )

    def generate_advanced(
        self,
        prompt: str,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 15,
        seed: int = 1024,
        img_size: int = 512,
    ):
        """Generate one image for ``prompt`` with caller-supplied settings.

        Args:
            prompt: Text prompt; must not be empty.
            guidance_scale: Passed to the pipeline as ``guidance_scale``.
            num_inference_steps: Passed to the pipeline as
                ``num_inference_steps``.
            seed: Seed for the CUDA random generator given to the pipeline.
            img_size: Passed to the pipeline as both ``height`` and ``width``.
                NOTE(review): diffusers is expected to reject sizes that are
                not a multiple of 8 -- confirm against the installed version.

        Returns:
            The first image produced by the pipeline.

        Raises:
            ValueError: If ``prompt`` is empty.
        """
        # Explicit check instead of ``assert`` so it still runs under -O.
        if not prompt:
            raise ValueError("prompt parameter cannot be empty")

        generator = torch.Generator("cuda").manual_seed(seed)

        # img_size was previously accepted but never forwarded, so the
        # pipeline always fell back to its own default resolution.
        result = self.pipe(
            prompt,
            height=img_size,
            width=img_size,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=generator,
        )
        return result.images[0]

In [None]:
# Bind the model deployment first, then pass it to the HTTP ingress as its
# constructor argument.
diffusion_model = StableDiffusionV2.bind()
deployment = APIIngress.bind(diffusion_model)

# Deploy the application with host set to 0.0.0.0.
serve.run(deployment, host="0.0.0.0")

## Shut Down Ray Serve

In [2]:
# from ray import serve

# serve.shutdown()