<a href="https://colab.research.google.com/github/qunash/stable-diffusion-2-gui/blob/main/stable_diffusion_2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Stable Diffusion 2**
Gradio app for [Stable Diffusion 2](https://huggingface.co/stabilityai/stable-diffusion-2) by [Stability AI](https://stability.ai/).
It uses the [Hugging Face](https://huggingface.co/) Diffusers🧨 implementation.

Currently supported pipelines are `text-to-image` and `image-to-image`, both using the `768-v-ema.ckpt` checkpoint.

Other pipelines (depth-to-image, inpainting, upscaling) will be added as soon as they are implemented in the Diffusers🧨 library.

<br>

Colab by [anzorq](https://twitter.com/hahahahohohe). If you like it, please consider supporting me:

<a href="https://www.buymeacoffee.com/anzorq" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" height="32px" width="108px" alt="Buy Me A Coffee"></a><br>
[![GitHub Repo stars](https://img.shields.io/github/stars/qunash/stable-diffusion-2-gui?style=social)](https://github.com/qunash/stable-diffusion-2-gui)

![visitors](https://visitor-badge.glitch.me/badge?page_id=anzorq.sd-2-colab-header)

# Install dependencies (~1.5 mins)

In [None]:
!pip install --upgrade git+https://github.com/huggingface/diffusers.git
!pip install transformers
# !pip install git+https://github.com/huggingface/transformers
!pip install accelerate
!pip install scipy
# !pip install xformers
!pip install -q https://github.com/metrolobo/xformers_wheels/releases/download/1d31a3ac_various_6/xformers-0.0.14.dev0-cp37-cp37m-linux_x86_64.whl
!pip install triton
!pip install ftfy
!pip install gradio -q

# Run the app

In [None]:
#@title ⬇️🖼️
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionImageVariationPipeline, EulerDiscreteScheduler
import gradio as gr
import torch
from PIL import Image

# Hugging Face Hub repository that provides the weights and scheduler config.
model_id = 'stabilityai/stable-diffusion-2'

# One scheduler instance is shared by both pipelines below.
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")

# Text-to-image pipeline: half precision when CUDA is available, full
# precision otherwise.
# NOTE(review): confirm that an "fp32" revision exists for this repository;
# the CPU fallback depends on it.
pipe = StableDiffusionPipeline.from_pretrained(
  model_id,
  revision="fp16" if torch.cuda.is_available() else "fp32",
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
  scheduler=scheduler)

# Image-to-image pipeline: reuse the components that were just loaded (UNet,
# VAE, text encoder, tokenizer, scheduler, ...) instead of calling
# from_pretrained a second time, which would keep a duplicate copy of every
# weight in memory.
pipe_i2i = StableDiffusionImg2ImgPipeline(**pipe.components)

# pipe_variations = StableDiffusionImageVariationPipeline.from_pretrained(
#   model_id,
#   revision="fp16" if torch.cuda.is_available() else "fp32",
#   torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
#   scheduler=scheduler)


if torch.cuda.is_available():
  # The modules are shared, so the second call finds them already on the GPU.
  pipe = pipe.to("cuda")
  pipe_i2i = pipe_i2i.to("cuda")
  # pipe_variations = pipe_variations.to("cuda")

def error_str(error, title="Error"):
    """Format an error as a Markdown snippet for the UI error panel.

    Args:
        error: The error to show (any object usable in an f-string). A falsy
            value means "no error".
        title: Text placed in the level-4 Markdown heading.

    Returns:
        The heading line followed by the error text on an indented second
        line, or an empty string when ``error`` is falsy.
    """
    if not error:
        return ""

    # The second line keeps the exact leading whitespace of the original
    # message layout.
    indent = " " * 12
    return f"#### {title}\n{indent}{error}"

def switch_attention_slicing(attn_slicing):
    """Turn attention slicing on or off for both global pipelines.

    Args:
        attn_slicing: Truthy to enable attention slicing, falsy to disable it.
    """
    # Apply the same setting to the text-to-image pipeline first, then to the
    # image-to-image pipeline.
    for pipeline in (pipe, pipe_i2i):
        if attn_slicing:
            pipeline.enable_attention_slicing()
        else:
            pipeline.disable_attention_slicing()

def switch_mem_eff_attn(mem_eff_attn):
    """Turn xformers memory-efficient attention on or off for both pipelines.

    Args:
        mem_eff_attn: Truthy to enable memory-efficient attention, falsy to
            disable it.
    """
    # Apply the same setting to the text-to-image pipeline first, then to the
    # image-to-image pipeline.
    for pipeline in (pipe, pipe_i2i):
        if mem_eff_attn:
            pipeline.enable_xformers_memory_efficient_attention()
        else:
            pipeline.disable_xformers_memory_efficient_attention()

def inference(inf_mode, prompt, n_images, guidance, steps, width=768, height=768, seed=0, img=None, strength=0.5, neg_prompt=""):
  """Run one generation request coming from the UI.

  Dispatches to img_to_img() when ``inf_mode`` is "Image to Image" and to
  txt_to_img() for any other mode.

  Args:
    inf_mode: Selected inference mode label.
    prompt: Text prompt.
    n_images: Number of images to generate per prompt.
    guidance: Guidance scale.
    steps: Number of inference steps.
    width: Requested output width.
    height: Requested output height.
    seed: Random seed; 0 means "no fixed seed" (no generator is passed on).
    img: Source image for image-to-image mode; required in that mode.
    strength: Transformation strength for image-to-image mode.
    neg_prompt: Negative prompt.

  Returns:
    A ``(images, error_update)`` pair. On success ``images`` is the list
    returned by the pipeline and ``error_update`` hides the error panel. On
    failure ``images`` is None and ``error_update`` shows the formatted error.
  """
  try:
    # Build the seeded generator inside the try block so that a failure here
    # is reported in the UI like any other inference error.
    seed = int(seed)  # manual_seed() needs an int
    generator = None
    if seed != 0:
      # Create the generator on the device the pipelines actually run on; a
      # hard-coded 'cuda' generator fails on CPU-only hosts.
      device = 'cuda' if torch.cuda.is_available() else 'cpu'
      generator = torch.Generator(device).manual_seed(seed)

    if inf_mode == "Image to Image":
      if img is None:
        return None, gr.update(visible=True, value=error_str("Image is required for Image to Image inference"))

      return img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator), gr.update(visible=False, value=None)

    return txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator), gr.update(visible=False, value=None)
  except Exception as e:
    # Top-level UI boundary: surface any failure in the error panel instead
    # of crashing the event handler.
    return None, gr.update(visible=True, value=error_str(e))

def txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator):
    """Generate images from a text prompt with the global text-to-image pipeline.

    Args:
        prompt: Text prompt.
        n_images: Number of images to generate per prompt.
        neg_prompt: Negative prompt.
        guidance: Guidance scale.
        steps: Number of inference steps; converted to ``int`` before use.
        width: Output width.
        height: Output height.
        generator: Random generator to pass to the pipeline, or None.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    call_options = dict(
        negative_prompt=neg_prompt,
        num_images_per_prompt=n_images,
        guidance_scale=guidance,
        num_inference_steps=int(steps),
        height=height,
        width=width,
        generator=generator,
    )
    output = pipe(prompt, **call_options)
    return output.images

def img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator):
    """Generate images from a prompt and a source image with the global img2img pipeline.

    The source image is rescaled, keeping its aspect ratio, by the largest
    factor that still fits inside ``width`` x ``height``.

    Args:
        prompt: Text prompt.
        n_images: Number of images to generate per prompt.
        neg_prompt: Negative prompt.
        img: Source image; must expose ``width``, ``height`` and ``resize``.
        strength: Transformation strength.
        guidance: Guidance scale.
        steps: Number of inference steps; converted to ``int`` before use.
        width: Bounding width for the resized source image; also forwarded
            to the pipeline.
        height: Bounding height for the resized source image; also forwarded
            to the pipeline.
        generator: Random generator to pass to the pipeline, or None.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    # One scale factor for both axes, chosen so neither exceeds its bound.
    scale = min(height / img.height, width / img.width)
    target_size = (int(img.width * scale), int(img.height * scale))
    resized = img.resize(target_size, Image.LANCZOS)

    call_options = dict(
        init_image=resized,
        strength=strength,
        negative_prompt=neg_prompt,
        num_images_per_prompt=n_images,
        guidance_scale=guidance,
        num_inference_steps=int(steps),
        height=height,
        width=width,
        generator=generator,
    )
    output = pipe_i2i(prompt, **call_options)
    return output.images

# Custom CSS passed to the Blocks app: header layout and typography, underlined
# links, tab margins, and a minimum height for the element with id "gallery".
css = """.main-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.main-div div h1{font-weight:900;margin-bottom:7px}.main-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}
"""
# Build the UI. Components are created inside nested layout contexts, so the
# order and nesting of the statements below define the page layout.
with gr.Blocks(css=css) as demo:
    # Page header: title, link to the checkpoint, and whether CUDA is available.
    gr.HTML(
        f"""
          <div class="main-div">
            <div>
              <h1>Stable Diffusion 2</h1>
            </div><br>
            <p> Model used: <a href="https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/768-v-ema.ckpt" target="_blank">768-v-ema.ckpt</a></p>
            Running on <b>{"GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"}</b>
          </div>
        """
    )
    with gr.Row():
        
        # Left column: prompt box, Generate button, result gallery, error panel.
        with gr.Column(scale=55):
          with gr.Group():
              with gr.Row():
                prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder=f"Enter prompt").style(container=False)
                generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))

              gallery = gr.Gallery(label="Generated images", show_label=False).style(grid=[2], height=768)
          # Created hidden; inference() returns an update that shows it on error.
          error_output = gr.Markdown(visible=False)

        # Right column: inference mode and generation settings.
        with gr.Column(scale=45):
          inf_mode = gr.Radio(label="Inference Mode", choices=["Text to Image", "Image to Image"], value="Text to Image")
          
          # Image-to-image inputs; created hidden and toggled by the
          # inf_mode.change handler below.
          with gr.Group(visible=False) as img_to_img_options:
            image = gr.Image(label="Image", height=128, tool="editor", type="pil")
            strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)

          with gr.Group():
            neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")

            n_images = gr.Slider(label="Number of images", value=1, minimum=1, maximum=4, step=1)
            with gr.Row():
              guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
              steps = gr.Slider(label="Steps", value=50, minimum=2, maximum=75, step=1)

            with gr.Row():
              width = gr.Slider(label="Width", value=768, minimum=64, maximum=1024, step=8)
              height = gr.Slider(label="Height", value=768, minimum=64, maximum=1024, step=8)

            # inference() treats a seed of 0 as "do not fix the seed".
            seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)
            with gr.Accordion("Memory optimization"):
              attn_slicing = gr.Checkbox(label="Attention slicing (a bit slower, but uses less memory)", value=False)
              # mem_eff_attn = gr.Checkbox(label="Memory efficient attention (xformers) --- WIP, does nothing atm", value=False, interactive=True)

    # Event wiring.
    # Show the image-to-image group only while "Image to Image" is selected.
    inf_mode.change(lambda x: gr.update(visible = x == "Image to Image"), inputs=[inf_mode], outputs=[img_to_img_options], queue=False)
    # Forward the checkbox state to both pipelines.
    attn_slicing.change(lambda x: switch_attention_slicing(x), inputs=[attn_slicing], queue=False)
    # mem_eff_attn.change(lambda x: switch_mem_eff_attn(x), inputs=[mem_eff_attn], queue=False)
    # The order of `inputs` matches the positional parameters of inference();
    # `outputs` matches its (images, error_update) return pair.
    inputs = [inf_mode, prompt, n_images, guidance, steps, width, height, seed, image, strength, neg_prompt]
    outputs = [gallery, error_output]
    # Both submitting the prompt box and clicking Generate run inference().
    prompt.submit(inference, inputs=inputs, outputs=outputs)
    generate.click(inference, inputs=inputs, outputs=outputs)

    # Page footer: author credit, support links and visitor badge.
    gr.HTML("""
    <div style="border-top: 1px solid #303030;">
      <br>
      <p>Space by: <a href="https://twitter.com/hahahahohohe"><img src="https://img.shields.io/twitter/follow/hahahahohohe?label=%40anzorq&style=social" alt="Twitter Follow"></a></p><br>
      <p>Enjoying this app? Please consider <a href="https://www.buymeacoffee.com/anzorq">supporting me</a></p>
      <a href="https://www.buymeacoffee.com/anzorq" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 45px !important;width: 162px !important;" ></a><br><br>
      <p><img src="https://visitor-badge.glitch.me/badge?page_id=anzorq.sd-2-colab" alt="visitors"></p>
    </div>
    """)

# Launch the app with debug mode on and a public share link requested.
demo.launch(debug=True, share=True, height=768)
