In [2]:
# Install required libraries
!pip install diffusers transformers torch torchvision gradio accelerate xformers --upgrade

# Install ControlNet (optional, for pose control)
!pip install git+https://github.com/huggingface/diffusers.git@main
!pip install controlnet-aux

# Download Stable Diffusion model (this might take a few minutes)
from diffusers import StableDiffusionPipeline, ControlNetModel
import torch

# Load the base Stable Diffusion model
pipe = StableDiffusionPipeline.from_pretrained("hakurei/waifu-diffusion", torch_dtype=torch.float16)
pipe = pipe.to("cuda")
# Load ControlNet model for pose control (optional)
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_openpose", torch_dtype=torch.float16)
pipe.controlnet = controlnet

Collecting transformers
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting torch
  Downloading torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.21.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting gradio
  Downloading gradio-5.20.0-py3-none-any.whl.metadata (16 kB)
Collecting accelerate
  Downloading accelerate-1.4.0-py3-none-any.whl.metadata (19 kB)
Collecting xformers
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/577 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

scheduler%2Fscheduler_config.json:   0%|          | 0.00/341 [00:00<?, ?B/s]

(…)ure_extractor%2Fpreprocessor_config.json:   0%|          | 0.00/518 [00:00<?, ?B/s]

tokenizer%2Fspecial_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

text_encoder%2Fconfig.json:   0%|          | 0.00/620 [00:00<?, ?B/s]

safety_checker%2Fconfig.json:   0%|          | 0.00/4.89k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

tokenizer%2Fmerges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, ?B/s]

tokenizer%2Fvocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer%2Ftokenizer_config.json:   0%|          | 0.00/819 [00:00<?, ?B/s]

vae%2Fconfig.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

unet%2Fconfig.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/999 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/1.45G [00:00<?, ?B/s]

In [None]:
import gradio as gr
from PIL import Image
import numpy as np

def generate_image(prompt, pose_image=None, steps=50, guidance_scale=11.2, controlnet_strength=0.41):
    """
    Generate an anime-style image using Stable Diffusion, with optional pose control via ControlNet.
    """
    # Define a negative prompt to avoid NSFW content and distortions
    negative_prompt = "NSFW, revealing clothing, suggestive pose, nudity, inappropriate content, blurry, distorted, pixelated, low quality, artifacts, extra limbs, deformed face"

    # Prepare the pipeline with or without ControlNet
    if pose_image is not None:
        # Convert pose image to the format ControlNet expects
        pose_image = Image.fromarray(np.array(pose_image)).convert("RGB")
        generator = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=pose_image,
            controlnet_strength=controlnet_strength,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=controlnet_strength,
            height=768,
            width=768
        )
    else:
        # Basic text-to-image generation without ControlNet
        generator = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            height=768,
            width=768
        )

    # Return the generated image
    return generator.images[0]

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Anime Character Generator with Stable Diffusion")
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Describe your anime character and pose",
                value="A young anime girl with long blonde hair and sparkling blue eyes, sitting calmly on a wooden bench in a sunny park, wearing a fully covered Japanese school uniform with a white sailor blouse, blue accents, red tie, and a modest blue skirt, in the style of Kyoto Animation, highly detailed, vibrant colors, clean outlines, soft sunlight filtering through trees, lush green park background with a pathway, serene and peaceful atmosphere"
            )
            pose_upload = gr.Image(label="Optional: Upload a pose reference image (stick figure or 3D model)")
            steps_slider = gr.Slider(minimum=10, maximum=50, value=50, label="Inference Steps")
            guidance_slider = gr.Slider(minimum=5, maximum=15, value=11.2, label="Guidance Scale")
            controlnet_slider = gr.Slider(minimum=0, maximum=1, value=0.41, label="ControlNet Strength (0 = off, 1 = full)")
            submit_btn = gr.Button("Generate Image")
        with gr.Column():
            output_image = gr.Image(label="Generated Anime Character")

    submit_btn.click(
        fn=generate_image,
        inputs=[prompt_input, pose_upload, steps_slider, guidance_slider, controlnet_slider],
        outputs=output_image
    )

# Launch the Gradio interface
demo.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://9225da5697aaa12dba.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Token indices sequence length is longer than the specified maximum sequence length for this model (86 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['background with a pathway , serene and peaceful atmosphere']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['background with a pathway , serene and peaceful atmosphere']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- oriented atmosphere "']


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['sky']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['sky']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['sky']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['sky']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['sky']


  0%|          | 0/33 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['sky']


  0%|          | 0/36.3 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2108, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1655, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
           ^^^^^^^^^^

  0%|          | 0/36.3 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2108, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1655, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
           ^^^^^^^^^^