# Dreambooth fine-tuning for Stable Diffusion using d🧨ffusers

This notebook shows how to "teach" Stable Diffusion a new concept via Dreambooth using 🤗 Hugging Face [🧨 Diffusers library](https://github.com/huggingface/diffusers).

![Dreambooth Example](https://dreambooth.github.io/DreamBooth_files/teaser_static.jpg)
_By using just 3-5 images you can teach new concepts to Stable Diffusion and personalize the model on your own images_

Unlike Textual Inversion, this approach trains the whole model, which can yield better results at the cost of a larger model.

For a general introduction to the Stable Diffusion model please refer to this [colab](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb).



## Initial setup

## Section 1 - BLIP


In [23]:
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
import torch

In [24]:

# The BLIP processor and question-answering model are both loaded from the
# same Hub checkpoint, so the identifier is kept in one place.
BLIP_CHECKPOINT = "Salesforce/blip-vqa-base"

processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForQuestionAnswering.from_pretrained(BLIP_CHECKPOINT)


In [25]:
# #@title Install the required libs
# !pip install -U -qq git+https://github.com/huggingface/diffusers.git
# !pip install -qq accelerate tensorboard transformers ftfy gradio
# !pip install -qq "ipywidgets>=7,<8"
# !pip install -qq bitsandbytes
# !pip install accelerate

In [26]:
#@title Import required libraries
import argparse
import itertools
import math
import os
from contextlib import nullcontext
import random

import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch.utils.data import Dataset

import PIL
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from diffusers import AutoencoderKL, DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, UNet2DConditionModel
from diffusers.optimization import get_scheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from PIL import Image
from torchvision import transforms
from tqdm.auto import tqdm
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
import bitsandbytes as bnb

In [3]:
from diffusers import StableDiffusionInpaintPipeline
# Inpainting pipeline: load the `Thrinath/cloth-textures` checkpoint in half
# precision, then move it onto the target device.
device = "cuda"
model_path = "Thrinath/cloth-textures"

pipe = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
pipe = pipe.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

In [4]:
import requests
from io import BytesIO

def image_grid(imgs, rows, cols):
    """Paste images into a single ``rows`` x ``cols`` RGB contact sheet.

    Images are placed left to right, top to bottom. The cell size is taken
    from the first image in ``imgs``.

    Args:
        imgs: Sequence of PIL images; must contain exactly ``rows * cols`` items.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB ``PIL.Image`` of size ``(cols * w, rows * h)``.

    Raises:
        AssertionError: If ``len(imgs)`` does not equal ``rows * cols``.
    """
    assert len(imgs) == rows*cols

    cell_w, cell_h = imgs[0].size
    sheet = PIL.Image.new('RGB', size=(cols*cell_w, rows*cell_h))

    for index, tile in enumerate(imgs):
        # divmod yields (row, column) for the running index.
        row, col = divmod(index, cols)
        sheet.paste(tile, box=(col*cell_w, row*cell_h))
    return sheet


def download_image(url):
    """Load an image from an HTTP(S) URL or a local path and return it as RGB.

    Args:
        url: Either an ``http://`` / ``https://`` URL, which is fetched with
            ``requests``, or anything ``PIL.Image.open`` accepts directly
            (for example a local file path).

    Returns:
        The image converted to RGB mode.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if isinstance(url, str) and url.lower().startswith(("http://", "https://")):
        response = requests.get(url, timeout=30)
        # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
        response.raise_for_status()
        source = BytesIO(response.content)
    else:
        source = url

    # convert() returns a fully loaded image, so the underlying file can be
    # closed as soon as the conversion is done.
    with PIL.Image.open(source) as opened:
        return opened.convert("RGB")


In [None]:
import gradio as gr
import os
from PIL import Image

# Directory for uploaded images; created up front if it does not exist yet.
upload_dir = "/content/sample_data/uploaded_images"
os.makedirs(upload_dir, exist_ok=True)

# Shared state written by the Gradio callbacks: `raw_image` is set by
# load_image() and `t_i_image` by select_image(). Both stay None until then.
raw_image = t_i_image = None

# Function to load images from a directory
def load_images_from_directory(limit=4):
  """Return paths of the image files in the masked-images directory.

  Only files ending in .png, .jpg or .jpeg (case-insensitive) are returned,
  sorted by path so the order is stable between runs.

  Args:
    limit: Maximum number of paths to return. ``None`` disables the cap.
      Non-integer values are also treated as "no cap", so a call site that
      passes a directory string positionally keeps getting every image.

  Returns:
    A list of file paths; empty when the directory does not exist.
  """
  directory = "/content/sample_data/masked_images"
  if not os.path.isdir(directory):
    # Nothing to show yet; let the UI build instead of raising.
    return []

  image_files = sorted(
    os.path.join(directory, file)
    for file in os.listdir(directory)
    if file.lower().endswith(('.png', '.jpg', '.jpeg'))
  )

  # bool is a subclass of int; exclude it so True/False is not read as 1/0.
  if isinstance(limit, int) and not isinstance(limit, bool):
    image_files = image_files[:max(limit, 0)]
  return image_files

# Function to update the global mask image variable
def select_image(image_path):
  """Load the chosen file into the global ``t_i_image`` and report it.

  The file is opened, converted to RGB and resized to 512x512 before being
  stored.

  Args:
    image_path: Path of the image file to select.

  Returns:
    A status string naming the selected path.
  """
  global t_i_image
  chosen = Image.open(image_path)
  chosen = chosen.convert("RGB")
  t_i_image = chosen.resize((512, 512))
  return f"Selected image: {image_path}"

def load_image(image):
  """Store the uploaded image in the global ``raw_image`` as RGB.

  Args:
    image: The uploaded image. A ``gr.Image(type="filepath")`` component
      passes a plain path string; file-like uploads that expose their path
      via ``.name`` are accepted too. ``None`` clears the stored image.

  Returns:
    The path formed by joining ``upload_dir`` with the upload's file name, or
    ``None`` when no image was given.
    NOTE(review): the image is not written to that path here - confirm
    whether callers expect the file to exist.
  """
  global raw_image

  if image is None:
    raw_image = None
    return None

  # A str has no `.name`, so only fall back to it for file-like objects.
  source_path = image if isinstance(image, (str, os.PathLike)) else image.name
  raw_image = Image.open(source_path).convert('RGB')
  return os.path.join(upload_dir, os.path.basename(source_path))

def answer_question(question):
  """Answer ``question`` about the image currently held in ``raw_image``.

  Args:
    question: The question text, passed to ``processor`` with the image.

  Returns:
    ``"Answer: <decoded text>"`` built from the first generated sequence, or
    a prompt to upload an image when ``raw_image`` is still ``None``.
  """
  if raw_image is not None:
    encoded = processor(raw_image, question, return_tensors="pt")
    generated = model.generate(**encoded)
    decoded = processor.decode(generated[0], skip_special_tokens=True)
    return f"Answer: {decoded}"
  return "Please upload an image first."

def run_demo(prompt, num_samples, guidance_scale=7.5, seed=0):
  """Run the inpainting pipeline on the currently selected style image.

  Args:
    prompt: Text prompt passed to the pipeline.
    num_samples: Number of images to generate; coerced to ``int`` because UI
      sliders may deliver the value as a float.
    guidance_scale: Guidance scale passed to the pipeline.
    seed: Seed for the CUDA random generator; change it to get different
      results.

  Returns:
    The pipeline's ``.images``, or an empty list when no style image has been
    selected yet.
  """
  # Check the selection first so no CUDA generator is created when there is
  # nothing to generate.
  if t_i_image is None:
    return []

  generator = torch.Generator(device="cuda").manual_seed(seed)

  # NOTE(review): the same image is passed as both `image` and `mask_image`;
  # confirm this is intended rather than a separate source image plus mask.
  return pipe(
    prompt=prompt,
    image=t_i_image,
    mask_image=t_i_image,
    guidance_scale=guidance_scale,
    generator=generator,
    num_images_per_prompt=int(num_samples),
  ).images

def save_text(text):
  """Return a confirmation message that echoes ``text``."""
  return "Text content added: {}".format(text)

# Define Gradio interface
with gr.Blocks() as demo:
  with gr.Row(variant="compact"):
    # Left column: reference-image upload and question answering.
    with gr.Column(scale=4):
      gr.Markdown("### Please upload a reference image")
      upload_button = gr.Image(label="Upload Image", type="filepath")
      upload_button.upload(load_image, upload_button)

      gr.Markdown("### Input your question")
      question_input = gr.Textbox(label="Question")
      question_run_button = gr.Button("Get Answer")
      answer_output = gr.Textbox(label="Answer", interactive=False)
      question_run_button.click(answer_question, question_input, answer_output)

    # Right column: prompt, clothing-style picker and generated results.
    with gr.Column(scale=4):
      prompt_text = gr.Textbox(label="Input Prompt")
      num_samples = gr.Slider(label="Number of Samples", minimum=1, maximum=10, step=1, value=3)

      gr.Markdown("### Clothing Style")
      selected_image_markdown = gr.Markdown(value="No image selected", visible=True)

      # load_images_from_directory() takes a `limit`, not a directory, so
      # `upload_dir` must not be passed positionally. limit=None keeps every
      # style image listed.
      image_files = load_images_from_directory(limit=None)
      for image_file in image_files:
        with gr.Row():
          # Hidden textbox carries this row's path into the click handler.
          hidden_image_path = gr.Textbox(value=image_file, visible=False)
          gr.Image(image_file, label="Image")
          select_button = gr.Button("Select")
          select_button.click(select_image, [hidden_image_path], selected_image_markdown)

      run_button = gr.Button("Run")
      output_gallery = gr.Gallery(label="Output Images")
      run_button.click(run_demo, [prompt_text, num_samples], output_gallery)

  # Free-text notes area placed below the two main columns.
  with gr.Column(scale=1):
    content_text = gr.Textbox(label="Collect all properties", lines=5)
    content_text.submit(save_text, content_text, content_text)

demo.launch(debug=True)


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://a3d09237f0346a9c12.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/gradio/queueing.py", line 532, in process_events
    response = await route_utils.call_process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1928, in process_api
    result = await self.call_function(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1514, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/usr/local/lib/python3.10/dist-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 807, in run
    r

  0%|          | 0/50 [00:00<?, ?it/s]

  return F.conv2d(input, weight, bias, self.stride,


  0%|          | 0/50 [00:00<?, ?it/s]