<a href="https://colab.research.google.com/github/JMQuehl/stable-diffusion/blob/colab/colab_notebooks/Stable_Diffusion_All_in_One.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Stable Diffusion** 
To use this Colab, first get an access token for the Stable Diffusion weights hosted on Hugging Face (https://huggingface.co/CompVis/stable-diffusion). Once you have accepted the license for this model and generated a token, paste it into the `access_token` field below.

If you get CUDA out-of-memory or similar errors, try enabling the lower-precision model (`use_lower_precision`). There is also an optional SFW filter, which you can enable below to filter out potentially offensive content.

In [None]:
# Hugging Face access token used when downloading the Stable Diffusion weights.
# Keep the real token out of any saved or shared copy of this notebook.
access_token = "put_your_token_here" #@param{type: 'string'}
# When True, the fp16 revision of the weights is loaded for the text-to-image pipeline.
use_lower_precision = False #@param{type: 'boolean'}
# When False, the pipelines' safety checker is replaced by a pass-through function.
use_pipeline_with_safety_filter = False #@param{type: 'boolean'}

# 1. Setup

First, please make sure you are using a GPU runtime to run this notebook, so inference is much faster. If the following command fails, use the `Runtime` menu above and select `Change runtime type`.

In [None]:
# GPU check for this runtime; if the command fails, switch to a GPU runtime first.
!nvidia-smi

Prepare and set up libraries for Stable Diffusion

In [None]:
!pip install diffusers==0.2.4
!pip install transformers scipy ftfy
!pip install "ipywidgets>=7,<8"
!wget https://raw.githubusercontent.com/huggingface/diffusers/4674fdf807cdefd4db1758067c0207872d805f8c/examples/inference/image_to_image.py

from google.colab import output
output.enable_custom_widget_manager()

Prepare Stable Diffusion Pipelines for Text2Image and Image2Image

In [None]:
import torch
import gc
from diffusers import StableDiffusionPipeline
from image_to_image import StableDiffusionImg2ImgPipeline, preprocess

_sd_repo = "CompVis/stable-diffusion-v1-4"
# Keyword arguments that select the half-precision (fp16) revision of the weights.
_half_precision = dict(revision="fp16", torch_dtype=torch.float16)

# Text-to-image pipeline: half-precision weights only when requested.
_txt2img_options = _half_precision if use_lower_precision else {}
pipe = StableDiffusionPipeline.from_pretrained(_sd_repo, use_auth_token=access_token, **_txt2img_options)

# Image-to-image pipeline: loaded as fp16 in both cases.
# NOTE(review): this ignores `use_lower_precision`; it may be a deliberate way to
# save GPU memory or a copy-paste leftover -- confirm before changing.
im2im_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(_sd_repo, use_auth_token=access_token, **_half_precision)

if not use_pipeline_with_safety_filter:
    def dummy_checker(images, **kwargs):
        """Pass-through stand-in for the safety checker: returns the images unchanged and a False flag."""
        return images, False

    pipe.safety_checker = im2im_pipe.safety_checker = dummy_checker

# Move both pipelines to the GPU.
pipe = pipe.to("cuda")
im2im_pipe = im2im_pipe.to("cuda")

Define Helper Function(s)

In [None]:
from PIL import Image
def image_grid(imgs, rows, cols):
    """Tile images into a single ``rows`` x ``cols`` grid image.

    Images are placed row by row (left to right, then top to bottom). The
    tile size is taken from the first image.

    Args:
        imgs: Sequence of images; its length must equal ``rows * cols``.
        rows: Number of tile rows.
        cols: Number of tile columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``, where ``(w, h)`` is
        the size of ``imgs[0]``.

    Raises:
        AssertionError: If ``len(imgs)`` differs from ``rows * cols``.
    """
    assert len(imgs) == rows * cols, (
        f"expected {rows * cols} images for a {rows}x{cols} grid, got {len(imgs)}"
    )

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        # i-th image goes to row i // cols, column i % cols.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid


# 2. Settings

In [None]:
import os
from google.colab import files
import shutil
#@markdown #####**General-Settings:**
prompt = " Amazing, complex, intricate and highly detailed treehouse in a snow covered bonsai tree on top of a table, steampunk, vibrant colors, vibrant, beautiful, contrast, neon highlights, Highly detailed, ray tracing, digital painting, artstation, concept art, smooth, sharp focus, illustration, art by Beeple, Mike Winklemann, 8 k" #@param{type: 'string'}
width = 512 #@param{type: 'number'}
height = 512 #@param{type: 'number'}
steps = 50 #@param{type: 'number'}
use_seed = False #@param{type: 'boolean'}
seed = 1240415754 #@param{type: 'number'}
guidance_scale = 7.5 #@param{type: 'number'}
num_cols = 2 #@param{type: 'number'}
num_rows = 2 #@param{type: 'number'}
batch_size = num_rows * num_cols

#@markdown #####**Init_Image-Settings:**
use_init_image = False #@param{type: 'boolean'}
init_image_strength = 0.6 #@param{type: 'number'}
upload_new_image = True #@param{type: 'boolean'}
#@markdown if a new image is uploaded that takes precedence over the batch image.
use_image_from_previous_batch_as_init = True #@param{type: 'boolean'}
batch_image_number = 0 #@param{type: 'number'}
resize = False #@param{type: 'boolean'}

if height > 512 or width > 512:
  print("Warning: If height or width are larger than 512 coherence might be lost!")

# Unseeded placeholder; the run cell creates the seeded generator it actually uses.
generator = torch.Generator("cuda")

if use_init_image :
  # Candidate 1: an image from the previous batch (only if a batch exists in this session).
  if use_image_from_previous_batch_as_init and "all_images" in globals():
    init_image = all_images[batch_image_number]

  # Candidate 2: a freshly uploaded image. It overrides candidate 1 and is also
  # the fallback when no init image is available yet.
  if "init_image" not in globals() or upload_new_image:
    upload_folder = 'upload'
    # Start from an empty upload folder on every run.
    if os.path.isdir(upload_folder):
      shutil.rmtree(upload_folder)
    os.mkdir(upload_folder)
    # upload images
    uploaded = files.upload()
    # A cancelled upload dialog yields no files; fail with a clear message
    # instead of an IndexError further down.
    if not uploaded:
      raise ValueError("No image was uploaded. Upload an image or disable use_init_image.")

    for filename in uploaded.keys() :
      dst_path = os.path.join(upload_folder, filename)
      print(f'move {filename} to {dst_path}')
      shutil.move(filename, dst_path)
    # Only the first uploaded file is used as the init image.
    filename = os.path.join(upload_folder, next(iter(uploaded)))
    init_image = Image.open(filename).convert("RGB")


  if (resize):
    init_image = init_image.resize((width, height))
  # Output of the img2img example's `preprocess` helper; the run cell passes it on as `init_image`.
  init_image_cuda = preprocess(init_image)
  print ("Init image will be: ")
  display(init_image)

# 3. Run Standard Pipeline

If `use_seed` is enabled, the result will always be the same for the same *prompt* and settings.



In [None]:
# Free as much GPU memory as possible before sampling.
gc.collect()
torch.cuda.empty_cache()
from torch import autocast

# Without a fixed seed, draw a fresh one so the run can still be reproduced
# from the printed value.
if not use_seed :
  import random
  random.seed()
  seed = random.randint(0, 2**32)

generator = torch.Generator("cuda").manual_seed(seed)
print("using seed: {seed}".format(seed = seed))

# One pipeline call produces one grid row: `num_cols` copies of the prompt.
prompts = [prompt] * num_cols

all_images = []
for i in range(num_rows):
  with autocast("cuda"):
    if not use_init_image or "init_image_cuda" not in globals():
      images = pipe(prompts, num_inference_steps=steps, height=height, width=width, generator=generator, guidance_scale=guidance_scale)["sample"]  # image here is in [PIL format](https://pillow.readthedocs.io/en/stable/)
    else:
      # The init image must go through the image-to-image pipeline; the
      # text-to-image `pipe` does not use `init_image` / `strength`.
      images = im2im_pipe(prompts, num_inference_steps=steps, generator=generator, guidance_scale=guidance_scale, init_image=init_image_cuda, strength=init_image_strength)["sample"]
  all_images.extend(images)

grid = image_grid(all_images, rows=num_rows, cols=num_cols)
grid

Set Upscaler Settings

In [None]:
# Index into `all_images` (filled by the standard pipeline run) of the image to upscale.
image_to_enhance = 1 #@param{type: 'number'}
pil_img = all_images[image_to_enhance]

# Name of the Real-ESRGAN weights to use; must match one of the downloaded `.pth` files.
esrgan_model_name = "RealESRGAN_x4plus" #@param ["RealESRGAN_x4plus", "RealESRNet_x4plus", "RealESRGAN_x4plus_anime_6B", "RealESRGAN_x2plus", "realesr-animevideov3"]
# Requested output scale; passed to the upscaler / face enhancer in the cells below.
outscale = 3.5 #@param{type: 'number'}
# When True, the image is processed by GFPGAN with Real-ESRGAN as its background upsampler.
face_enhance = True #@param{type: 'boolean'}
# Last expression of the cell: show the selected image.
all_images[image_to_enhance]

# 4. Prepare ESRGAN for upscaling an image

In [None]:
# Clone Real-ESRGAN and enter the Real-ESRGAN
# NOTE(review): `%cd` changes the working directory for all later cells, and a
# re-run of this cell starts from inside the clone -- run it once per session.
!git clone https://github.com/xinntao/Real-ESRGAN.git
%cd Real-ESRGAN
# Set up the environment
!pip install basicsr
!pip install facexlib
!pip install gfpgan
!pip install -r requirements.txt
!python setup.py develop
# Download the pre-trained model
# All five selectable weight files are fetched into experiments/pretrained_models,
# which is the first place the next cell looks for `<model name>.pth`.
!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P experiments/pretrained_models
!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth -P experiments/pretrained_models
!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P experiments/pretrained_models
!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth -P experiments/pretrained_models
!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth -P experiments/pretrained_models


Define ESRGAN-Function

In [None]:
import cv2
import glob
import os
import numpy
from basicsr.archs.rrdbnet_arch import RRDBNet

from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow

# Options forwarded to RealESRGANer below.
tile = 0
tile_pad = 10
pre_pad = 0
gpu_id = None
fp32 = True  # RealESRGANer is created with half=not fp32
alpha_upsampler = "realesrgan" # realesrgan | bicubic

torch.cuda.empty_cache()

# determine models according to model names
if esrgan_model_name in ['RealESRGAN_x4plus', 'RealESRNet_x4plus']:  # x4 RRDBNet model
    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
    netscale = 4
elif esrgan_model_name in ['RealESRGAN_x4plus_anime_6B']:  # x4 RRDBNet model with 6 blocks
    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
    netscale = 4
elif esrgan_model_name in ['RealESRGAN_x2plus']:  # x2 RRDBNet model
    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
    netscale = 2
elif esrgan_model_name in ['realesr-animevideov3']:  # x4 VGG-style model (XS size)
    model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
    netscale = 4
else:
    # Without this branch an unsupported name could reach RealESRGANer with
    # `model` / `netscale` undefined or left over from an earlier run.
    raise ValueError(f'Unknown ESRGAN model name: {esrgan_model_name}')

# determine model paths
model_path = os.path.join('experiments/pretrained_models', esrgan_model_name + '.pth')
if not os.path.isfile(model_path):
    model_path = os.path.join('realesrgan/weights', esrgan_model_name + '.pth')
if not os.path.isfile(model_path):
    raise ValueError(f'Model {esrgan_model_name} does not exist.')

# restorer
upsampler = RealESRGANer(
    scale=netscale,
    model_path=model_path,
    model=model,
    tile=tile,
    tile_pad=tile_pad,
    pre_pad=pre_pad,
    half=not fp32,
    gpu_id=gpu_id)

if face_enhance:  # Use GFPGAN for face enhancement
    from gfpgan import GFPGANer
    face_enhancer = GFPGANer(
        model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
        upscale=outscale,
        arch='clean',
        channel_multiplier=2,
        bg_upsampler=upsampler)

# Convert the PIL image to an array and reverse the channel axis
# (RGB -> BGR, the order cv2_imshow displays).
img = numpy.array(pil_img)
img = img[:, :, ::-1].copy()

# Informational only: nothing in this notebook reads `img_mode`.
if len(img.shape) == 3 and img.shape[2] == 4:
    img_mode = 'RGBA'
else:
    img_mode = None

try:
    if face_enhance:
        _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
    else:
        # Use the notebook's `outscale` setting; there is no `args` object here.
        output, _ = upsampler.enhance(img, outscale=outscale)
except RuntimeError as error:
    print('Error', error)
    print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
else:
    # Only display when upscaling succeeded; after a failure `output` would be
    # undefined or stale from an earlier run.
    cv2_imshow(output)
#plt.imshow(cv2.cvtColor(output, cv2.COLOR_BGR2RGB))

# 5. Run Upscaler

In [None]:
# Convert the selected PIL image to an array and reverse the channel axis
# (RGB -> BGR, the order cv2_imshow displays).
img = numpy.array(pil_img)
img = img[:, :, ::-1].copy()

# Informational only: nothing in this notebook reads `img_mode`.
if len(img.shape) == 3 and img.shape[2] == 4:
    img_mode = 'RGBA'
else:
    img_mode = None

try:
    if face_enhance:
        _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
    else:
        # Use the notebook's `outscale` setting; there is no `args` object here.
        output, _ = upsampler.enhance(img, outscale=outscale)
except RuntimeError as error:
    print('Error', error)
    print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
else:
    # Only display when upscaling succeeded; after a failure `output` would be
    # undefined or stale from an earlier run.
    cv2_imshow(output)