In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
%%capture
!pip install diffusers
!pip install torch
!pip install accelerate
!pip install transformers
!pip install PIL
!pip install ipywidgets
import os
import datetime
import accelerate
from PIL import Image
import torch
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler
from diffusers import UniPCMultistepScheduler #use different scheduler
from ipywidgets import widgets
from ipywidgets import Layout

# For GFPGAN
!pip install basicsr
!pip install facexlib
!pip install -r requirements.txt
!python setup.py develop
!pip install realesrgan #only need this if we want to enhance the background as well

# Initialise some stuff
dictModels = {"Deliberate": "XpucT/Deliberate",
              "Reliberate": "stablediffusionapi/Reliberate"}
widModel = widgets.Dropdown(options=dictModels.keys(), description="Model", disabled=False)
widPrompt = widgets.Text(description="Prompt", disabled=False,layout=Layout(width='90%'))
widNegPrompt = widgets.Text(description="Negative Prompt", disabled=False,layout=Layout(width='90%', description_width='100px'))
widCFG = widgets.BoundedIntText(description="CFG", disabled=False, value=7.5, min=0, max=100, step=0.01)
widSteps = widgets.BoundedIntText(description="Steps", disabled=False, value=30, min=1, max=200, step=1)
widWidth = widgets.BoundedIntText(description="Width", disabled=False, value=512, min=1, step=1, max=2000)
widHeight = widgets.BoundedIntText(description="Height", disabled=False, value=512, min=1, step=1, max=2000)
widSeed = widgets.BoundedIntText(description="Seed", disabled=False, value=0, step=1) #need to set a max value here
widNoImages = widgets.BoundedIntText(description="Batch Size", disabled=False, value=1, step=1) #need to set a max value here
widGFP = widgets.ToggleButton(description="GFPGAN Upscale", value=True, icon='check')
try:
  os.mkdir("/content/gdrive/MyDrive/SD_test/outputs")
except:
  pass

try:
  os.mkdir("/content/gdrive/MyDrive/SD_test/gfpgan")
  !git clone https://github.com/TencentARC/GFPGAN.git /content/gdrive/MyDrive/SD_test/gfpgan
except:
  pass
gfPath = "/content/gdrive/MyDrive/SD_test/gfpgan/inference_gfpgan.py"

In [None]:
display (widModel)

Dropdown(description='Model', options=('Deliberate', 'Reliberate'), value='Deliberate')

In [None]:
# Set model
%%capture
modPath = dictModels[widModel.value]
vae = AutoencoderKL.from_pretrained(modPath, subfolder="vae") #to transform latent back into image
tokenizer = CLIPTokenizer.from_pretrained(modPath, subfolder="tokenizer") #Tokenizer and encoder to create embeddings
text_encoder = CLIPTextModel.from_pretrained(modPath, subfolder="text_encoder")
unet = UNet2DConditionModel.from_pretrained(modPath,subfolder="unet") # Main unet model
torch_device = "cuda" #Move to GPU to increase speed
vae.to(torch_device)
text_encoder.to(torch_device)
unet.to(torch_device)
# Set scheduler
scheduler = UniPCMultistepScheduler.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="scheduler")

# Define function to gnerate images
def txt2img(prompt, negPrompt, imgWidth, imgHeight, guidance_scale, num_inference_steps, seed):
  # Tokenize the text - tokenizer defined above
  text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
  neg_text_input = tokenizer(negPrompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
  batch_size = len(prompt)
  generator = torch.manual_seed(seed)  # Seed generator to create the inital latent noise
  # Create embeddings from the prompt
  with torch.no_grad():
    text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]
    uncond_embeddings = text_encoder(neg_text_input.input_ids.to(torch_device))[0]

  #concatenate the conditional and unconditional embeddings into a batch to avoid doing two forward passes:
  text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

  # Create random noise
  # Latent image (size is smaller than the final image but will be transformed back later)
  latents = torch.randn((batch_size,unet.in_channels, imgHeight //8, imgWidth // 8),generator=generator,)
  latents = latents.to(torch_device)
  # Denoise the image
  #Start by scaling the noise by noise scale value, sigma (require for some improved schedulers)
  latents = latents * scheduler.init_noise_sigma
  from tqdm.auto import tqdm
  scheduler.set_timesteps(num_inference_steps) #set scheduler's timestep
  for t in tqdm(scheduler.timesteps):
    # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes
    latent_model_input = torch.cat([latents]*2)
    latent_model_input = scheduler.scale_model_input(latent_model_input,timestep=t)
    #predict the noise residual
    with torch.no_grad():
      noise_pred = unet(latent_model_input,t,encoder_hidden_states=text_embeddings).sample
    #perform gudance
    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
    #compute the previous noist sample x_t -> x_t - 1
    latents = scheduler.step(noise_pred,t,latents).prev_sample

  #Decode the image - use the vae to decode the latent representation into an image and get the decoded output with sample
  # scale and decode the image latents with vae
  latents = 1 / 0.18215 * latents
  with torch.no_grad():
    image = vae.decode(latents).sample

  # convert to PIL.image
  image = (image / 2 + 0.5).clamp(0, 1)
  image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
  images = (image * 255).round().astype("uint8")
  pil_images = [Image.fromarray(image) for image in images]
  return pil_images

In [None]:
display(widPrompt,widNegPrompt,widCFG,widSteps,widWidth,widHeight,widNoImages,widGFP) #Need to arrange properly

Text(value='', description='Prompt', layout=Layout(width='90%'))

Text(value='', description='Negative Prompt', layout=Layout(width='90%'))

BoundedIntText(value=7, description='CFG', step=0)

BoundedIntText(value=30, description='Steps', max=200, min=1)

BoundedIntText(value=512, description='Width', max=2000, min=1)

BoundedIntText(value=512, description='Height', max=2000, min=1)

BoundedIntText(value=1, description='Batch Size')

ToggleButton(value=True, description='GFPGAN Upscale', icon='check')

In [None]:
generator = torch.Generator(device=torch_device)

# Make output folder
now = datetime.datetime.now().timetuple()[0:5]
pngPath = "/content/gdrive/MyDrive/SD_test/outputs/" + "-".join(str(x) for x in now) + "/"
try:
  os.mkdir(pngPath)
except:
  pass

for i in range(1, widNoImages.value+1):
  print(i)
  seed = generator.seed() #create random seed
  genimage = txt2img(prompt=[widPrompt.value], negPrompt=[widNegPrompt.value], imgWidth=widWidth.value,
                  imgHeight=widHeight.value, guidance_scale=widCFG.value, num_inference_steps=widSteps.value, seed=seed)
  savePath = pngPath + str(seed) + ".png"
  genimage[0].save(savePath,"PNG")

if widGFP:
  os.system("python " + gfPath + " -i " + pngPath + " -o " + pngPath)

1


  latents = torch.randn((batch_size,unet.in_channels, imgHeight //8, imgWidth // 8),generator=generator,)


  0%|          | 0/100 [00:00<?, ?it/s]

2


  0%|          | 0/100 [00:00<?, ?it/s]

3


  0%|          | 0/100 [00:00<?, ?it/s]

4


  0%|          | 0/100 [00:00<?, ?it/s]

5


  0%|          | 0/100 [00:00<?, ?it/s]

6


  0%|          | 0/100 [00:00<?, ?it/s]

7


  0%|          | 0/100 [00:00<?, ?it/s]

8


  0%|          | 0/100 [00:00<?, ?it/s]

9


  0%|          | 0/100 [00:00<?, ?it/s]

10


  0%|          | 0/100 [00:00<?, ?it/s]

v03  Implemented random seed

v04. Model selection

V05. Prompt input box, negative prompts

V06. Basic saving with seed filename

V07. txt2img function

v08. Batch generation, save to folder for batch

v09. GFPGAN upscale (2x upscale)

From write_own_pipeline.ipynb