In [None]:
!git clone https://github.com/huggingface/diffusers.git
!cd /content/diffusers && pip install .
!pip install transformers scipy ftfy accelerate
!pip install "ipywidgets>=7,<8"

### Latent Generation


Generating your own latents lets you tweak the prompt for a specific result you liked.

In order to reuse the seeds we need to generate the latents ourselves. Otherwise, the pipeline will do it internally and we won't have a way to replicate them.
Latents are the initial random Gaussian noise that gets transformed to actual images during the diffusion process.
To generate them, we'll use a different random seed for each latent, and we'll save the seeds so we can reuse them later.


Based on: https://colab.research.google.com/github/pcuenca/diffusers-examples/blob/main/notebooks/stable-diffusion-seeds.ipynb#scrollTo=cf996058

In [None]:
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler

# Single place to change the checkpoint used for both the scheduler and the pipeline.
model_id = "stabilityai/stable-diffusion-2-1-base"

# Load the scheduler configuration stored in the checkpoint's "scheduler" subfolder.
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")

# variant and dtype make sure that we use lower GPU memory but maybe sacrifice some of the quality.
# `variant="fp16"` replaces the deprecated `revision="fp16"` way of selecting the half-precision weights.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    variant="fp16",
    torch_dtype=torch.float16,
    requires_safety_checker=False,
).to("cuda")

In [None]:
from PIL import Image

def image_grid(imgs, rows, cols):
    """Paste images into a single ``rows`` x ``cols`` grid image.

    Images are placed left to right, top to bottom, in the order given. The
    cell size is taken from the first image, so all images are assumed to
    share that size.

    Args:
        imgs: Sequence of PIL images; its length must equal ``rows * cols``.
        rows: Number of rows in the grid.
        cols: Number of columns in the grid.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``, where ``(w, h)`` is
        the size of the first image.

    Raises:
        AssertionError: If ``len(imgs)`` differs from ``rows * cols``.
        ValueError: If ``imgs`` is empty, since there is no image to take the
            cell size from.
    """
    assert len(imgs) == rows * cols, (
        f"expected {rows * cols} images for a {rows}x{cols} grid, got {len(imgs)}"
    )
    if len(imgs) == 0:
        raise ValueError("image_grid needs at least one image")

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        # Row-major placement: i // cols is the row, i % cols the column.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid

In [None]:
num_images = 4

width = 512
height = 512

generator = torch.Generator(device="cuda")

# Shape of the noise for one image; read the channel count from the UNet config
# (direct `pipe.unet.in_channels` access is deprecated).
latent_shape = (1, pipe.unet.config.in_channels, height // 8, width // 8)

seeds = []
latent_parts = []
for _ in range(num_images):
    # Get a new random seed, store it and use it as the generator state
    seed = generator.seed()
    seeds.append(seed)
    generator = generator.manual_seed(seed)

    latent_parts.append(
        torch.randn(latent_shape, generator=generator, device="cuda")
    )

# Concatenate once at the end instead of growing the tensor on every iteration.
latents = torch.cat(latent_parts)

# latents should have shape (4, 4, 64, 64) in this case
latents.shape

In [None]:
# Feed the pre-generated latents to the pipeline, one copy of the prompt per latent
prompt = "Labrador in the style of Vermeer"
prompts = [prompt] * num_images

with torch.autocast("cuda"):
    output = pipe(prompts, guidance_scale=7.5, latents=latents)

images = output.images

# Show the results as a 2 x 2 grid
image_grid(images, 2, 2)

### We want more images in the style of image 3

In [None]:
# seeds is in generation order, so position 2 holds the seed of the third image
third_index = 2
seed = seeds[third_index]
seed

In [None]:
# Regenerate latents given seed: re-seeding the generator and drawing the same
# shape repeats the steps used when the latents were first created.
generator.manual_seed(seed)

# Read the channel count from the UNet config (direct `pipe.unet.in_channels`
# access is deprecated).
latents = torch.randn(
    (1, pipe.unet.config.in_channels, height // 8, width // 8),
    generator=generator,
    device="cuda",
)

In [None]:
# Run the pipeline for a single prompt with the regenerated latents
with torch.autocast("cuda"):
    result = pipe([prompt], guidance_scale=7.5, latents=latents)

image = result.images

# Display the first (only) generated image
image[0]

In [None]:
# Same latents, different prompt
prompt = "Clown in the style of Vermeer"

with torch.autocast("cuda"):
    result = pipe([prompt], guidance_scale=7.5, latents=latents)

image = result.images

# Display the first (only) generated image
image[0]