# In [None]:
import os
import torch
import transformers

from configparser import ConfigParser
from torch import autocast
from diffusers import StableDiffusionPipeline
from PIL import Image
from datetime import datetime

# Load local settings (Hugging Face token, output directory) from the INI file.
parser = ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means the config was not loaded.
if not parser.read('stable-diff.cfg'):
    raise FileNotFoundError("Config file 'stable-diff.cfg' not found or unreadable")
HF_TOKEN = parser.get('hugging_face', 'token')

def image_grid(imgs, rows, cols):
    """Tile images into a single ``rows`` x ``cols`` grid image.

    Images are placed in row-major order: ``imgs[i]`` is pasted at column
    ``i % cols`` and row ``i // cols``. The cell width and height are taken
    from ``imgs[0]``.

    Args:
        imgs: Sequence of PIL images; must hold exactly ``rows * cols`` items.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``.

    Raises:
        ValueError: If ``imgs`` is empty or its length is not ``rows * cols``.
    """
    count = len(imgs)
    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
    if count == 0:
        raise ValueError("imgs must contain at least one image")
    if count != rows * cols:
        raise ValueError(
            f"expected rows*cols = {rows * cols} images, got {count}"
        )

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid

# Build the Stable Diffusion v1-4 pipeline from the "fp16" revision with
# float16 weights, authenticating with the token read from the config file,
# and place it on the CUDA device.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=HF_TOKEN,
    torch_dtype=torch.float16,
    revision="fp16",
).to("cuda")

# Turn on the pipeline's attention-slicing option
# (presumably to reduce peak GPU memory; see the diffusers docs).
pipe.enable_attention_slicing()

# Generation settings: each run produces `num_images` images of the same
# prompt (the pipeline is called with one prompt string per image).
num_runs = 20
num_images = 3
prompt = ["A knight with a sword and shield facing an incoming avalanche of skulls, comic style, colorful, scenic, epic, detailed"] * num_images

# Resolve the output directory once (it does not change between runs) and
# create it up front, so a missing folder cannot discard a finished batch.
out_dir = parser.get('general', 'out_dir')
os.makedirs(out_dir, exist_ok=True)

for _ in range(num_runs):
    with autocast("cuda"):
        images = pipe(prompt, guidance_scale=7.5, num_inference_steps=100, height=512, width=512).images

    # Lay the batch out side by side in a single row.
    grid = image_grid(images, rows=1, cols=num_images)
    # NOTE(review): `display` is not imported here; presumably the IPython /
    # Jupyter builtin, so this only works inside a notebook session.
    display(grid)
    # str(datetime.now()) contains ':' characters, which are not allowed in
    # Windows file names; swap them for '-' to keep the name portable.
    timestamp = str(datetime.now()).replace(':', '-')
    grid.save(os.path.join(out_dir, f"{timestamp} {prompt[0]}.png"))