In [None]:
from diffusers import AutoPipelineForText2Image
import torch
import numpy as np
from pathlib import Path
import json
from PIL import Image

In [None]:
# Model identifier handed to `from_pretrained` when the pipeline is built.
model = 'runwayml/stable-diffusion-v1-5'
# Intended number of denoising steps; this value is written to each run's config.json.
steps = 50

In [None]:
# Load the text-to-image pipeline with half-precision weights, then move it to the GPU.
pipe = AutoPipelineForText2Image.from_pretrained(model, torch_dtype=torch.float16)
pipe = pipe.to('cuda')

In [None]:
def get_reprs(prompt, num_inference_steps=None):
    """Generate an image for ``prompt`` while recording per-step intermediates.

    A forward hook on ``pipe.unet.mid_block`` captures the block's output on
    every UNet forward pass, and a step callback decodes the current latents
    into a preview image after every denoising step.

    Args:
        prompt: Text prompt passed to the module-level ``pipe``.
        num_inference_steps: Number of denoising steps to run. ``None``
            (the default) uses the module-level ``steps`` setting, so the
            generation matches the value recorded alongside the outputs.

    Returns:
        A ``(reprs, imgs)`` tuple. ``reprs`` is a list of numpy arrays, one
        appended per mid-block forward call. ``imgs`` is a list holding
        whatever ``pipe.numpy_to_pil`` returns for each decoded step.
    """
    if num_inference_steps is None:
        num_inference_steps = steps

    reprs = []
    imgs = []

    # The VAE works on latents divided by its `scaling_factor`; the value lives
    # in the VAE config (0.18215 is the fallback that was hard-coded here).
    scaling_factor = getattr(pipe.vae.config, 'scaling_factor', 0.18215)

    def get_repr(module, input, output):
        # NOTE(review): `output[0]` keeps only the first entry along dim 0. With
        # classifier-free guidance the UNet batch is presumably
        # [unconditional, conditional], which would make this the unconditional
        # branch -- TODO confirm that this is the slice you want.
        reprs.append(output[0].cpu().numpy())

    def latents_callback(i, t, latents):
        # Decode the intermediate latents and map the result from [-1, 1] to [0, 1].
        image = pipe.vae.decode(latents / scaling_factor).sample[0]
        image = (image / 2 + 0.5).clamp(0, 1)
        # CHW -> HWC, the layout `numpy_to_pil` is given here.
        image = image.cpu().permute(1, 2, 0).numpy()
        imgs.extend(pipe.numpy_to_pil(image))

    # The handle returned by `register_forward_hook` is used as a context
    # manager so the hook is removed even if generation raises.
    # NOTE(review): newer diffusers releases deprecate `callback` /
    # `callback_steps` in favour of `callback_on_step_end` -- migrate once the
    # minimum supported version is known.
    with pipe.unet.mid_block.register_forward_hook(get_repr):
        pipe(
            prompt,
            num_inference_steps=num_inference_steps,
            callback=latents_callback,
            callback_steps=1,
        )
    return reprs, imgs

# For every prompt: run generation, then write the captured representations,
# the per-step preview images and the run configuration to `output<i>/`.
prompts = ['A photo of a cat.', 'A photo of a dog.']
for i, prompt in enumerate(prompts):
    print(f'Prompt: {prompt}')
    save_path = Path(f'output{i}/')
    save_path.mkdir(exist_ok=True)
    reprs, imgs = get_reprs(prompt)
    # saving representations: raw float32 bytes of the stacked captures
    repr_array = np.stack(reprs).astype(np.float32)
    with open(save_path / 'repr.bin', 'wb') as f:
        f.write(repr_array.tobytes())
    # saving result: one PNG per decoded step, numbered from 1
    for j, img in enumerate(imgs, 1):
        img.save(save_path / f'{j}.png')
    # save config; shape and dtype are recorded because repr.bin is a headerless
    # dump and cannot be reshaped on load without them
    with open(save_path / 'config.json', 'w') as f:
        json.dump(
            {
                'model': model,
                'steps': steps,
                'prompt': prompt,
                'repr_shape': list(repr_array.shape),
                'repr_dtype': 'float32',
            },
            f,
        )
    # keep everything in the output directory out of version control
    with open(save_path / '.gitignore', 'w') as f:
        f.write('*')
