# El arte de la imaginación mecanizada - Ejercicio 1

#### Prácticas y pensamiento artísticos en torno al giro tecnológico y a Inteligencias Artificiales, Bilbo, 11 de julio de 2023

Basado en gran medida en [Grokking Stable Diffusion](https://colab.research.google.com/drive/1dlgggNa5Mz8sEAGU0wFCHhGLFooW_pf1?usp=sharing) de [Jonathan Whitaker](https://github.com/johnowhitaker) y en [Stable Diffusion with 🧨 diffusers](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb) de [🤗 Hugging Face](https://github.com/huggingface/diffusers).



In [None]:
#@markdown #Comprobar GPU
# IPython shell escape: list the NVIDIA GPUs visible to this runtime.
!nvidia-smi -L

In [None]:
#@markdown #Librerías + Modelos + Funciones de ayuda

from IPython.display import clear_output

!pip install diffusers==0.3.0
!pip install transformers scipy ftfy

from google.colab import files
from transformers import CLIPTextModel, CLIPTokenizer
from tqdm.auto import tqdm
from torch import autocast
from PIL import Image
from huggingface_hub import notebook_login
from diffusers import AutoencoderKL, UNet2DConditionModel, LMSDiscreteScheduler
import torch
import re
clear_output()


# Run on the GPU when one is available, otherwise fall back to the CPU.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

# Each network is moved to `torch_device` as soon as it has been loaded.
# Image autoencoder (used by latents_to_pil to decode latents).
vae = AutoencoderKL.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    subfolder="vae",
).to(torch_device)

# Tokenizer and text encoder that turn prompts into conditioning embeddings.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained(
    "openai/clip-vit-large-patch14",
).to(torch_device)

# Noise-prediction network driven by the sampling loop in render().
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    subfolder="unet",
).to(torch_device)

# Sampler configuration; the number of inference steps is set per render() call.
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
)

# Hide the download/progress output produced while loading.
clear_output()


def latents_to_pil(latents):
    """Decode a batch of latents with the global ``vae`` into PIL images.

    Args:
        latents: Batch of latents as produced by ``render``.

    Returns:
        A list with one ``PIL.Image`` per element of the batch.
    """
    # NOTE(review): 0.18215 is presumably the latent scaling factor of the
    # Stable Diffusion VAE -- confirm against the model configuration.
    with torch.no_grad():
        decoded = vae.decode((1 / 0.18215) * latents).sample

    # Map the decoder output from [-1, 1] to [0, 1] and clip anything outside.
    pixels = (decoded / 2 + 0.5).clamp(0, 1)

    # Move axis 1 to the end (channels-last, assuming an NCHW batch) and
    # convert to a NumPy array of 8-bit values for PIL.
    frames = pixels.detach().cpu().permute(0, 2, 3, 1).numpy()
    frames = (frames * 255).round().astype("uint8")

    return [Image.fromarray(frame) for frame in frames]


def image_grid(imgs, cols):
    """Paste images into a single grid, left to right and top to bottom.

    Args:
        imgs: Non-empty sequence of PIL images. The tile size is taken from
            the first image.
        cols: Maximum number of images per row (must be at least 1).

    Returns:
        A new RGB ``PIL.Image`` that is ``min(cols, len(imgs))`` tiles wide
        and ``ceil(len(imgs) / cols)`` tiles high.

    Raises:
        ValueError: If ``imgs`` is empty or ``cols`` is smaller than 1.
    """
    if not imgs:
        raise ValueError("image_grid() needs at least one image")
    if cols < 1:
        raise ValueError("cols must be at least 1")

    grid_w = min(cols, len(imgs))
    # Ceiling division. The former `len(imgs) // cols + 1` appended an empty
    # (black) row whenever the image count was an exact multiple of `cols`.
    grid_h = -(-len(imgs) // cols)

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(grid_w * w, grid_h * h))
    for i, img in enumerate(imgs):
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))

    return grid


def get_seed_gen(seed):
    """Create a seeded ``torch.Generator`` on the global ``torch_device``.

    Args:
        seed: Seed to use. The value 0 means "pick one at random": a seed is
            then drawn with ``torch.randint`` from ``[0, 2**32)``.

    Returns:
        A ``(seed, generator)`` tuple, where ``seed`` is the value actually
        used, so that it can be reported and reused.
    """
    chosen = seed if seed != 0 else torch.randint(2**32, (1, 1)).item()
    # manual_seed() returns the generator itself, so the call can be chained.
    return chosen, torch.Generator(device=torch_device).manual_seed(chosen)


def render(steps, scale, generator, embeddings, batch_size):
    """Sample 512x512 images with the global ``unet`` and ``scheduler``.

    Args:
        steps: Number of inference steps handed to ``scheduler.set_timesteps``.
        scale: Guidance scale applied to the difference between the
            text-conditioned and the unconditional noise prediction.
        generator: ``torch.Generator`` used to draw the initial latents.
        embeddings: Text embeddings passed to the UNet as
            ``encoder_hidden_states``. The UNet output is split in two halves
            that are treated as (unconditional, text-conditioned), so the
            embeddings are expected to be stacked in that order.
        batch_size: Number of images to generate.

    Returns:
        The list of PIL images returned by ``latents_to_pil``.
    """
    height, width = 512, 512

    scheduler.set_timesteps(steps)

    # Initial noise at 1/8 of the image resolution, scaled to the first sigma.
    latent_shape = (batch_size, unet.in_channels, height // 8, width // 8)
    latents = torch.randn(
        latent_shape, generator=generator, device=torch_device)
    latents = latents * scheduler.sigmas[0]

    with autocast("cuda"):
        for step_index, timestep in tqdm(enumerate(scheduler.timesteps), total=steps):
            sigma = scheduler.sigmas[step_index]

            # Duplicate the batch so a single UNet call yields both halves,
            # then scale the input by the current sigma.
            model_input = torch.cat([latents, latents])
            model_input = model_input / ((sigma**2 + 1) ** 0.5)

            with torch.no_grad():
                prediction = unet(
                    model_input,
                    timestep,
                    encoder_hidden_states=embeddings,
                ).sample

            # Guidance: move from the unconditional prediction towards the
            # text-conditioned one by a factor of `scale`.
            uncond_pred, text_pred = prediction.chunk(2)
            guided_pred = uncond_pred + scale * (text_pred - uncond_pred)

            # The scheduler is stepped with the loop index, not the timestep.
            latents = scheduler.step(
                guided_pred, step_index, latents).prev_sample

    return latents_to_pil(latents)


# NOTE(review): presumably the id of the end-of-text token of `tokenizer`
# (compare with tokenizer.eos_token_id) -- confirm; not referenced in the
# code visible here.
token_EOS_value = 49407


In [None]:
#@markdown #Exploración no guiada

num_images = 1

prompt = "potruflo, award winning photograph"  # @param {type:"string"}
semilla = 498748461  # @param {type:"number"}
guia = 7.5  # -@param {type:"slider", min:4.5, max:21, step:1.5}
pasos = 35  # @param {type:"slider", min:25, max:100, step:5}


# Batch layout expected by render(): the first `num_images` rows are empty
# (unconditional) prompts, the following `num_images` rows are the prompt.
prompts = [""] * num_images  # Unconditional
prompts.extend([prompt] * num_images)  # Prompts


inputs = tokenizer(prompts, padding="max_length", max_length=tokenizer.model_max_length,
                   truncation=True, return_tensors="pt")  # [num_images * 2, 77]

with torch.no_grad():
    text_embeddings = text_encoder(inputs.input_ids.to(torch_device))[
        0]  # [num_images * 2, 77, 768]

# Token ids of the actual prompt. The conditional rows start at index
# `num_images`; a fixed index of 1 would pick an unconditional (empty) row as
# soon as num_images > 1.
token_list = inputs.input_ids[num_images].tolist()


# A seed of 0 is replaced by a random one; keep the value actually used.
semilla, generator = get_seed_gen(semilla)
images = render(pasos, guia, generator, text_embeddings, num_images)

# The prompt is free text: replace characters that are not valid in file names
# (path separators, reserved and control characters) and cap its length so
# that saving does not fail for unusual or very long prompts.
safe_prompt = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", prompt)[:150]
filename = f"1_{safe_prompt}_{semilla}_{pasos}.jpg"

print(f"Semilla: {semilla}")
print(f"Fichero: {filename}\n")

# Save at maximum JPEG quality without chroma subsampling, offer the file for
# download and display the grid as the cell output.
grid = image_grid(images, cols=3)
grid.save(filename, quality=100, subsampling=0)
files.download(filename)
grid
