In [2]:
# Install dependencies.
!pip install -q --upgrade bitsandbytes transformers accelerate diffusers

In [None]:
# Set the details for your model here:
import torch

from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, AutoencoderKL, KDPM2AncestralDiscreteScheduler

# Every checkpoint below is loaded in half precision.
half_precision = torch.float16

# Standalone VAE checkpoint, handed to the base pipeline in place of its own.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=half_precision,
)

# Text-to-image SDXL pipeline (fp16 variant, safetensors weights).
base = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=half_precision,
    variant="fp16",
    use_safetensors=True,
    vae=vae,
)

# Image-to-image pipeline constructed from the very same component objects
# as `base`, so nothing is loaded a second time.
base2 = StableDiffusionXLImg2ImgPipeline(**base.components)

# Move both pipelines to the GPU; the result is bound to `_` so the cell
# prints nothing.
for pipeline in (base, base2):
    _ = pipeline.to("cuda")

torch.cuda.empty_cache()

In [None]:
import os
import zipfile
import random

# --- Generation settings ---
# Subject of the image; the quality tags are appended to it further down.
prompt = "Steampunk photo of antropomorphic suricata, pixar, cute, in white mecha armor looking happy, daylight lush nature park background"
quality = "intricate details even to the smallest particle, extreme detail of the enviroment, sharp portrait, well lit, interesting outfit, beautiful shadows, bright, photoquality, ultra realistic, masterpiece, 8k"
negative_prompt = "helmet, ugly, old, boring, photoshopped, tired, wrinkles, scar, gray hair, big forehead, crosseyed, dumb, stupid, cockeyed, disfigured, blurry, assymetrical, unrealistic, grayscale, black and white, bald, high hairline, balding, receeding hairline, grayscale, bad anatomy, unnatural irises, no pupils, blurry eyes, dark eyes, extra limbs, deformed, disfigured eyes, out of frame, no irises, assymetrical face, broken fingers, extra fingers, disfigured hands"
num_samples = 1
guidance_scale = 8
num_inference_steps = 30
height = 1024
width = 1024
# Seed handed to the generator of the base pipeline call below. It is a fixed
# value so that repeated runs start from the same random state; change it to
# explore other results.
seed = 2222

prompt = prompt + ". " + quality

# Set this to the folder you want to save the image to in Google Drive
output_dir = "drive/MyDrive/manually_generated"
os.makedirs(output_dir, exist_ok=True)
denoising_split = 0.5

# Latents collected by `variate` during the base pipeline run, in call order.
# Re-created here so re-running the cell starts from an empty list.
all_latents = []

def variate(step, timestep, latents):
    """Pipeline step callback that records the latents it is handed.

    Args:
        step: Step index supplied by the pipeline (unused).
        timestep: Timestep supplied by the pipeline (unused).
        latents: Latents supplied by the pipeline; appended to `all_latents`.
    """
    all_latents.append(latents)

images = base(
    prompt,
    height=height,
    width=width,
    negative_prompt=negative_prompt,
    num_images_per_prompt=num_samples,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    generator=torch.manual_seed(seed),
    callback=variate,
    # Invoke the callback on every step; stated explicitly rather than
    # relying on the library's default for this argument.
    callback_steps=1,
).images

# Palette cycled through below: for each stored latent the next entry replaces
# the word "white" in the prompt.
colors = [
    "white", "blue", "red", "black", "green", "orange",
    "yellow", "purple", "pink", "golden", "diamond", "energy",
]
palette_size = len(colors)

# Run the img2img pipeline once per captured latent and show what it produces.
for step, latents in enumerate(all_latents):
    # Position of this latent in the list as a fraction of num_inference_steps;
    # passed on as `denoising_start`.
    # NOTE(review): this is exactly 1.0 for the last latent when one latent was
    # stored per inference step — confirm the pipeline accepts that boundary.
    denoising_split = (step + 1) / num_inference_steps
    recolored_prompt = prompt.replace("white", colors[step % palette_size])
    part = base2(
        recolored_prompt,
        image=latents,
        negative_prompt=negative_prompt,
        guidance_scale=15,
        num_inference_steps=num_inference_steps // 2,
        denoising_start=denoising_split,
    ).images
    print("step", step)
    for image in part:
        display(image)

# Finally show the images returned by the base pipeline itself.
for image in images:
    print("final result")
    display(image)