In [1]:
# Two text descriptions of the same street scene, at different lengths.
# NOTE(review): the names suggest the short one targets a CLIP text encoder
# and the long one a T5 text encoder — confirm against the consumers.

# Short description (three sentences).
prompt_clip = (
    "A photograph of a serene urban scene features a green clock on a pole, surrounded by a brick sidewalk, trees, and parked bicycles. "
    "Two men converse on the sidewalk, wearing various attire, including red shirts, grey hoodies, and sneakers. "
    "Cars and a white truck are parked along the street, with a parking meter on the curb."
)

# Long description (twelve sentences); one sentence per source line so the
# text is reviewable in a diff. Adjacent literals are concatenated verbatim.
prompt_t5 = (
    "The scene depicts a vibrant and bustling urban setting, with a tall, green clock standing prominently on the sidewalk. "
    "The clock is surrounded by a variety of people, including two men conversing on the sidewalk, one wearing a red shirt and the other a grey hoodie. "
    "The men are standing on a brick-paved sidewalk, lined with trees that have sparse foliage. "
    "In the background, a white truck is parked on the curb, with bicycles chained to a pole nearby. "
    "A parking meter stands on the sidewalk, with a yellow top and a few coins inserted. "
    "To the left of the clock, a white work truck is parked, with a red vehicle parked behind it. "
    "A brick building with a white canopy stands nearby, with a black sign above the entrance. "
    "The building's exterior is made of white bricks, with a few windows and a door. "
    "A tall, metal light post stands nearby, casting a warm glow over the scene. "
    "The atmosphere is lively, with the sound of people chatting and the hum of traffic in the background. "
    "The sun is shining brightly, casting long shadows across the sidewalk and buildings. "
    "The overall impression is one of a thriving urban community, with a mix of old and new architecture and a sense of energy and activity."
)

In [None]:
import torch
from diffusers import StableDiffusionXLPipeline
import os

# Load the SDXL base checkpoint as a single text-to-image pipeline.
repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
path_root = os.getcwd()
# Model files are cached next to the notebook, under ./cache.
cache_dir = os.path.join(path_root, "cache")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Only request half precision when a CUDA device was actually selected.
# The cell falls back to CPU above, and a float16 pipeline is not meant to be
# run on CPU, so the fallback path loads the default float32 weights instead.
use_half_precision = device.type == "cuda"

pipeline_text2image = StableDiffusionXLPipeline.from_pretrained(
    repo_id,
    torch_dtype=torch.float16 if use_half_precision else torch.float32,
    # variant=None selects the repository's default (non-fp16) weight files.
    variant="fp16" if use_half_precision else None,
    use_safetensors=True,
    cache_dir=cache_dir,
).to(device)

In [None]:
import torch, os
from diffusers import DiffusionPipeline

# Checkpoints for the two-stage SDXL setup: a base model and a refiner.
base_repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
refiner_repo_id = "stabilityai/stable-diffusion-xl-refiner-1.0"

# Model files are cached under ./cache relative to the working directory.
path_root = os.getcwd()
cache_dir = os.path.join(path_root, "cache")

# `device` is resolved here but the pipelines are not moved with .to(device);
# placement is delegated to enable_model_cpu_offload(gpu_id=0) below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loader options common to both checkpoints.
_fp16_load_kwargs = dict(
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
    cache_dir=cache_dir,
)

base = DiffusionPipeline.from_pretrained(base_repo_id, **_fp16_load_kwargs)
base.enable_model_cpu_offload(gpu_id=0)

# The refiner is given the base pipeline's second text encoder and VAE
# objects rather than loading its own copies of those components.
refiner = DiffusionPipeline.from_pretrained(
    refiner_repo_id,
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    **_fp16_load_kwargs,
)
refiner.enable_model_cpu_offload(gpu_id=0)

# Optional UNet compilation, currently disabled:
# base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
# refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)

In [None]:

# Settings that both stages must agree on. The base stage is told to end at
# `handoff_fraction` and the refiner to start at the same value, so keeping a
# single name prevents the two calls from drifting apart.
handoff_fraction = 0.8
shared_call_kwargs = dict(
    prompt=prompt_clip,
    num_inference_steps=50,
    guidance_scale=8.0,
)

# Stage 1: run the base pipeline and keep its output as latents
# (output_type="latent") so it can be passed directly to the refiner.
img_base = base(
    denoising_end=handoff_fraction,
    output_type="latent",
    **shared_call_kwargs,
).images

# Stage 2: the refiner receives the base latents via `image=` and its first
# result is kept.
image = refiner(
    denoising_start=handoff_fraction,
    image=img_base,
    **shared_call_kwargs,
).images[0]

# Last expression of the cell: rendered as the cell output.
image

In [None]:
import generator as gen
import os, torch

# Checkpoint identifiers for the SDXL base and refiner stages.
base_repo_id, refiner_repo_id = (
    "stabilityai/stable-diffusion-xl-base-1.0",
    "stabilityai/stable-diffusion-xl-refiner-1.0",
)

# ./cache under the current working directory; handed to the helper below.
path_root = os.getcwd()
cache_dir = os.path.join(path_root, "cache")

# Resolved for reference; the helper call below is given gpu_id=0 rather than
# this device object.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build both pipelines through the project's `generator` helper.
# NOTE(review): presumably mirrors the manual base/refiner loading done with
# DiffusionPipeline elsewhere in this notebook — confirm in generator.py.
base, refiner = gen.get_sdxl_pipeline(
    base_repo_id,
    refiner_repo_id,
    cache_dir,
    gpu_id=0,
)

In [None]:
# Text-to-image run: the same prompt is submitted twice.
prompts = [prompt_clip] * 2
num_inference_steps = 50

# Seeding is requested explicitly (manual_seed=True, seed=8).
# NOTE(review): the exact seeding semantics live in generator.run_sdxl_t2i.
images = gen.run_sdxl_t2i(
    base,
    refiner,
    num_inference_steps,
    prompts,
    manual_seed=True,
    seed=8,
)

In [None]:
# Input image for the image-conditioned run.
# The original location is an absolute path inside one user's home directory,
# which only exists on that machine. It is kept as the default so existing
# runs are unchanged, but can now be overridden without editing the notebook
# by setting the SDXL_INIT_IMAGE environment variable.
img_path = os.environ.get(
    "SDXL_INIT_IMAGE",
    "/home/jovyan/3MDBench/data/IMAGEs/VISUAL_GENOME/selected/1.jpg",
)

# The same prompt is submitted twice.
prompts = [prompt_clip, prompt_clip]
num_inference_steps = 50
# Square 1024x1024 target size passed to the helper.
img_width = img_height = 1024

# Only the refiner pipeline is passed to this helper.
# NOTE(review): how the helper uses img_path and the width/height is defined
# in generator.run_sdxl_imi — confirm there.
images = gen.run_sdxl_imi(refiner, num_inference_steps, img_width, img_height, prompts, img_path)

In [None]:
# Show the first element of `images` as the cell output.
# NOTE(review): `images` is whichever generation cell ran last (both the
# run_sdxl_t2i and run_sdxl_imi cells assign it) — presumably a PIL image.
images[0]