In [None]:
import subprocess
import os

# Source /etc/network_turbo in a bash subshell, list the resulting environment
# lines that contain "proxy", and mirror each NAME=value pair into os.environ.
result = subprocess.run(
    'bash -c "source /etc/network_turbo && env | grep proxy"',
    shell=True,
    capture_output=True,
    text=True,
)
output = result.stdout
for line in output.splitlines():
    if '=' not in line:
        continue  # not a NAME=value line; nothing to export
    # Split on the first '=' only, so values may themselves contain '='.
    var, value = line.split('=', 1)
    os.environ[var] = value

In [None]:
import torch
import math
import matplotlib.pyplot as plt
import numpy as np

from PIL import Image
from diffusers import FluxPipeline
from torch import Tensor
from torchvision import transforms

# Dtype used for the pipeline weights and for the latents produced by the
# helpers defined later in this notebook.
DTYPE = torch.bfloat16
# Load the Flux pipeline from a local checkpoint directory, then move it to the GPU.
pipe = FluxPipeline.from_pretrained("/root/autodl-tmp/Flux-dev", torch_dtype=DTYPE)
pipe.to("cuda")

In [None]:
@torch.inference_mode()
def decode_imgs(latents, pipeline):
    """Decode latents with the pipeline's VAE and post-process the result.

    The latents are first mapped back to the VAE's own scale
    (``latents / scaling_factor + shift_factor``), then passed through
    ``pipeline.vae.decode`` and ``pipeline.image_processor.postprocess``.

    Args:
        latents: Latents to decode (the inverse of the normalisation applied
            in ``encode_imgs``).
        pipeline: Object exposing ``vae`` (with ``config.scaling_factor``,
            ``config.shift_factor`` and ``decode``) and ``image_processor``.

    Returns:
        The value returned by ``postprocess(..., output_type="pil")``.
    """
    vae = pipeline.vae
    vae_cfg = vae.config
    rescaled = latents / vae_cfg.scaling_factor + vae_cfg.shift_factor
    # decode() returns a sequence; its first element is the decoded batch.
    decoded = vae.decode(rescaled)[0]
    return pipeline.image_processor.postprocess(decoded, output_type="pil")

@torch.inference_mode()
def encode_imgs(imgs, pipeline):
    """Encode images into normalised VAE latents cast to ``DTYPE``.

    A latent is sampled from the VAE's ``latent_dist`` and then normalised as
    ``(latent - shift_factor) * scaling_factor``, which is the inverse of the
    rescaling done in ``decode_imgs``.

    Args:
        imgs: Input accepted by ``pipeline.vae.encode``.
        pipeline: Object exposing ``vae`` with ``encode`` and ``config``.

    Returns:
        The normalised latents, converted to the notebook-wide ``DTYPE``.
    """
    vae = pipeline.vae
    # Stochastic: draws a sample rather than taking the distribution's mode.
    sampled = vae.encode(imgs).latent_dist.sample()
    normalised = (sampled - vae.config.shift_factor) * vae.config.scaling_factor
    return normalised.to(dtype=DTYPE)

def get_noise(
    num_samples: int,
    height: int,
    width: int,
    device: torch.device,
    dtype: torch.dtype,
    seed: int,
):
    """Draw seeded Gaussian noise with the latent shape for an image size.

    Args:
        num_samples: Batch size of the returned tensor.
        height: Image height in pixels.
        width: Image width in pixels.
        device: Device for both the generator and the returned tensor.
        dtype: Dtype of the returned tensor.
        seed: Seed for a fresh ``torch.Generator`` created on ``device``.

    Returns:
        Tensor of shape
        ``[num_samples, 16, 2 * ceil(height / 16), 2 * ceil(width / 16)]``.
    """
    # Rounding up to a multiple of 16 pixels keeps both spatial sizes even.
    latent_h = 2 * math.ceil(height / 16)
    latent_w = 2 * math.ceil(width / 16)
    rng = torch.Generator(device=device).manual_seed(seed)
    return torch.randn(
        num_samples,
        16,
        latent_h,
        latent_w,
        device=device,
        dtype=dtype,
        generator=rng,
    )

In [None]:
def time_shift(mu: float, sigma: float, t: Tensor):
    """Warp timesteps ``t`` with ``exp(mu) / (exp(mu) + (1 / t - 1) ** sigma)``.

    Args:
        mu: Shift parameter; enters the formula only through ``exp(mu)``.
        sigma: Exponent applied to ``1 / t - 1``.
        t: Timesteps to warp (the arithmetic is elementwise for tensors).

    Returns:
        The warped timesteps, same kind of object as ``t``.
    """
    exp_mu = math.exp(mu)
    return exp_mu / (exp_mu + (1 / t - 1) ** sigma)

def get_lin_function(
    x1: float = 256, y1: float = 0.5, x2: float = 4096, y2: float = 1.15
):
    """Return the straight line through ``(x1, y1)`` and ``(x2, y2)``.

    Returns:
        A one-argument callable computing ``slope * x + intercept``.
    """
    slope = (y2 - y1) / (x2 - x1)
    intercept = y1 - slope * x1

    def line(x):
        return slope * x + intercept

    return line

def get_schedule(
    num_steps: int,
    image_seq_len: int,
    base_shift: float = 0.5,
    max_shift: float = 1.15,
    shift: bool = True,
) -> list[float]:
    """Build the list of timesteps, running from 1 down to 0.

    Args:
        num_steps: Number of steps; the result has ``num_steps + 1`` entries.
        image_seq_len: Sequence length used to pick the shift ``mu``.
        base_shift: ``mu`` at sequence length 256 (``get_lin_function`` default).
        max_shift: ``mu`` at sequence length 4096 (``get_lin_function`` default).
        shift: If true, warp the uniform grid with ``time_shift``; otherwise
            return the uniform grid unchanged.

    Returns:
        Timesteps as a plain Python list of floats.
    """
    schedule = torch.linspace(1, 0, num_steps + 1)
    if not shift:
        return schedule.tolist()
    # mu is interpolated linearly in the sequence length.
    mu_of_seq_len = get_lin_function(y1=base_shift, y2=max_shift)
    shifted = time_shift(mu_of_seq_len(image_seq_len), 1.0, schedule)
    return shifted.tolist()

# Example: print the shifted schedule for a 1024x1024 image with 50 steps.
timesteps = get_schedule( # list of num_steps + 1 values, from 1 down to 0
            num_steps=50,
            image_seq_len=(1024 // 16) * (1024 // 16), # vae_scale_factor = 16
            shift=True,  # Set True for Flux-dev, False for Flux-schnell
        )

print(timesteps)

In [None]:
@torch.inference_mode()
def forward_denoise(pipeline, num_steps, prompt, height, width,
                    guidance_scale=3.5,
                    seed=0,
                    base_shift=0.5,
                    max_shift=1.15):
    """Sample latents with a deterministic Euler loop over the Flux transformer.

    Starting from seeded Gaussian noise, each step evaluates
    ``pipeline.transformer`` at ``t_curr`` and updates the packed latents by
    ``latents += (t_prev - t_curr) * pred``. The update is done in float32
    and the result is cast back to ``DTYPE``.

    Args:
        pipeline: Flux pipeline providing ``encode_prompt``, ``transformer``,
            ``progress_bar`` and ``vae_scale_factor``.
        num_steps: Number of Euler steps.
        prompt: Text prompt; passed as both ``prompt`` and ``prompt_2``.
        height: Image height in pixels.
        width: Image width in pixels.
        guidance_scale: Guidance value fed to the transformer at every step.
        seed: Seed for the initial noise.
        base_shift: Forwarded to ``get_schedule``.
        max_shift: Forwarded to ``get_schedule``.

    Returns:
        The unpacked latents as returned by ``FluxPipeline._unpack_latents``;
        they can be turned into images with ``decode_imgs``.
    """
    timesteps = get_schedule( # list of num_steps + 1 values, from 1 down to 0
            num_steps=num_steps,
            image_seq_len=(height // 16) * (width // 16), # vae_scale_factor = 16
            base_shift = base_shift,
            max_shift = max_shift,
            shift=True,  # Set True for Flux-dev, False for Flux-schnell
        )

    prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(prompt=prompt, prompt_2=prompt)

    noise = get_noise( # shape [1, 16, 2 * ceil(height / 16), 2 * ceil(width / 16)]
        num_samples=1,
        height=height,
        width=width,
        device="cuda",
        dtype=DTYPE,
        seed=seed,
    )

    latent_image_ids = FluxPipeline._prepare_latent_image_ids(
        noise.shape[0],
        noise.shape[2],
        noise.shape[3],
        noise.device,
        DTYPE,
    )

    packed_latents = FluxPipeline._pack_latents( # presumably [num_samples, seq_len, 16 * 2 * 2] -- confirm against the installed diffusers version
        noise,
        batch_size=noise.shape[0],
        num_channels_latents=noise.shape[1],
        height=noise.shape[2],
        width=noise.shape[3],
    )
    # State for the (currently disabled) timestep-dependent LoRA switching
    # experiments kept in the comments inside the loop.
    loaded_lora = False
    threshold = 0.65
    # Denoising loop in latent space: timesteps run from 1 down to 0.
    with pipeline.progress_bar(total=len(timesteps)-1) as progress_bar:
        for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:]):
            t_vec = torch.full((packed_latents.shape[0],), t_curr, dtype=packed_latents.dtype, device=packed_latents.device)
            guidance_vec = torch.full((packed_latents.shape[0],), guidance_scale, device=packed_latents.device, dtype=packed_latents.dtype)

            # if t_vec[0] > threshold and not loaded_lora:
            #     pipeline.unload_lora_weights()
            # elif not loaded_lora:
            #     pipeline.unload_lora_weights()
            #     pipeline.load_lora_weights("/root/autodl-tmp/data/2rf_general_ut_13k.safetensors", adapter_name="reflow")
            #     pipeline.set_adapters(["reflow"], adapter_weights=[1.0])
            #     loaded_lora = True

            # if t_vec[0] > threshold and not loaded_lora:
            #     pipeline.unload_lora_weights()
            #     pipeline.load_lora_weights("/root/autodl-tmp/data/2rf_general_ut_13k.safetensors", adapter_name="reflow")
            #     pipeline.set_adapters(["reflow"], adapter_weights=[1.0])
            #     loaded_lora = True
            # elif loaded_lora and t_vec[0] <= threshold:
            #     pipeline.unload_lora_weights()
            #     loaded_lora = False

            print(f"time step: {t_vec[0]}, loaded_lora: {loaded_lora}")

            pred = pipeline.transformer(
                    hidden_states=packed_latents, # shape: [batch_size, seq_len, num_channels_latents], e.g. [1, 4096, 64] for 1024x1024
                    timestep=t_vec,        # range: [0, 1]
                    guidance=guidance_vec, # scalar guidance values for each sample in the batch
                    pooled_projections=pooled_prompt_embeds, # CLIP text embedding
                    encoder_hidden_states=prompt_embeds,     # T5 text embedding
                    txt_ids=text_ids,
                    img_ids=latent_image_ids,
                    joint_attention_kwargs=None,
                    # Fix: this used to be `return_dict=pipeline`, i.e. the
                    # pipeline object passed as a boolean flag. Request the
                    # plain tuple explicitly; [0] is the prediction.
                    return_dict=False,
                )[0]
            # Do the Euler update in float32 to limit bf16 rounding error.
            packed_latents = packed_latents.to(torch.float32)
            pred = pred.to(torch.float32)
            packed_latents = packed_latents + (t_prev - t_curr) * pred
            packed_latents = packed_latents.to(DTYPE)
            # img_latents = packed_latents - t_curr * pred
            # img_latents = FluxPipeline._unpack_latents( # save, shape [num_samples, 16, resolution//8, resolution//8]
            #     img_latents,
            #     height=height,
            #     width=width,
            #     vae_scale_factor=pipeline.vae_scale_factor,
            # )
            # img_pred = decode_imgs(img_latents, pipe)[0]
            # img_pred.save(f"./samples/process/newyork_m1.15_{t_curr:03f}.png")
            progress_bar.update()

    img_latents = FluxPipeline._unpack_latents( # presumably [num_samples, 16, H // 8, W // 8] -- confirm against the installed diffusers version
            packed_latents,
            height=height,
            width=width,
            vae_scale_factor=pipeline.vae_scale_factor,
    )
    return img_latents

In [None]:
@torch.inference_mode()
def stochasitc_forward_denoise(pipeline, num_steps, prompt, height, width, guidance_scale=3.5, seed=0):
    """Sample latents with a stochastic "overshoot and re-noise" loop.

    Each step works on the clock ``t = 1 - t_curr`` (0 at the first step,
    increasing towards 1). The latents are moved along the prediction past
    the next time ``t_next`` to ``t_overshoot = min(t_next * (1 + step), 1)``,
    then scaled by ``a = t_next / t_overshoot`` and mixed with fresh Gaussian
    noise weighted by ``b = sqrt((1 - t_next)^2 - (a - t_next)^2)``.

    Args:
        pipeline: Flux pipeline providing ``encode_prompt``, ``transformer``,
            ``progress_bar`` and ``vae_scale_factor``.
        num_steps: Number of sampling steps.
        prompt: Text prompt; passed as both ``prompt`` and ``prompt_2``.
        height: Image height in pixels.
        width: Image width in pixels.
        guidance_scale: Guidance value fed to the transformer at every step.
        seed: Seed for the initial noise and for the per-step noise generator.

    Returns:
        The unpacked latents as returned by ``FluxPipeline._unpack_latents``.
    """
    # NOTE(review): this generator and the one inside get_noise() are both
    # seeded with `seed`, so the per-step noise and the initial noise come
    # from identically seeded streams -- confirm this is intended.
    generator = torch.Generator(device="cuda").manual_seed(seed)

    timesteps = get_schedule( # list of num_steps + 1 values, from 1 down to 0
            num_steps=num_steps,
            image_seq_len=(height // 16) * (width // 16), # vae_scale_factor = 16
            shift=True,  # Set True for Flux-dev, False for Flux-schnell
        )

    prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(prompt=prompt, prompt_2=prompt)

    noise = get_noise( # shape [1, 16, 2 * ceil(height / 16), 2 * ceil(width / 16)]
        num_samples=1,
        height=height,
        width=width,
        device="cuda",
        dtype=DTYPE,
        seed=seed,
    )

    latent_image_ids = FluxPipeline._prepare_latent_image_ids(
        noise.shape[0],
        noise.shape[2],
        noise.shape[3],
        noise.device,
        DTYPE,
    )

    packed_latents = FluxPipeline._pack_latents( # presumably [num_samples, seq_len, 16 * 2 * 2] -- confirm against the installed diffusers version
        noise,
        batch_size=noise.shape[0],
        num_channels_latents=noise.shape[1],
        height=noise.shape[2],
        width=noise.shape[3],
    )

    # Overshoot target: go past t by a relative margin equal to the step size.
    overshot_func = lambda t, dt: t * (1 + dt)

    # Denoising loop in latent space: timesteps run from 1 down to 0.
    with pipeline.progress_bar(total=len(timesteps)-1) as progress_bar:
        for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:]):
            t_vec = torch.full((packed_latents.shape[0],), t_curr, dtype=packed_latents.dtype, device=packed_latents.device)
            guidance_vec = torch.full((packed_latents.shape[0],), guidance_scale, device=packed_latents.device, dtype=packed_latents.dtype)
            pred = pipeline.transformer(
                    hidden_states=packed_latents, # shape: [batch_size, seq_len, num_channels_latents], e.g. [1, 4096, 64] for 1024x1024
                    timestep=t_vec,        # range: [0, 1]
                    guidance=guidance_vec, # scalar guidance values for each sample in the batch
                    pooled_projections=pooled_prompt_embeds, # CLIP text embedding
                    encoder_hidden_states=prompt_embeds,     # T5 text embedding
                    txt_ids=text_ids,
                    img_ids=latent_image_ids,
                    joint_attention_kwargs=None,
                    # Fix: this used to be `return_dict=pipeline`, i.e. the
                    # pipeline object passed as a boolean flag. Request the
                    # plain tuple explicitly; [0] is the prediction.
                    return_dict=False,
                )[0]
            # Switch to the increasing clock t = 1 - t_curr.
            t = 1.0 - t_curr
            step_size = t_curr - t_prev
            t_next = min(t + step_size, 1)

            pred = pred.to(torch.float32)
            packed_latents = packed_latents.to(torch.float32)
            t_overshoot = min(overshot_func(t_next, step_size), 1)

            # Same sign convention as the deterministic Euler update, but
            # stepping to t_overshoot instead of t_next.
            sample_overshoot = packed_latents - (t_overshoot - t) * pred

            a = t_next / t_overshoot
            # Clamp at 0: float rounding can make the difference slightly
            # negative, and a negative float ** 0.5 yields a complex number.
            b = max((1 - t_next) ** 2 - (a - t_next) ** 2, 0.0) ** (0.5)

            print(f"t = {t_curr}, overshoot t = {1 - t_overshoot}, a = {a}, b = {b}")

            noise = torch.randn(packed_latents.shape,
                                dtype=packed_latents.dtype,
                                device=packed_latents.device,
                                generator=generator)
            packed_latents = sample_overshoot * a + noise * b
            packed_latents = packed_latents.to(DTYPE)
            progress_bar.update()

    img_latents = FluxPipeline._unpack_latents( # presumably [num_samples, 16, H // 8, W // 8] -- confirm against the installed diffusers version
            packed_latents,
            height=height,
            width=width,
            vae_scale_factor=pipeline.vae_scale_factor,
    )
    return img_latents

In [None]:
# Evaluation set: each entry holds the text prompt, the RNG seed to sample it
# with, and a short slug that the loop below uses in the output file name.
prompts_and_seeds = [
    {
        "prompt": "A high resolution photo of a scientist, white background, photo-realistic, high-detail.",
        "seed": 123,
        "name": "scientist-photo"
    },
    {
        "prompt": "A vibrant, starry night sky illuminates a lively street café, with warm golden lights spilling from its windows. The café is nestled on a narrow cobblestone street, surrounded by rustic buildings with swirling, textured brushstrokes. Bold, dynamic colors—deep blues and glowing yellows—fill the scene. People are seated at small round tables, sipping coffee, and chatting. The atmosphere is cozy and inviting, yet full of movement and energy, capturing the timeless essence of a Van Gogh painting.",
        "seed": 123,
        "name": "starry-night-cafe"
    },
    {
        "prompt": "An exquisite gothic queen vampiress with dark blue hair and crimson red eyes: Her sensuous white skin gleams in the atmospheric, dense fog, creating an epic and dramatic mood. This hyper-realistic portrait is filled with morbid beauty, from her gothic attire to the intense lighting that highlights every intricate detail. The scene combines glamour with dark, mysterious elements, blending fantasy and horror in a visually stunning way.",
        "seed": 123,
        "name": "gothic-vampiress"
    },
    {
        "prompt": "Jewelry design, a ring with bright rose-cut blue diamonds, surrounded by small lily-of-the-valley flower-shaped diamonds, golden stems form the ring of the ring. The center of the base is a beautiful rose gold, with a detachable black ring on both sides.",
        "seed": 123,
        "name": "jewelry-ring-design"
    },
    {
        "prompt": "Sci-fi entity with a mix of organic and mechanical elements: This oil painting-style portrait features a figure with a heavily brush-stroked texture, focusing on the upper body. The entity's gaze is locked, evoking a sense of horror tied to technology. The black and chrome color scheme, inspired by Tsutomu Nihei’s dystopian environments, creates a chaotic, hyper-detailed composition filled with raw, ultra-realistic elements.",
        "seed": 123,
        "name": "sci-fi-entity"
    },
    {
        "prompt": "Two cats dressed as samurais engaging in a duel, inspired by Akira Kurosawa's movie style. The photorealistic artwork is rich in high-detail textures, capturing the intensity and elegance of their battle. The scene is both humorous and masterful, blending realism with fantasy.",
        "seed": 123,
        "name": "samurai-cats-duel"
    },
    {
        "prompt": "A red race car rendered in the style of Sam Spratt, blending historical illustrations with old masters' monochromatic realism. Influences from Genndy Tartakovsky and Masaccio give the car a soggy, gritty texture, evoking a sense of timeless speed and power.",
        "seed": 123,
        "name": "racecar-gritty"
    },
    {
        "prompt": "An Asian girl wearing an elegant top or dress, set against a vibrant, neon-infused night. The style draws from iconic album covers with soft-focus portraits, combining light purple and amber hues. The scene feels both modern and timeless, with chicano-inspired elements adding depth to the image.",
        "seed": 24,
        "name": "asian-girl-neon"
    },
    {
        "prompt": "Close-up of a red rose breaking through a cube of cracked ice. The frosted surface glows with a cinematic light, with blood dripping from the petals, creating a stark contrast. The melting ice enhances the Valentine’s Day theme, with sharp focus and intricate, dramatic details.",
        "seed": 123,
        "name": "rose-in-ice"
    },
    {
        "prompt": "A painting of a beautiful woman in an abstract, non-representational style. The image uses bold colors and shapes to express emotions and feelings. The imaginative composition features stunning details, blending artistic elements into a visually captivating piece.",
        "seed": 123,
        "name": "abstract-woman-painting"
    },
    {
        "prompt": "Minimal home office design with warm sunlight and artificial lighting. The soft atmosphere is enhanced by carefully placed objets, creating a clean, inviting, and warm workspace with a focus on simplicity and functionality.",
        "seed": 8,
        "name": "minimal-home-office"
    },
    {
        "prompt": "Front view of a Mediterranean terrace captured at sunset. Terracotta tiles, wrought iron details, and lush plantings create a warm, inviting atmosphere. Shot with a Panasonic Lumix S1R, 50mm f/1.4 lens, capturing the tranquil ambiance perfectly.",
        "seed": 256,
        "name": "mediterranean-terrace-sunset"
    },
    {
        "prompt": "A steampunk city with towering skyscrapers and intricate clockwork mechanisms. Steam billows from chimneys, and airships navigate skylanes above. The city is alive with gears and pistons, all rendered in 32K UHD with dynamic angles and highly detailed professional photography.",
        "seed": 213,
        "name": "steampunk-city"
    },
    {
        "prompt": "A hummingbird flying near a flower in a forest. The masterpiece captures the dynamic motion of the bird, with natural light casting a soft glow on its feathers. The photorealistic scene is hyper-detailed, from the bird’s delicate wings to the vibrant surroundings, evoking a sense of wonder in wildlife photography.",
        "seed": 9,
        "name": "hummingbird-flight"
    },
    {
        "prompt": "A hyper-detailed resin ring, featuring an intricate sci-fi city inside, illuminated by glowing LEDs. The ring is set against a cyberpunk background with vivid colors and futuristic elements, blending jewelry design with imaginative, sci-fi architecture.",
        "seed": 284,
        "name": "resin-ring-sci-fi"
    },
    {
        "prompt": "A cinematic shot of a cyberpunk industrial city, featuring tall futuristic buildings illuminated by neon lights. The scene is set at sunset, with warm colors and high detail. The high-resolution image captures the striking contrast between modern architecture and the glowing futuristic skyline.",
        "seed": 375,
        "name": "cyberpunk-city-sunset"
    },
    {
        "prompt": "Photorealistic black semi-glossy retail shop facade, captured with a Canon EOS 5D. The geometric outline of the building contrasts with the motion-blurred crowd and car tail lights. Dramatic lighting effects highlight the sleek, award-winning design in a rainy, ultra-realistic scene.",
        "seed": 5,
        "name": "retail-shop-facade"
    },
    {
        "prompt": "An 80s synthwave-inspired purple cat wearing bright orange sunglasses. The scene features a vivid sunset in the background, with neon colors and high-detail design, creating a nostalgic, futuristic vibe with a playful, retro twist.",
        "seed": 39,
        "name": "synthwave-cat"
    },
    {
        "prompt": "3D rendering of a Gundam face from the front, with a tactile paper-work style. The vibrant color palette includes burgundy, yellow, and blue. Emphasis on rough-textured paper, creating a handmade, masterpiece-like effect.",
        "seed": 326,
        "name": "gundam-face-render"
    },
    {
        "prompt": "A poster titled 'A Family Veterinarian,' featuring a watercolor portrait of a West Highland Terrier. The barnboard background adds warmth, with the tagline 'Caring is our duty' below, evoking trust and compassion in this heartwarming design.",
        "seed": 13,
        "name": "family-veterinarian-poster"
    },
    {
        "prompt": "A lion painted in whimsical watercolors, featuring a mix of dark yellow, green, and dark purple tones. The characterful pen and ink style, combined with light orange and navy colors, creates a dynamic, bold animal portrait with a surreal twist.",
        "seed": 283,
        "name": "whimsical-lion"
    },
    {
        "prompt": "IMG_1018.CR2, aerial view of the Tower of London at midday under cloudy skies, soft lighting highlighting the detailed stonework, high contrast with the surrounding modern buildings, ultra-detailed textures, no blur.",
        "seed": 123,
        "name": "tower-of-london-aerial"
    },
    {
        "prompt": "IMG_1018.CR2: aerial view of the Great Wall of China in autumn, vivid colors of orange and red foliage, golden hour sunlight casting long shadows along the wall, ultra-detailed textures of the stone, clear skies, no deformations.",
        "seed": 123,
        "name": "great-wall-autumn"
    },
    {
        "prompt": "Panoramic view of the Eiffel Tower and Paris, illuminated by warm golden lights, dramatic lighting casting shadows, deep blue sky, ultra-detailed architecture.",
        "seed": 456,
        "name": "eiffel-tower-paris"
    },
    {
        "prompt": "A morning shot of the U.S. Capitol Building, with its iconic dome glowing in the soft light of dawn against the clear sky. The symmetry of the building’s classical architecture is highlighted, framed by the grand avenues leading up to it, as the city begins to stir with the first signs of life.",
        "seed": 9837,
        "name": "capitol-morning"
    }
]


# Define steps_list: the step counts to sample every prompt with.
steps_list = [4, 8, 16, 28, 50]
# steps_list = [1, 2, 3, 4, 6, 8, 12, 16, 28, 50]

# Baseline sweep: one 896x1152 image per (prompt, step count) pair, saved under
# ./samples/city/ (the directory must already exist).
for item in prompts_and_seeds:
    prompt = item["prompt"]
    seed = item["seed"]
    name = item["name"]
    for num_steps in steps_list:
        img_latents = forward_denoise(
            pipe,
            num_steps=num_steps,
            prompt=prompt,
            height=896,
            width=1152,
            guidance_scale=3.5,
            seed=seed,
            max_shift=1.15
        )
        # decode_imgs returns a list of images; the batch size is 1 here.
        out = decode_imgs(img_latents, pipe)[0]
        out.save(f"./samples/city/{name}_baseline_{num_steps}steps_seed{seed}.png")

    
# NOTE: 4 / 8-step max-shift=1.95

In [None]:
# Candidate values for the schedule's base_shift / max_shift parameters; each
# list is iterated over by the sweep at the end of this cell.
base_shift = [0.5]
# max_shift = np.arange(1.0, 2.0, 0.1).tolist() 
max_shift = [1.15]

# City / landmark prompts, each with its own RNG seed and a short slug name.
prompts_and_seeds = [
    {
        "prompt": "A high-resolution night panoramic photo of Paris, centered on the illuminated Eiffel Tower glowing against the dark sky. The iconic structure radiates golden light, its intricate latticework clearly visible as it ascends into the night. In the foreground, the Seine River reflects the shimmering lights of the tower, creating a sparkling mirror-like effect on the water. The scene is framed by historic Parisian architecture, softly lit by street lamps, adding a sense of timeless elegance to the atmosphere. The sky above is a deep blue, with faint stars twinkling, while the city buzzes with the energy of nightlife, captured in the movement of cars and pedestrians below. The rich contrast between the warm lights of the Eiffel Tower and the cool tones of the night sky enhances the drama and beauty of the scene, showcasing Paris in all its romantic splendor.",
        "seed": 2337,
        "name": "paris-eiffel-night"
    },
    {
        "prompt": "A high resolution panoramic photo: stretches across New York City, revealing its iconic skyline in remarkable detail. Towering skyscrapers like the Empire State Building and One World Trade Center dominate the scene, rising above the bustling streets below. The Hudson River mirrors the city’s bright lights, creating a shimmering reflection on the water’s surface. The Brooklyn Bridge arches gracefully across the river, linking Manhattan to the outer boroughs. As the sun sets, a warm glow bathes the city, casting the buildings in golden light while the sky gradually darkens, highlighting the contrast between the illuminated structures and the encroaching night. The composition captures the vibrant energy and grandeur of the metropolis in perfect harmony.",
        "seed": 4582,
        "name": "newyork-skyline-sunset"
    },
    {
        "prompt": "A vibrant daytime scene of Hong Kong's skyline, featuring the towering International Commerce Centre standing tall against a bright, clear blue sky. The iconic Victoria Harbour shimmers under the midday sun, with ferries and boats crisscrossing the water. The dynamic contrast between modern skyscrapers and traditional buildings along the harborfront highlights the city’s unique blend of cultures. Soft clouds drift lazily above, casting gentle shadows over the buildings. The scene is full of life with bustling streets and vivid colors. Ultra-detailed, sharp, with perfect lighting that enhances every architectural detail. High-resolution, immersive, and realistic.",
        "seed": 5490,
        "name": "hongkong-daytime"
    },
    {
        "prompt": "A stunning night view of Tokyo, showcasing the iconic Tokyo Tower illuminated in warm golden lights, contrasting with the modern glass skyscrapers in the background. The scene captures the vibrant energy of the bustling city streets below, filled with neon signs and colorful billboards. Reflections from nearby buildings shimmer on the wet pavement after a light rain. A deep blue sky with a hint of clouds frames the skyline. Cinematic lighting emphasizes the city's dynamic architecture, with soft shadows and rich detail in every building. High-resolution, ultra-detailed, vibrant, and immersive.",
        "seed": 5470,
        "name": "tokyo-night"
    },
    {
        "prompt": "A lively midday bird's-eye view of Havana's Malecón during the dry season, with vibrant 1950s classic cars cruising along the waterfront. The bright blue sky stretches above, uninterrupted by clouds, as the sun casts sharp shadows on the colorful, weathered buildings lining the street. The turquoise waters of the Caribbean Sea shimmer under the sunlight, gently crashing against the seawall. The contrast between the vibrant cars, pastel-colored architecture, and the sparkling ocean creates a dynamic, nostalgic atmosphere. Ultra-detailed, high-resolution, with crisp lighting and rich textures highlighting the architecture and vehicles.",
        "seed": 800,
        "name": "havana-malecon"
    },
    {
        "prompt": "An awe-inspiring early morning view of Shanghai's Pudong skyline from The Bund, with the iconic Oriental Pearl Tower and sleek skyscrapers rising through the soft morning mist. The hazy sunlight gently filters through, casting a warm glow on the futuristic buildings while the mist adds a sense of mystery and depth. The Huangpu River reflects the emerging skyline, with faint ripples on its surface. The contrast between the modern architecture and the tranquil atmosphere creates a serene yet powerful scene. Ultra-detailed, high-resolution, with soft lighting and rich textures highlighting the misty landscape and towering structures.",
        "seed": 1252,
        "name": "shanghai-pudong-morning"
    },
    {
        "prompt": "A breathtaking view of the Golden Gate Bridge at sunset, with its iconic red-orange towers rising above a thick layer of fog. The golden and pink hues of the setting sun paint the sky in vibrant colors, casting a soft, warm glow on the bridge’s steel structure. The misty clouds swirl gently around the base of the bridge, partially obscuring the bay below, while the upper portion stands clear against the colorful sky. The fading sunlight creates long shadows and a serene, dreamlike atmosphere. Ultra-detailed, cinematic composition, with rich textures and dramatic lighting enhancing the scene.",
        "seed": 9415,
        "name": "goldengate-sunset"
    },
    {
        "prompt": "A breathtaking twilight view of Tokyo with Mount Fuji standing majestically in the background, its snow-capped peak glowing under the fading sunlight. The vibrant city lights of Tokyo illuminate the bustling metropolis below, with skyscrapers and winding roads creating a dynamic, interconnected scene. The sky transitions from a deep orange near the horizon to a rich blue as night falls, blending seamlessly with the misty silhouette of the distant mountains. The contrast between the natural beauty of Mount Fuji and the vibrant energy of the cityscape creates a stunning, cinematic composition. Ultra-detailed, high-resolution, with soft yet dramatic lighting.",
        "seed": 3257,
        "name": "tokyo-fuji-twilight"
    },
    {
        "prompt": "A breathtaking panoramic view unfolds over Rio de Janeiro, capturing the city's dynamic urban landscape in stunning detail. Christ the Redeemer stands majestically atop Corcovado Mountain, a sentinel overlooking the sprawling city below. Sugarloaf Mountain, with its iconic peak, rises prominently in the distance, while the coastline gently curves around Guanabara Bay. The sunset bathes the scene in a golden hue, casting a warm glow over the entire city. The bustling streets pulse with life, contrasting beautifully with the calm, reflective waters of the bay. Architectural elements are meticulously rendered, from modern high-rises to historic buildings, all contributing to the vibrant and lively atmosphere that defines Rio. The composition balances natural and man-made beauty, showcasing the grandeur of this world-famous city.",
        "seed": 123945,
        "name": "rio-sunset-panorama"
    }
]

steps_list = [4, 8, 16]

# The output files are named "rio_*" and the sweep used seed 123945, which is
# the seed of the Rio entry in `prompts_and_seeds` above. The loop used to
# read the global `prompt` left over from an earlier cell (a different
# prompt); take the Rio prompt and seed from the list explicitly instead.
rio_item = next(
    entry for entry in prompts_and_seeds if entry["name"] == "rio-sunset-panorama"
)

# Sweep every (step count, base_shift, max_shift) combination.
# `base_s` / `max_s` avoid shadowing the builtin `max`.
for num_steps in steps_list:
    for base_s in base_shift:
        for max_s in max_shift:
            img_latents = forward_denoise(pipe, num_steps=num_steps, prompt=rio_item["prompt"],
                                          height=896, width=1152, guidance_scale=3.5, seed=rio_item["seed"],
                                          base_shift=base_s, max_shift=max_s)
            out = decode_imgs(img_latents, pipe)[0]
            out.save(f"./samples/hyper/rio_{num_steps}step_{base_s:.2f}_{max_s:.2f}.png")

In [None]:
# prompt = "A AQUACOLTOK watercolor painting of a small, cozy café with outdoor seating, surrounded by flowering plants. The scene captures the charm of a quiet morning with the sun shining on the cobblestone streets. High quality, detailed architecture."

# prompt = "A AQUACOLTOK watercolor painting of UT campus"

# prompt = "A photo of UT campus"


# prompt = "A photo of sks Corgi puppy wearing sunglasses, smiling at the camera, close-up look, highly detailed, ultra-realistic." # 33

# prompt = "A sks Corgi puppy wearing a superhero costume, flying through the city, dynamic pose, ultra-realistic, high resolution." # 4459

# prompt = "Photograph of a sks Corgi puppy wearing a golden crown, sitting majestically on a royal throne in an opulent palace hall, surrounded by luxurious decorations, best quality, high resolution" # 6964

# Active prompt for the sweep at the end of this cell. The stray trailing comma
# after the string has been removed: it turned `prompt` into a 1-tuple.
# The trailing number is presumably the seed (the sweep below uses seed=6489).
prompt = "Photograph of a sks corgi puppy running through snow in a winter wonderland, with snowflakes gently falling, best quality, high resolution"  # 6489

# steps_list = [4, 6, 8, 12, 16, 28, 50]

# Prompts that ask for specific quoted text to appear in the image, each with
# an RNG seed and a short slug name.
# NOTE(review): the sweep at the end of this cell reads `prompt`, not this
# list -- confirm whether this list is meant to be iterated.
prompts_and_seeds = [
    {
        "prompt": "A cat holding a sign 'hello world'.",
        "seed": 123,
        "name": "cat-hello-world"
    },
    {
        "prompt": "In the middle of a bustling nighttime city, a large electronic billboard displays the words 'WELCOME TO THE FUTURE' in bold, glowing letters. The city is alive with moving cars, pedestrians, and neon lights reflecting off the wet streets, while the glowing text dominates the skyline.",
        "seed": 123,
        "name": "future-billboard-city"
    },
    {
        "prompt": "On a peaceful, sunlit beach, the words 'Life's a Journey' are written in the wet sand near the shore. Small waves are gently washing up, partially erasing some of the letters, while seagulls fly overhead and the sun sets in the distance.",
        "seed": 123,
        "name": "beach-lifes-journey"
    },
    {
        "prompt": "High on the side of a rugged mountain, the words 'Reach for the Stars' are carved into the rocky cliff face. Snow-capped peaks can be seen in the background, with mist swirling around the base of the cliff. The carved text is weathered and looks ancient, as if etched over centuries.",
        "seed": 123,
        "name": "mountain-reach-stars"
    },
    {
        "prompt": "In a dense, fog-filled forest at twilight, the words 'Lost in Time' appear to glow softly in the mist, floating among the tall trees. The ethereal light from the text casts subtle shadows on the ground, with the fog swirling around the glowing letters and distant forest shapes barely visible.",
        "seed": 123,
        "name": "forest-lost-time"
    },
    {
        "prompt": "Beneath the ocean's surface, the words 'Deep Blue' are formed out of vibrant coral. Fish swim around the coral letters, and shafts of sunlight penetrate the clear blue water, illuminating the colorful sea life and the delicate texture of the coral.",
        "seed": 123,
        "name": "deep-blue-coral"
    },
    {
        "prompt": "In the vast, golden dunes of a desert, the words 'Endless Horizons' are shaped by the shifting sands. Long shadows are cast by the setting sun, emphasizing the curves of the letters. The wind gently blows sand across the words, partially covering some parts as the desert stretches out endlessly.",
        "seed": 999,
        "name": "desert-endless-horizons"
    },
    {
        "prompt": "In the middle of a frozen arctic landscape, the words 'Frozen in Time' are engraved into a massive iceberg. The icy letters glisten in the soft light of the polar sun, while chunks of ice float in the frigid water, and snowflakes gently fall from the sky.",
        "seed": 999,
        "name": "arctic-frozen-time"
    },
    {
        "prompt": "In a gritty urban alley, the words 'Urban Jungle' are spray-painted in bold, graffiti-style letters on a brick wall. The surrounding area is filled with street art, faded posters, and signs of urban decay, with the graffiti text standing out in bright, contrasting colors.",
        "seed": 999,
        "name": "urban-jungle-graffiti"
    },
    {
        "prompt": "On the ground of a quiet forest in autumn, the words 'Falling Leaves' are arranged using colorful fallen leaves. The orange, red, and yellow leaves form the letters, blending into the vibrant forest floor, while trees with golden leaves tower overhead, and a gentle breeze rustles through the scene.",
        "seed": 0,
        "name": "forest-falling-leaves"
    },
    {
        "prompt": "In a vast, clear night sky filled with stars, the words 'Dream Beyond' are written with glowing stardust, as if they were formed from the stars themselves. The Milky Way is visible in the background, and the glowing text seems to blend with the constellations, casting a faint, magical light on the landscape below.",
        "seed": 0,
        "name": "stardust-dream-beyond"
    }
]

# Step counts to sample the current `prompt` with.
steps_list = [4, 6, 8, 12, 16, 28, 50]

# One 1024x1024 image per step count, all with the same prompt and seed, so the
# outputs differ only in the number of sampling steps. The index `i` is unused.
for i, num_steps in enumerate(steps_list):
    img_latents = forward_denoise(pipe, num_steps=num_steps, prompt=prompt, 
                                  height=1024, width=1024, guidance_scale=3.5, seed=6489, max_shift=1.15)
    # Alternative: the stochastic sampler (disabled).
    # img_latents = stochasitc_forward_denoise(pipe, num_steps=num_steps, prompt=prompt, 
    #                               height=1024, width=1024, guidance_scale=3.5, seed=123) 
    out = decode_imgs(img_latents, pipe)[0]
    # out.save(f"./aqua_imgs/lion_{num_steps}_steps.png")
    # The target directory must already exist.
    out.save(f"/root/dreambooth_flux/samples/sks_dog_1019/snow_baseline_{num_steps}steps.png")

In [35]:
# Drop any LoRA weights currently attached to the pipeline before loading new ones.
pipe.unload_lora_weights()

# Attach the LoRA checkpoint under the adapter name "reflow".
pipe.load_lora_weights("/root/autodl-tmp/data/sks_dog_baseline.safetensors", adapter_name="reflow")
# pipe.set_adapters(["reflow"], adapter_weights=[1.0])
# pipe.load_lora_weights("/root/autodl-tmp/data/Hyper-FLUX.1-dev-8steps-lora.safetensors", adapter_name="reflow")

In [None]:
# Remove LoRA weights from the pipeline; the alternatives below (style and
# accelerator adapters, fusing) are disabled experiments.
pipe.unload_lora_weights()

# pipe.load_lora_weights("/root/autodl-tmp/data/flux_dev_anime.safetensors", adapter_name="style")
# pipe.load_lora_weights("/root/autodl-tmp/data/2rf-skewed_t-accelerator.safetensors", adapter_name="acclerator")

# # pipe.set_adapters(["style"], adapter_weights=[0.9])
# pipe.set_adapters(["style", "acclerator"], adapter_weights=[1., 1.])

# pipe.fuse_lora(adapter_names=["accelerate", "water"], lora_scale=1.0)

In [None]:
# Display the "prompt" entry stored in a saved .pt file.
# NOTE(review): torch.load unpickles the file, so only load files from a
# trusted source (consider passing weights_only=True where supported).
torch.load("/root/autodl-tmp/data/1rf_sights/prompt/prompt_00044.pt")["prompt"]

In [None]:
# prompt = "panoramic view of the Eiffel Tower and Paris, illuminated by warm golden lights, dramatic lighting casting shadows, deep blue sky, ultra-detailed architecture"

# prompt = "aerial view of the Great Wall of China in autumn, vivid colors of orange and red foliage, golden hour sunlight casting long shadows along the wall, ultra-detailed textures of the stone, clear skies, no deformations"

# Use the prompt stored in a saved .pt file.
# NOTE(review): torch.load unpickles the file; only load trusted files.
prompt = torch.load("/root/autodl-tmp/data/1rf_sights/prompt/prompt_00044.pt")["prompt"]

# 44, 71, 91, 128  (presumably other prompt file indices that were tried -- confirm)

# Step counts to compare, from a single step up to 50.
steps_list = [1, 2, 3, 4, 6, 8, 12, 16, 28, 50]

# One 1024x1024 image per step count with a fixed seed and default shift
# settings; the index `i` is unused. The target directory must already exist.
for i, num_steps in enumerate(steps_list):
    img_latents = forward_denoise(pipe, num_steps=num_steps, prompt=prompt, 
                                  height=1024, width=1024, guidance_scale=3.5, seed=123)
    out = decode_imgs(img_latents, pipe)[0]
    out.save(f"./samples/general_infer_t/tower_{num_steps}_steps.png")