In [None]:
!pip install xfuser

In [None]:
!pip install huggingface_hub


In [None]:
import os

from huggingface_hub import login

# Never paste an access token into the notebook source: it ends up in version
# control and in shared copies. Take it from the HF_TOKEN environment variable
# instead. An unset or empty variable is normalised to None so that login()
# uses its own prompt rather than trying to validate an empty token.
login(token=os.environ.get("HF_TOKEN") or None)


In [None]:
import torch
import os
from diffusers import StableDiffusion3Pipeline
from IPython.display import display
from xfuser import xDiTParallel
from xfuser import xFuserArgs
from xfuser.config import FlexibleArgumentParser
import sys
# Describe a one-process, one-node rendezvous through environment variables.
# Presumably these are read by the distributed initialisation that xfuser
# performs when the engine config is created — confirm against xfuser.
os.environ.update({
    'RANK': '0',
    'WORLD_SIZE': '1',
    'MASTER_ADDR': 'localhost',
    'MASTER_PORT': '29500',
    'LOCAL_RANK': '0',
})


def run_sd3_with_xdit(
    prompt: str = "a serene mountain landscape at sunset, detailed, photorealistic",
    height: int = 1024,
    width: int = 1024,
    steps: int = 20,
    cfg: float = 7.0,
    seed: int = 42,
):
    """Generate one image with SD3 Medium wrapped in the xDiT engine.

    The xFuser engine/input configuration is built by feeding an explicit
    argument list to the xFuser CLI parser, with every parallel degree set to
    1. The diffusers pipeline is loaded in float16 without its third text
    encoder, model CPU offload is enabled, and the pipeline is wrapped with
    ``xDiTParallel`` before running inference.

    Args:
        prompt: Text prompt forwarded to the pipeline via the xFuser config.
        height: Output height, passed as ``--height``.
        width: Output width, passed as ``--width``.
        steps: Number of inference steps, passed as ``--num_inference_steps``.
        cfg: Guidance scale handed directly to the pipeline call.
        seed: Seed for the CUDA ``torch.Generator`` (via ``--seed``).

    Returns:
        ``output.images[0]`` from the wrapped pipeline call (presumably a PIL
        image, since the caller invokes ``.save`` on it — confirm).
    """
    model_id = "stabilityai/stable-diffusion-3-medium-diffusers"

    # NOTE(review): this message mentions an FP8 T5 encoder, but the pipeline
    # below is loaded with text_encoder_3=None — confirm the intended wording.
    print("üì¶ Loading SD3 Medium with FP8 T5 encoder...")

    parser = FlexibleArgumentParser(description="xFuser Arguments")
    parser = xFuserArgs.add_cli_args(parser)

    # Hand the arguments to the parser explicitly instead of overwriting
    # sys.argv: mutating the process-wide argv would leak into the rest of the
    # notebook kernel and would not be undone after this function returns.
    cli_args = [
        '--model', model_id,
        '--height', str(height),
        '--width', str(width),
        '--num_inference_steps', str(steps),
        '--prompt', prompt,
        '--seed', str(seed),
        '--data_parallel_degree', '1',
        '--ulysses_degree', '1',
        '--ring_degree', '1',
        '--pipefusion_parallel_degree', '1',
        '--tensor_parallel_degree', '1',
    ]

    args = parser.parse_args(cli_args)
    engine_args = xFuserArgs.from_cli_args(args)
    engine_config, input_config = engine_args.create_config()

    pipe = StableDiffusion3Pipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        text_encoder_3=None,
    )

    # Model CPU offload manages device placement itself. Do NOT follow it with
    # pipe.to("cuda"): diffusers warns that doing so cancels the memory savings
    # of offloading, so the two are treated as mutually exclusive here.
    pipe.enable_model_cpu_offload()

    print("‚öôÔ∏è  Wrapping with xDiT engine...")
    pipe = xDiTParallel(pipe, engine_config, input_config)

    print(f"üé® Generating: '{prompt}'")
    print(f"   Resolution: {width}x{height} | Steps: {steps} | CFG: {cfg}")

    with torch.inference_mode():
        output = pipe(
            height=input_config.height,
            width=input_config.width,
            prompt=input_config.prompt,
            num_inference_steps=input_config.num_inference_steps,
            output_type=input_config.output_type,
            guidance_scale=cfg,
            generator=torch.Generator(device="cuda").manual_seed(input_config.seed),
        )

    torch.cuda.empty_cache()
    return output.images[0]

# Demo run: render one image, show it inline, save it, then report peak VRAM.
generation_settings = {
    "prompt": "a serene mountain landscape at sunset, detailed, photorealistic",
    "height": 1024,
    "width": 1024,
    "steps": 20,
    "cfg": 7.0,
    "seed": 42,
}
image = run_sd3_with_xdit(**generation_settings)

display(image)
image.save("output.png")
print("‚úÖ Image saved to output.png")

if torch.cuda.is_available():
    # Peak bytes allocated on device 0, converted to GiB for display.
    peak_bytes = torch.cuda.max_memory_allocated(0)
    peak_gib = peak_bytes / 1024**3
    print(f"üìä VRAM Used: {peak_gib:.2f} GB")