# SDXL txt2img and img2img Inference

This notebook demonstrates text-to-image (txt2img) and image-to-image (img2img) inference using the project's SDXL engine (`ml.src.diffusion.engine.SDXLEngine`).

Requirements:
- GPU with VRAM 8GB+ recommended
- The repository checked out locally and this notebook opened within it

Outputs are saved under `ml/notebooks/ml/outputs/notebook-<timestamp>` (relative to the repository root).


In [None]:
import sys, os
from pathlib import Path
from datetime import datetime

# Locate the repository root: the nearest directory, starting at the current
# working directory and walking up through every ancestor, that contains
# 'ml/src'.
_cwd = Path.cwd()
repo_root = next(
    (cand for cand in (_cwd, *_cwd.parents) if (cand / 'ml' / 'src').exists()),
    None,
)
if repo_root is None:
    # Keep the best-effort fallback to the working directory, but say so:
    # otherwise the first visible symptom is an ImportError for `ml.src...`
    # further down, and outputs land in an unexpected place.
    repo_root = _cwd
    print("Warning: 'ml/src' not found in", _cwd, 'or any parent directory; using the current directory')

# Make `ml.src...` importable from the notebook.
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))
print('Repo root:', repo_root)

# Ensure outputs directory exists (one fresh, timestamped folder per run)
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
out_dir = repo_root / 'ml' / 'notebooks' / 'ml' / 'outputs' / f'notebook-{timestamp}'
out_dir.mkdir(parents=True, exist_ok=True)
print('Output dir:', out_dir)


In [None]:
# Optional: install dependencies from requirements (skip if already installed)
import subprocess

req = repo_root / 'ml' / 'requirements.txt'
if req.exists():
    print('Installing dependencies from', req)
    try:
        # check=False keeps this step best-effort: a failing pip must not
        # abort the notebook. It also means a non-zero exit does NOT raise,
        # so the return code is inspected explicitly below.
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '-q', '-r', str(req)],
            check=False,
        )
    except Exception as e:
        # Reached only when the process could not be launched at all.
        # Deliberately broad: this optional step should never stop the run.
        print('pip install failed or skipped:', e)
    else:
        if result.returncode != 0:
            print('pip install failed or skipped: pip exited with status', result.returncode)
else:
    print('requirements.txt not found at', req)


In [None]:
import torch
from PIL import Image
from ml.src.diffusion.engine import SDXLEngine

# Report the installed PyTorch version and whether torch can see a CUDA device.
print('PyTorch:', torch.__version__, '| CUDA available:', torch.cuda.is_available())


## Configuration
- Adjust the prompt and parameters below.
- Width/height should typically be multiples of 8 (SDXL commonly uses 1024x1024).
- Seed controls reproducibility.


In [None]:
# Model identifier handed to SDXLEngine as 'base_id'
# (presumably a Hugging Face Hub repo id — confirm against the engine).
BASE_ID = 'stabilityai/stable-diffusion-xl-base-1.0'
# Output size; passed both in the engine config and to every generate() call.
WIDTH = 1024
HEIGHT = 1024
# Passed as `steps` (presumably the number of denoising steps — confirm).
STEPS = 30
# Passed as `guidance` (presumably the classifier-free guidance scale — confirm).
GUIDANCE = 7.5
# Seed of the single txt2img/img2img runs and base seed of the multi-frame sequence.
SEED = 12345
# Only placed in the engine config; it is not passed to the generate() calls.
NEGATIVE_PROMPT = None  # e.g., 'blurry, low quality, artifacts'

# Text prompt shared by every generate() call in this notebook.
PROMPT = 'Depict Aldar Kose • Middle-aged Kazakh trickster • traditional chapan robe • kalpak hat • gentle, knowing smile • rooted in Kazakh green steppe heritage, folk textiles, and oral storytelling traditions, cinematic, highly detailed, 8k'
IMG2IMG_STRENGTH = 0.99  # 0.0 = copy init, 1.0 = ignore init


## Initialize Engine
This loads the SDXL txt2img and img2img pipelines and applies memory optimizations automatically (VAE tiling/slicing, xFormers if available).


In [None]:
# Build the engine from the notebook-level configuration values.
engine = SDXLEngine(
    {
        'base_id': BASE_ID,
        'width': WIDTH,
        'height': HEIGHT,
        'steps': STEPS,
        'guidance': GUIDANCE,
        'negative_prompt': NEGATIVE_PROMPT,
    }
)

# NOTE(review): the device label is derived from torch's CUDA availability,
# not queried from the engine itself — confirm they always agree.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Engine ready on', device, '| dtype:', engine.get_dtype())


## Helpers
- Display images inline
- Save images to the timestamped output directory
- Concatenate images side-by-side for quick comparison


In [None]:
from IPython.display import display

def show(img: Image.Image, title: str | None = None):
    """Display *img* inline, preceded by *title* when one is given.

    Args:
        img: Image to render through IPython's ``display``.
        title: Optional caption printed above the image. Nothing is printed
            when it is ``None`` or empty.
    """
    # The caption used to be accepted and then dropped; print it so the
    # descriptions passed by callers actually show up next to the image.
    if title:
        print(title)
    display(img)

def save(img: Image.Image, name: str) -> Path:
    """Write *img* into the run's output directory and return its path.

    Args:
        img: Image to write; the format is chosen by ``img.save`` from the
            file name.
        name: File name relative to ``out_dir``. It may include
            sub-directories, which are created on demand.

    Returns:
        The full path of the written file.
    """
    path = out_dir / name
    # Recreate the target folder if needed, so saving still works when `name`
    # contains a sub-folder or the output directory was removed mid-session.
    path.parent.mkdir(parents=True, exist_ok=True)
    img.save(path)
    return path

def side_by_side(left: Image.Image, right: Image.Image) -> Image.Image:
    """Return a new RGB image with *left* and *right* placed next to each other.

    The canvas is as wide as both inputs together and as tall as the taller
    one; both images are pasted at the top edge.
    """
    total_width = left.width + right.width
    tallest = max(left.height, right.height)
    combined = Image.new('RGB', (total_width, tallest))
    # The right image starts where the left one ends.
    for tile, x_offset in ((left, 0), (right, left.width)):
        combined.paste(tile, (x_offset, 0))
    return combined


In [None]:
# Generate the base txt2img image. Later cells reuse `txt2img` as the img2img
# init image and as the left half of the side-by-side comparison.
txt2img = engine.generate(
    prompt=PROMPT,
    seed=SEED,
    width=WIDTH,
    height=HEIGHT,
    steps=STEPS,
    guidance=GUIDANCE
)

## txt2img
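Generate images from the text prompt. The cell above produces a single base image (`txt2img`) that the img2img sections reuse; the cell below generates eight more images from the same prompt using consecutive seeds (42–49).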


In [None]:
# Seed sweep: generate eight txt2img images from the same prompt, varying only
# the seed (42..49), and collect them in `images` for the contact sheet.
images = []

for i in range(8):
    seed_i = 42 + i
    img = engine.generate(
        prompt=PROMPT,
        seed=seed_i,
        width=WIDTH,
        height=HEIGHT,
        steps=STEPS,
        guidance=GUIDANCE,
    )
    images.append(img)
    # Name the files by seed: the multi-frame sequence section writes
    # `frame_XX.png` into the same output directory and would otherwise
    # overwrite these results.
    save(img, f"txt2img_seed_{seed_i}.png")

In [None]:
def make_contact_sheet(images, cols=4, max_thumb=256):
    """Tile *images* into a single grid of thumbnails.

    Every image is scaled down (never up), keeping its aspect ratio, so that
    neither side exceeds *max_thumb*. Thumbnails are placed row by row, *cols*
    per row, in cells sized to the largest thumbnail, on a black background.

    Args:
        images: Iterable of PIL images.
        cols: Number of thumbnails per row; must be at least 1.
        max_thumb: Upper bound for a thumbnail's width and height.

    Returns:
        The contact sheet as an RGB image, or ``None`` when *images* is empty.

    Raises:
        ValueError: If *cols* is smaller than 1 and *images* is not empty.
    """
    images = list(images)  # accept any iterable, not only sized sequences
    if not images:
        return None
    if cols < 1:
        # Previously surfaced as a ZeroDivisionError or an invalid canvas size.
        raise ValueError(f"cols must be >= 1, got {cols}")

    thumbs = []
    for img in images:
        ratio = min(max_thumb / img.width, max_thumb / img.height, 1.0)
        # Clamp to 1 px so very elongated images do not round down to a
        # zero-sized thumbnail, which cannot be resized to.
        size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
        # resize() already returns a new image; no copy is needed first.
        thumbs.append(img.resize(size))

    rows = (len(thumbs) + cols - 1) // cols  # ceiling division
    cell_w = max(t.width for t in thumbs)
    cell_h = max(t.height for t in thumbs)
    sheet = Image.new("RGB", (cell_w * cols, cell_h * rows), color=(0, 0, 0))
    for idx, thumb in enumerate(thumbs):
        r, c = divmod(idx, cols)
        sheet.paste(thumb, (c * cell_w, r * cell_h))
    return sheet

# Build, display and save an overview of the images collected in `images`.
sheet = make_contact_sheet(images, cols=4, max_thumb=256)
if sheet is not None:
    show(sheet, "Contact sheet of frames")
    # Use a distinct file name: the multi-frame sequence section saves its own
    # sheet as "frames_contact_sheet.png" in the same output directory and
    # would otherwise overwrite this one.
    save(sheet, "txt2img_contact_sheet.png")


## img2img (using previous result as init)
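Use the txt2img output as the init image and adjust `IMG2IMG_STRENGTH` to control how closely the result follows it: lower values preserve more of the init image's identity and composition, while values near 1.0 (such as the default 0.99) largely ignore it.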


In [None]:
# Re-run the prompt in img2img mode, starting from the txt2img result.
img2img_from_prev = engine.generate(
    prompt=PROMPT, seed=SEED,
    img2img_start=txt2img, strength=IMG2IMG_STRENGTH,
    width=WIDTH, height=HEIGHT,
    steps=STEPS, guidance=GUIDANCE,
)

# Put the init image and the img2img result next to each other for inspection.
cmp = side_by_side(txt2img, img2img_from_prev)
show(cmp, 'Left: txt2img | Right: img2img (from previous)')

# Persist both the individual result and the comparison.
save(img2img_from_prev, 'img2img_from_previous.png')
save(cmp, 'comparison.png')


## img2img (custom init image)
Optionally set `init_path` to an external image to perform img2img against a specific reference (leave as `None` to reuse the txt2img result).


In [None]:
init_path = None  # e.g., repo_root / 'ml' / 'assets' / 'identity' / 'aldar.jpg'
if init_path:
    init_path = Path(init_path)
    # Open inside a context manager so the file handle is released once the
    # pixels have been converted; convert() returns an independent image.
    with Image.open(init_path) as src:
        init_img = src.convert('RGB')
    print('Loaded init image:', init_path)
else:
    # No external reference given: fall back to the txt2img result.
    init_img = txt2img
    print('Using txt2img as init image')

# img2img run against the selected init image.
img2img_custom = engine.generate(
    prompt=PROMPT,
    seed=SEED,
    img2img_start=init_img,
    strength=IMG2IMG_STRENGTH,
    width=WIDTH,
    height=HEIGHT,
    steps=STEPS,
    guidance=GUIDANCE,
)
show(img2img_custom, 'img2img (custom init)')
save(img2img_custom, 'img2img_custom.png')


## Tips and Notes
- `IMG2IMG_STRENGTH` controls adherence to the init image: lower values preserve more identity/composition.
- Use a `NEGATIVE_PROMPT` to steer away from artifacts (e.g., "blurry, low quality, deformed").
- If you encounter CUDA OOM, try smaller resolution (e.g., 768x768), reduce steps, or ensure xFormers is installed.
- Randomize or vary `SEED` for different outputs.


## Multi-frame sequence (txt2img + iterative img2img)

This section generates a longer sequence of frames to demonstrate consistency over time.
- Frame 0 is generated with txt2img.
- Frames 1..N use img2img with the previous frame as the init image.
- Set `FRAME_SEED_MODE` to `fixed` to keep the seed the same across frames, or `increment` to vary it per frame.


In [None]:
# Number of frames in the sequence (frame 0 is txt2img, the rest are img2img).
N_FRAMES = 8
# "fixed" reuses SEED_BASE for every frame; any other value adds the frame index to it.
FRAME_SEED_MODE = "increment"  # "fixed" or "increment"
# Seed of frame 0; defaults to the notebook-wide SEED.
SEED_BASE = SEED
STRENGTH = IMG2IMG_STRENGTH  # alias for per-sequence tweaking


In [None]:
# Fail fast on a misspelled mode: any value other than "fixed" used to be
# treated silently as "increment".
if FRAME_SEED_MODE not in ("fixed", "increment"):
    raise ValueError(
        f'FRAME_SEED_MODE must be "fixed" or "increment", got {FRAME_SEED_MODE!r}'
    )

images = []
prev = None

for i in range(N_FRAMES):
    seed_i = SEED_BASE if FRAME_SEED_MODE == "fixed" else (SEED_BASE + i)
    # Frame 0 has no predecessor and is plain txt2img; every later frame is
    # img2img started from the previous frame.
    init_kwargs = {} if i == 0 else {'img2img_start': prev, 'strength': STRENGTH}
    img = engine.generate(
        prompt=PROMPT,
        seed=seed_i,
        width=WIDTH,
        height=HEIGHT,
        steps=STEPS,
        guidance=GUIDANCE,
        **init_kwargs,
    )
    images.append(img)
    prev = img
    save(img, f"frame_{i:02d}.png")

In [None]:
def make_contact_sheet(images, cols=4, max_thumb=256):
    """Tile *images* into a single grid of thumbnails.

    Every image is scaled down (never up), keeping its aspect ratio, so that
    neither side exceeds *max_thumb*. Thumbnails are placed row by row, *cols*
    per row, in cells sized to the largest thumbnail, on a black background.

    Args:
        images: Iterable of PIL images.
        cols: Number of thumbnails per row; must be at least 1.
        max_thumb: Upper bound for a thumbnail's width and height.

    Returns:
        The contact sheet as an RGB image, or ``None`` when *images* is empty.

    Raises:
        ValueError: If *cols* is smaller than 1 and *images* is not empty.
    """
    images = list(images)  # accept any iterable, not only sized sequences
    if not images:
        return None
    if cols < 1:
        # Previously surfaced as a ZeroDivisionError or an invalid canvas size.
        raise ValueError(f"cols must be >= 1, got {cols}")

    thumbs = []
    for img in images:
        ratio = min(max_thumb / img.width, max_thumb / img.height, 1.0)
        # Clamp to 1 px so very elongated images do not round down to a
        # zero-sized thumbnail, which cannot be resized to.
        size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
        # resize() already returns a new image; no copy is needed first.
        thumbs.append(img.resize(size))

    rows = (len(thumbs) + cols - 1) // cols  # ceiling division
    cell_w = max(t.width for t in thumbs)
    cell_h = max(t.height for t in thumbs)
    sheet = Image.new("RGB", (cell_w * cols, cell_h * rows), color=(0, 0, 0))
    for idx, thumb in enumerate(thumbs):
        r, c = divmod(idx, cols)
        sheet.paste(thumb, (c * cell_w, r * cell_h))
    return sheet

# Build an overview of the sequence frames; make_contact_sheet returns None
# when there are no frames, in which case nothing is shown or saved.
sheet = make_contact_sheet(images, cols=4, max_thumb=256)
if sheet is not None:
    show(sheet, "Contact sheet of frames")
    save(sheet, "frames_contact_sheet.png")
