# Lightning AI SDXL LoRA Trainer (kohya)

Este cuaderno prepara un entorno dentro de Lightning AI (ruta raíz ``/teamspace/studios/this_studio``)
y utiliza la integración reciente con [kohya-ss/sd-scripts](https://github.com/kohya-ss/sd-scripts).
Los pasos principales son:

1. Instalar/usar un entorno gestionado por [uv](https://github.com/astral-sh/uv).
2. Detectar el repositorio backend y exponer el módulo `sd_scripts` con las utilidades de `utils.kohya`.
3. Instalar las dependencias necesarias (backend, kohya y extras de este fork).
4. Configurar las rutas de trabajo del LoRA y definir los parámetros básicos del entrenamiento.
5. Generar los ficheros `config.toml` y `dataset.toml` con `utils.process`.
6. Construir el comando de entrenamiento con `build_training_command` y, opcionalmente, lanzarlo desde el cuaderno.

Completa los valores marcados como ``<...>`` antes de ejecutar el entrenamiento real.


In [None]:
from pathlib import Path
import os
import subprocess
import sys

# Root of the Lightning AI studio; the default uv project environment lives inside it.
LIGHTNING_ROOT = Path("/teamspace/studios/this_studio")
# Location checked for an existing uv binary before the installer is run.
UV_BIN = Path.home() / ".local" / "bin" / "uv"

if UV_BIN.exists():
    print("uv ya está instalado")
else:
    print("Instalando uv…")
    # Fetch the install script from astral.sh and pipe it into sh through a bash login shell.
    uv_install_pipeline = "curl -LsSf https://astral.sh/uv/install.sh | sh"
    subprocess.run(["/bin/bash", "-lc", uv_install_pipeline], check=True)

# Prepend the directory that holds UV_BIN so later `uv` invocations resolve to it first.
os.environ["PATH"] = ":".join((str(UV_BIN.parent), os.environ["PATH"]))
# Keep a pre-existing UV_PROJECT_ENVIRONMENT; otherwise default to <studio>/.venv.
if "UV_PROJECT_ENVIRONMENT" not in os.environ:
    os.environ["UV_PROJECT_ENVIRONMENT"] = str(LIGHTNING_ROOT / ".venv")
print("Entorno de uv:", os.environ["UV_PROJECT_ENVIRONMENT"])

# Locate the backend repository: either the current directory already contains
# `utils`, or it contains a `LoRA_Easy_Training_scripts_Backend` checkout that does.
project_root = Path.cwd().resolve()
has_utils = (project_root / "utils").exists()
if not has_utils:
    backend_checkout = project_root / "LoRA_Easy_Training_scripts_Backend"
    if backend_checkout.exists():
        project_root = backend_checkout.resolve()
        has_utils = (project_root / "utils").exists()
if not has_utils:
    raise FileNotFoundError("No se encontró la carpeta 'utils' junto al notebook.")

# Put the repository first on sys.path (once) so its `utils` package can be imported.
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)
print("Repositorio detectado en:", project_root)

# Deliberately imported mid-cell: `utils` only becomes importable after the
# repository root has been inserted into sys.path.
from utils.kohya import ensure_on_path, get_repo_root

# NOTE(review): presumably exposes the kohya sd_scripts package on the import
# path, as described in the notebook intro — confirm in utils.kohya.
ensure_on_path()
kohya_root = get_repo_root()
print("Repositorio kohya:", kohya_root)

# Create the runtime_store directory inside the repository if it is missing.
runtime_store = project_root.joinpath("runtime_store")
runtime_store.mkdir(parents=True, exist_ok=True)
print("Carpeta runtime_store lista en:", runtime_store)


In [None]:
    import subprocess

    # Shared argv prefix for every installation command queued below.
    uv_pip_install = ["uv", "pip", "install", "--upgrade"]
    commands: list[list[str]] = []

    # Backend requirements, when the repository ships a requirements.txt.
    base_requirements = project_root / "requirements.txt"
    if base_requirements.exists():
        commands.append([*uv_pip_install, "-r", str(base_requirements)])

    # Optional in-repo packages, installed from their directories when present.
    local_package_dirs = [project_root / name for name in ("custom_scheduler", "lycoris")]
    commands.extend(
        [*uv_pip_install, str(package_dir)]
        for package_dir in local_package_dirs
        if package_dir.exists()
    )

    # Requirements of the kohya repository; skipped with a notice when absent.
    kohya_requirements = kohya_root / "requirements.txt"
    if not kohya_requirements.exists():
        print("No se encontró requirements.txt dentro de sd_scripts; se omite la instalación específica.")
    else:
        commands.append([*uv_pip_install, "-r", str(kohya_requirements)])

    # Run the queued commands in order; check=True aborts on the first failure.
    for argv in commands:
        print("Ejecutando:", " ".join(argv))
        subprocess.run(argv, check=True)

    # Create a default accelerate configuration (one machine, one process,
    # GPU id '0', bf16 mixed precision) unless a config file already exists.
    accelerate_config = LIGHTNING_ROOT / ".cache" / "huggingface" / "accelerate" / "default_config.yaml"
    if not accelerate_config.exists():
        accelerate_config.parent.mkdir(parents=True, exist_ok=True)
        # One YAML entry per item. The entries are joined with "\n" below; the
        # previous version split each string literal across two physical
        # lines, which is a syntax error in Python.
        accelerate_settings = (
            "command_file: null",
            "commands: null",
            "compute_environment: LOCAL_MACHINE",
            "deepspeed_config: {}",
            "distributed_type: 'NO'",
            "downcase_fp16: 'NO'",
            "dynamo_backend: 'NO'",
            "fsdp_config: {}",
            "gpu_ids: '0'",
            "machine_rank: 0",
            "main_process_ip: null",
            "main_process_port: null",
            "main_training_function: main",
            "megatron_lm_config: {}",
            "mixed_precision: bf16",
            "num_machines: 1",
            "num_processes: 1",
            "rdzv_backend: static",
            "same_network: true",
            "tpu_name: null",
            "tpu_zone: null",
            "use_cpu: false",
        )
        accelerate_config.write_text("\n".join(accelerate_settings), encoding="utf-8")
        print("Archivo de configuración de accelerate creado en:", accelerate_config)
    else:
        print("Configuración de accelerate existente en:", accelerate_config)


In [None]:
from pathlib import Path

from utils.process import process_args, process_dataset_args
from lightning_ai.sdxl_lora_trainer import (
    BASE_MODEL_NAMES,
    BASE_MODEL_PRESETS,
    MODEL_CACHE_ROOT,
    VAE_MODEL_NAMES,
    VAE_PRESETS,
    ensure_model_assets,
    ensure_model_file,
)

# Name of the LoRA; it determines the workspace folder under <studio>/Loras.
lora_name = "mi_lora_kohya"
workspace_root = LIGHTNING_ROOT.joinpath("Loras", lora_name).resolve()
dataset_workspace = workspace_root / "dataset"
output_workspace = workspace_root / "output"
base_model_dir = (MODEL_CACHE_ROOT / "base").resolve()
vae_dir = (MODEL_CACHE_ROOT / "vae").resolve()

# Each directory paired with the label used when reporting it.
labelled_dirs = (
    ("Directorio de trabajo del LoRA:", workspace_root),
    ("Carpeta del dataset:", dataset_workspace),
    ("Carpeta de salida:", output_workspace),
    ("Carpeta local para modelos base:", base_model_dir),
    ("Carpeta local para VAEs:", vae_dir),
)

# Create every directory first, then report them all.
for _, directory in labelled_dirs:
    directory.mkdir(parents=True, exist_ok=True)

for label, directory in labelled_dirs:
    print(label, directory)

# --- User-editable model selection -------------------------------------------
# load_diffusers: True fetches assets by diffusers id; False fetches a single file.
# training_model / vae_choice: preset names, checked against the known name lists.
# custom_*_path: when non-empty, used instead of the corresponding preset.
load_diffusers = True
training_model = "Pony Diffusion V6 XL"
vae_choice = "Stability AI SDXL VAE"
custom_base_model_path = ""
custom_vae_path = ""

if training_model and training_model not in BASE_MODEL_NAMES:
    raise ValueError(
        f"Modelo base desconocido '{training_model}'. Opciones válidas: {', '.join(BASE_MODEL_NAMES)}"
    )
if vae_choice and vae_choice not in VAE_MODEL_NAMES:
    raise ValueError(
        f"VAE desconocido '{vae_choice}'. Opciones válidas: {', '.join(VAE_MODEL_NAMES)}"
    )
# An empty preset name is only acceptable when a custom path replaces it. Fail
# with a clear message here instead of on the preset lookup further down.
if not custom_base_model_path and not training_model:
    raise ValueError(
        "Debes definir `training_model` o `custom_base_model_path` para seleccionar el modelo base."
    )

# Resolve the base model: a custom path wins; otherwise the preset is fetched.
if custom_base_model_path:
    base_target = Path(custom_base_model_path).expanduser().resolve()
    print("Usando modelo base personalizado en:", base_target)
else:
    preset = BASE_MODEL_PRESETS[training_model]
    if load_diffusers:
        base_target = ensure_model_assets(preset.diffusers_id, "base", training_model)
    else:
        base_target = ensure_model_file(
            preset.single_file_url,
            "base",
            training_model,
            preset.single_file_name,
        )
    print("Modelo base listo en:", base_target)

# Resolve the VAE the same way; leaving both VAE settings empty disables it.
if custom_vae_path:
    vae_target = Path(custom_vae_path).expanduser().resolve()
    print("Usando VAE personalizado en:", vae_target)
elif vae_choice:
    vae_preset = VAE_PRESETS[vae_choice]
    if load_diffusers:
        vae_target = ensure_model_assets(vae_preset.diffusers_id, "vae", vae_choice)
    else:
        vae_target = ensure_model_file(
            vae_preset.single_file_url,
            "vae",
            vae_choice,
            vae_preset.single_file_name,
        )
    print("VAE listo en:", vae_target)
else:
    vae_target = None
    print("No se utilizará VAE adicional.")

# The v-prediction flag comes from the preset; custom base models default to False.
base_v_prediction = False
if not custom_base_model_path and training_model:
    base_v_prediction = BASE_MODEL_PRESETS[training_model].default_vpred

print("Predicción V activada:", base_v_prediction)


def _stringify(target):
    if target is None:
        return None
    return target if isinstance(target, str) else target.as_posix()

# Output locations, converted to POSIX strings once for use in the arguments.
output_dir = output_workspace.as_posix()
logging_dir = (output_workspace / "logs").as_posix()

# Training arguments handed to process_args to produce config.toml.
config_args = {
    "pretrained_model_name_or_path": _stringify(base_target),
    "vae": _stringify(vae_target),
    "output_dir": output_dir,
    "logging_dir": logging_dir,
    "network_module": "networks.lora",
    "network_dim": 32,
    "network_alpha": 32,
    "train_batch_size": 1,
    "gradient_accumulation_steps": 1,
    "max_train_epochs": 1,
    "learning_rate": 1e-4,
    "unet_lr": 1e-4,
    "text_encoder_lr": 5e-6,
    "optimizer_type": "AdamW8bit",
    "save_every_n_steps": 200,
    "mixed_precision": "bf16",
    "cache_latents": True,
    "clip_skip": 2,
    "seed": 42,
    "load_diffusers_format": load_diffusers,
    "v_prediction": base_v_prediction,
}

# Dataset arguments handed to process_dataset_args to produce dataset.toml:
# one "general" section plus a single subset pointing at the dataset folder.
general_options = {
    "resolution": 1024,
    "shuffle_caption": True,
    "caption_extension": ".txt",
    "keep_tokens": 1,
}
subset_options = {
    "image_dir": dataset_workspace.as_posix(),
    "caption_dropout_rate": 0.0,
    "caption_dropout_every_n_epochs": 0,
    "num_repeats": 1,
}
dataset_args = {"general": general_options, "subsets": [subset_options]}

config_lines, config_path = process_args(config_args)
dataset_lines, dataset_path = process_dataset_args(dataset_args)

# Report where the files were written, then echo the generated entries.
print("config.toml generado en:", config_path)
print("dataset.toml generado en:", dataset_path)

print("Entradas de config.toml:")
for entry in config_lines:
    print("  ", entry)

print("Secciones de dataset.toml:")
for entry in dataset_lines.get("general", []):
    print("  ", entry)

subset_number = 0
for subset_entries in dataset_lines.get("subsets", []):
    subset_number += 1
    print(f"  [[datasets.subsets]] #{subset_number}")
    for entry in subset_entries:
        print("    ", entry)




In [None]:
import shlex

from utils.kohya import (
    build_training_command,
    get_training_script,
    inherit_environment,
    normalise_extra_args,
)

# --- User-editable training options ------------------------------------------
train_mode = "lora"
is_sdxl = True
is_flux = False
extra_cli_arguments = "--min_snr_gamma=5.0"
use_accelerate = True

# Pick the training script for the chosen mode, then assemble the full command
# from the interpreter, the script, the two generated TOML files and the extras.
script_path = get_training_script(train_mode, is_sdxl, is_flux)
resolved_config = config_path.resolve()
resolved_dataset = dataset_path.resolve()
extra_args = normalise_extra_args(extra_cli_arguments)
command = build_training_command(
    sys.executable,
    script_path,
    resolved_config,
    resolved_dataset,
    extra_args,
    use_accelerate,
)

print("Script seleccionado:", script_path.name)
print("Comando completo:")
# shlex.join renders the argv list as a single shell-quoted line for copy/paste.
print(shlex.join(command))

# Safety switch: training is only launched after it is explicitly set to True.
run_training = False
if not run_training:
    print("Define `run_training = True` para lanzar el entrenamiento una vez revisados los parámetros.")
else:
    # Run from the script's own directory with the environment prepared by
    # inherit_environment(); check=True raises if the process exits non-zero.
    completed = subprocess.run(
        command,
        cwd=str(script_path.parent),
        env=inherit_environment(),
        check=True,
    )
    print("Proceso finalizado con código:", completed.returncode)
