# MUSUBI TUNER

# Install Musubi Tuner

In [None]:
import os
import shutil

# Workspace layout: every path below is derived from root_dir.
root_dir          = "/workspace"
drive_dir         = os.path.join(root_dir, "drive", "MyDrive")
# Clone target for repo_url.
repo_dir          = os.path.join(root_dir, "musubi-tuner")
training_dir      = os.path.join(root_dir, "fine_tune")
# Download destination for the diffusion model and the text encoder.
pretrained_model  = os.path.join(root_dir, "pretrained_model")
# Download destination for the VAE weights.
vae_dir           = os.path.join(root_dir, "vae")
lora_dir          = os.path.join(root_dir, "network_weight")
config_dir        = os.path.join(training_dir, "config")
output_dir        = os.path.join(training_dir, "outputs")
tools_dir         = os.path.join(repo_dir, "tools")
finetune_dir      = os.path.join(repo_dir, "finetune")
# Written by install_dependencies() via accelerate's write_basic_config
# when the file does not exist yet.
accelerate_config = os.path.join(repo_dir, "accelerate_config", "config.yaml")

# Git source that clone_repo() checks out into repo_dir.
repo_url          = "https://github.com/BelieveDiffusion/musubi-tuner"
branch            = "dev_20251223"

def clone_repo(url, dir, branch):
    """Clone `branch` of the git repository at `url` into `dir`.

    Args:
        url: Remote repository URL passed to `git clone`.
        dir: Destination directory for the checkout.
        branch: Branch name passed to `git clone -b`.

    Nothing happens when `dir` already exists: an existing checkout is
    neither updated nor switched to `branch`.
    """
    if not os.path.exists(dir):
       # IPython shell escape; {branch}, {url} and {dir} are interpolated
       # from the Python variables of the same name.
       !git clone -b {branch} {url} {dir}

def install_dependencies():
    """Install system and Python dependencies and create the accelerate config.

    Must run with the cloned repository as the current working directory,
    because `pip install -e .` installs whatever project lives in the cwd
    (main() changes into repo_dir before calling this).
    """
    # IPython shell escapes: refresh the apt index, then install aria2,
    # which aria2_download() later invokes as `aria2c`.
    !apt update -yqq
    !apt install aria2 -yqq
    # Editable install of the project in the current directory.
    !pip install -e .
    !pip install wandb

    # Imported here rather than at the top of the cell.
    # NOTE(review): presumably because accelerate only becomes importable
    # after the pip installs above - confirm.
    from accelerate.utils import write_basic_config

    # Keep an existing config; only write one when none is present.
    if not os.path.exists(accelerate_config):
        write_basic_config(save_location=accelerate_config)

def prepare_environment():
    """Export the environment variables used for the rest of the session.

    Sets TF_CPP_MIN_LOG_LEVEL=3, SAFETENSORS_FAST_GPU=1 and
    PYTHONWARNINGS=ignore in ``os.environ``, overwriting any previous
    values. Child processes started afterwards inherit them.
    """
    os.environ.update(
        {
            "TF_CPP_MIN_LOG_LEVEL": "3",
            "SAFETENSORS_FAST_GPU": "1",
            "PYTHONWARNINGS": "ignore",
        }
    )

def main():
    """Set up the workspace: clone the repo, create folders, install, configure."""
    # Clone from the workspace root so relative paths resolve there.
    os.chdir(root_dir)
    clone_repo(repo_url, repo_dir, branch)

    # install_dependencies() runs `pip install -e .`, which needs the
    # repository as the current working directory.
    os.chdir(repo_dir)

    required_dirs = (training_dir, config_dir, pretrained_model, vae_dir, output_dir)
    for path in required_dirs:
        os.makedirs(path, exist_ok=True)

    install_dependencies()
    prepare_environment()

main()

# Download Qwen

In [6]:
import os
import re
import requests
import subprocess
from urllib.parse import urlparse, unquote
from pathlib import Path

# Work from the workspace root for the downloads below.
os.chdir(root_dir)

# Hugging Face access token, sent as a Bearer token by get_filename() and
# aria2_download(). Leave the real value out of any shared copy of this file.
HUGGINGFACE_TOKEN = ""
# Diffusion model weights (downloaded into pretrained_model).
MODEL_URL    = "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_bf16.safetensors"
#MODEL_URL    = "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_edit_2509_bf16.safetensors"
# VAE weights (downloaded into vae_dir).
VAE_URL      = "https://huggingface.co/Qwen/Qwen-Image/resolve/main/vae/diffusion_pytorch_model.safetensors"
# Text encoder weights (downloaded into pretrained_model). This one is a
# "/blob/" page URL; download() rewrites it to "/resolve/" before fetching.
TE_URL     = "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/text_encoders/qwen_2.5_vl_7b.safetensors"

def get_supported_extensions():
    """Return the tuple of file extensions this notebook treats as supported."""
    return (".ckpt", ".safetensors", ".pt", ".pth")

def get_filename(url, bearer_token, quiet=True):
    """Work out the file name that downloading `url` will produce.

    The name is taken from the response's Content-Disposition header when
    it carries a ``filename=`` entry, otherwise from the last component of
    the URL path (percent-decoded).

    Args:
        url: HTTP(S) URL of the file.
        bearer_token: Token sent as ``Authorization: Bearer <token>``. No
            Authorization header is sent when it is empty or None.
        quiet: Accepted for interface compatibility; currently unused.

    Returns:
        The file name as a string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # An empty token would otherwise produce a malformed "Bearer " header.
    headers = {"Authorization": f"Bearer {bearer_token}"} if bearer_token else {}

    # stream=True defers fetching the body, since only the headers are
    # needed; the context manager releases the connection afterwards.
    with requests.get(url, headers=headers, stream=True, timeout=30) as response:
        response.raise_for_status()
        content_disposition = response.headers.get("content-disposition", "")

    match = re.search(r'filename="?([^"]+)"?', content_disposition)
    if match:
        return match.group(1)

    # No usable header: fall back to the URL itself.
    return unquote(os.path.basename(urlparse(url).path))

def parse_args(config):
    """Turn an option mapping into a list of command-line arguments.

    Rules, applied per ``key: value`` pair in insertion order:

    * key starting with ``_``: positional argument, emitted as ``str(value)``
    * string value: ``--key=value``
    * ``True``: bare flag ``--key``; ``False`` is dropped
    * int or float value: ``--key=value``
    * anything else (``None`` included): dropped

    Args:
        config: Mapping of option name to value.

    Returns:
        List of argument strings, suitable for ``subprocess.run``.
    """
    cli = []

    for key, value in config.items():
        if key.startswith("_"):
            cli.append(f"{value}")
            continue

        # bool is a subclass of int, so it has to be handled before numbers.
        if isinstance(value, bool):
            if value:
                cli.append(f"--{key}")
            continue

        if isinstance(value, (str, float, int)):
            cli.append(f"--{key}={value}")

    return cli

def aria2_download(dir, filename, url, token):
    """Download `url` to `dir`/`filename` with the `aria2c` command-line tool.

    Args:
        dir: Destination directory (aria2c ``--dir``).
        filename: Name of the output file (aria2c ``--out``).
        url: URL to download.
        token: Hugging Face token. It is sent as a Bearer Authorization
            header only when it is non-empty and the URL contains
            "huggingface.co".

    The call does not raise when aria2c fails; a warning with the exit
    status is printed instead, so callers keep their best-effort behaviour.
    """
    # Skip the header for an empty token: "Authorization: Bearer " is malformed.
    if token and "huggingface.co" in url:
        user_header = f"Authorization: Bearer {token}"
    else:
        user_header = None

    aria2_config = {
        "console-log-level"         : "error",
        "summary-interval"          : 10,
        "header"                    : user_header,  # None is dropped by parse_args
        "continue"                  : True,
        "max-connection-per-server" : 16,
        "min-split-size"            : "1M",
        "split"                     : 16,
        "dir"                       : dir,
        "out"                       : filename,
        "_url"                      : url,  # leading "_" marks a positional argument
    }
    aria2_args = parse_args(aria2_config)
    result = subprocess.run(["aria2c", *aria2_args])

    if result.returncode != 0:
        print(f"aria2c exited with status {result.returncode} while downloading {url}")

def download(url, dst, token):
    """Fetch `url` into the directory `dst` and return the local file path.

    Args:
        url: Remote URL, or a path that already starts with "/workspace".
        dst: Directory the file is downloaded into.
        token: Hugging Face token forwarded to get_filename() and
            aria2_download().

    Returns:
        `url` unchanged when it starts with "/workspace" (treated as an
        already-local file), otherwise ``os.path.join(dst, filename)``.
    """
    if url.startswith("/workspace"):
        return url

    # Hugging Face "/blob/" URLs point at the HTML file viewer. Rewrite them
    # to the "/resolve/" file endpoint *before* asking the server for the
    # file name, so the lookup and the download hit the same resource.
    if "huggingface.co" in url and "/blob/" in url:
        url = url.replace("/blob/", "/resolve/")

    filename = get_filename(url, token, quiet=False)
    aria2_download(dst, filename, url, token)

    return os.path.join(dst, filename)

def main():
    """Download the model, VAE and text encoder and publish their paths.

    Sets the module-level globals `model_path`, `vae_path` and `te_path` to
    the path returned by download() for each target, or leaves them as None
    when the corresponding URL is empty. Finally prints one "Selected ..."
    line for every target whose file exists on disk.
    """
    global model_path, vae_path, te_path

    model_path = vae_path = te_path = None

    download_targets = {
        "model" : (MODEL_URL, pretrained_model),
        "vae"   : (VAE_URL, vae_dir),
        "te"  : (TE_URL, pretrained_model),
    }

    for target, (url, dst) in download_targets.items():
        if not url:
            continue

        path = download(url, dst, HUGGINGFACE_TOKEN)

        # Assign as each download finishes so that earlier results survive
        # a failure in a later download.
        if target == "model":
            model_path = path
        elif target == "vae":
            vae_path = path
        elif target == "te":
            te_path = path

    # Report each category with its own path ("te" must use te_path).
    for category, path in {
        "model": model_path,
        "vae": vae_path,
        "te": te_path,
    }.items():
        if path is not None and os.path.exists(path):
            print(f"Selected {category}: {path}")

main()

# Directory Config

In [9]:
import os

# Directory that holds the training images; also the default extraction
# target of the "Unzip Dataset" cell when `unzip_to` is left empty.
train_data_dir = "/workspace/fine_tune/train_data"

# Create it up front (no error if it already exists).
os.makedirs(train_data_dir, exist_ok=True)
print(f"Your train data directory : {train_data_dir}")

# Data Gathering

## Unzip Dataset
If your dataset is a `zip` archive that has already been uploaded somewhere reachable, use this section to fetch and extract it: set `zipfile_url` to its location. The archive is downloaded and extracted into `unzip_to`, or into `train_data_dir` when `unzip_to` is left empty.

In [10]:
import os
import zipfile
import shutil
from pathlib import Path

# Location of the dataset archive to fetch.
zipfile_url = ""
# Extraction directory; empty means "use train_data_dir".
unzip_to = ""

if not unzip_to:
    unzip_to = train_data_dir
else:
    # A custom target may not exist yet.
    os.makedirs(unzip_to, exist_ok=True)

def extract_dataset(zip_file, output_path):
    """Unpack every member of the zip archive `zip_file` into `output_path`.

    Args:
        zip_file: Path (or file object) of the archive to read.
        output_path: Directory the members are extracted into.
    """
    with zipfile.ZipFile(zip_file) as archive:
        archive.extractall(path=output_path)
        
def remove_files(train_dir, files_to_move):
    """Delete the top-level entries of `train_dir` named in `files_to_move`.

    Args:
        train_dir: Directory to clean. Only its direct children are
            considered; nested entries are not searched.
        files_to_move: Collection of entry names to delete. Directories are
            removed recursively, files and symlinks are unlinked.

    NOTE(review): the parameter name suggests these entries were once meant
    to be moved elsewhere. The previous move branch could never run (it
    required a path returned by os.listdir to be missing), so the effective
    behaviour has always been deletion, which is what is kept here.
    """
    for filename in os.listdir(train_dir):
        if filename not in files_to_move:
            continue

        file_path = os.path.join(train_dir, filename)

        # shutil.rmtree refuses to operate on a symlink, so a link to a
        # directory is unlinked like a regular file instead.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            shutil.rmtree(file_path)
        else:
            os.remove(file_path)

# Leftover metadata files and macOS archive artefacts that should not stay
# next to the training images after extraction.
files_to_move = (
    "meta_cap.json",
    "meta_cap_dd.json",
    "meta_lat.json",
    "meta_clean.json",
    "__MACOSX",
)

if zipfile_url:
    zip_file = download(zipfile_url, root_dir, HUGGINGFACE_TOKEN)
    extract_dataset(zip_file, unzip_to)
    # The archive is no longer needed once its contents are unpacked.
    os.remove(zip_file)

    # Clean the directory the archive was actually extracted into, which
    # differs from train_data_dir when a custom unzip_to is configured.
    remove_files(unzip_to, files_to_move)
else:
    # An empty URL would otherwise fail inside download() with an
    # unhelpful "invalid URL" error from requests.
    print("zipfile_url is empty; nothing to download or extract.")

# Bucketing and Latents Caching and Training
This code will create buckets based on the `bucket_resolution` provided for multi-aspect ratio training, and then convert all images within the `train_data_dir` to latents.

In [None]:
# Options for `accelerate launch` itself. generate_args() renders each entry
# as a command-line option placed before the training script name.
accelerate_conf = {
    "mixed_precision": "bf16",
    "num_cpu_threads_per_process": 1,
    # Keep in step with the number of ids listed in "gpu_ids".
    "num_processes": 8,
    "num_machines": 1,
    "multi_gpu": True,
    "gpu_ids": "0,1,2,3,4,5,6,7"
}

# Options for the training script, rendered after the script name.
# generate_args() drops entries whose value is None or False, and emits
# string values even when they are empty.
train_conf = {
    # Weights fetched by the "Download Qwen" cell: the file names match the
    # MODEL_URL / VAE_URL / TE_URL downloads and their target directories.
    "dit": "/workspace/pretrained_model/qwen_image_bf16.safetensors",
    "vae": "/workspace/vae/diffusion_pytorch_model.safetensors",
    "text_encoder": "/workspace/pretrained_model/qwen_2.5_vl_7b.safetensors",
    # This file is not created anywhere in the visible cells; it has to
    # exist before training starts.
    "dataset_config": "/workspace/musubi-tuner/dataset_1024_bs2.toml",
    "sdpa": True,
    "mixed_precision": "bf16",
    "timestep_sampling": "shift",
    # None: the option is left off the command line entirely.
    "weighting_scheme": None,
    "discrete_flow_shift": 2.2,
    "optimizer_type": "adamw8bit",
    "learning_rate": 1e-4,
    "gradient_checkpointing": True,
    "max_data_loader_n_workers": 2,
    "persistent_data_loader_workers": True,
    "network_module": "networks.lora_qwen_image",
    "network_dim": 128,
    "network_alpha": 128,
    "max_train_epochs": 16,
    "save_every_n_epochs": 1,
    "seed": 42,
    "output_name": "Beauty_09_FB",
    "output_dir": "/workspace/fine_tune/outputs",
    "log_with": "wandb",
    "log_tracker_name": "Beauty_09_FB",
    "lr_scheduler": "constant",
    "max_grad_norm": 0.0,
    # An empty string is still passed on as --wandb_api_key="".
    # Fill in the key locally and keep it out of shared copies of this file.
    "wandb_api_key": "",
    "sample_every_n_steps": 25,
    "sample_at_first": True,
    # Like dataset_config, this file is expected to exist already.
    "sample_prompts": "/workspace/musubi-tuner/prompts.txt",
}

def generate_args(config):
    """Render an option mapping as a single shell command-line string.

    Rules, applied per ``key: value`` pair in insertion order:

    * key starting with ``_``: positional argument, emitted as ``"value"``
    * string value: ``--key="value"``
    * ``True``: bare flag ``--key``; ``False`` is dropped
    * int or float value: ``--key=value``
    * anything else (``None`` included): dropped

    Values are wrapped in double quotes but not escaped, so they must not
    contain double quotes themselves.

    Args:
        config: Mapping of option name to value.

    Returns:
        The options joined by single spaces, without surrounding whitespace.
    """

    def render(key, value):
        # Returns the text for one option, or None when it is to be skipped.
        if key.startswith("_"):
            return f'"{value}"'
        if isinstance(value, str):
            return f'--{key}="{value}"'
        # bool is a subclass of int, so it has to be handled before numbers.
        if isinstance(value, bool):
            return f"--{key}" if value else None
        if isinstance(value, (float, int)):
            return f"--{key}={value}"
        return None

    rendered = (render(key, value) for key, value in config.items())
    return " ".join(piece for piece in rendered if piece is not None).strip()

# Render both option sets as command-line fragments.
accelerate_args = generate_args(accelerate_conf)
train_args = generate_args(train_conf)

# Options placed before the script name are consumed by `accelerate launch`;
# those after it are passed to the training script.
# NOTE(review): the script is referenced relative to the repository root,
# while the commented-out cache commands at the end of this cell use
# "src/musubi_tuner/..." paths - confirm this path exists on the branch in use.
final_args = f"accelerate launch {accelerate_args} qwen_image_train_network.py {train_args}"

# The script path above is relative, so run from the repository directory.
os.chdir(repo_dir)
# IPython shell escape: executes the assembled command line.
!{final_args}

# Cache latents
#!python "src/musubi_tuner/qwen_image_cache_latents.py" --dataset_config "/workspace/musubi-tuner/dataset_1024_bs2.toml" --vae "/workspace/vae/diffusion_pytorch_model.safetensors" --skip_existing

# Cache text encodings
#!python "src/musubi_tuner/qwen_image_cache_text_encoder_outputs.py" --dataset_config "/workspace/musubi-tuner/dataset_1024_bs2.toml" --text_encoder "/workspace/pretrained_model/qwen_2.5_vl_7b.safetensors" --skip_existing