In [None]:
!python3 -m pip install --upgrade pip
!export MAKEFLAGS="-j$(nproc)"
!pip install numpy torch
!pip install --upgrade huggingface_hub[hf_xet] hf_xet peft diffusers transformers accelerate xformers # flash-attn

In [None]:
from huggingface_hub import login
import base64
k = base64.b64decode('aGZfaHhua0Vaek5SaUtUVUFvRUFvcmJ3d0JTbHNmR2xsaWt5SQ==').decode()
login(token=k, add_to_git_credential=False)
%env HUGGINGFACEHUB_API_TOKEN={k}
%env HF_TOKEN={k}
%env HF_HUB_ENABLE_XET_DOWNLOAD=1
%env HF_XET_HIGH_PERFORMANCE=1

In [None]:
%%bash
# Provision a dedicated conda environment for FaceFusion, install it with the
# CUDA build of onnxruntime, and launch the UI.
#
# Fail fast: without this, a failed step would let every later command run in
# the wrong directory or environment.
set -e

echo "=== FaceFusion setup ==="

conda create -n facefusion python=3.12 pip=25.0 -y

# `conda activate` relies on shell functions that a non-interactive script does
# not have. Load them with the shell hook; `conda init` followed by
# `exec "$SHELL"` does not help here because the new shell never sources the
# interactive start-up files.
eval "$(conda shell.bash hook)"
conda activate facefusion

# Clone once (re-running the cell reuses the checkout), then work inside the
# repository: install.py and facefusion.py live at its root.
[ -d facefusion/.git ] || git clone https://github.com/facefusion/facefusion
cd facefusion

# NOTE(review): the sed edits that stripped the upstream content-analyser check
# and forced `is_valid = True` are intentionally not carried over. As written
# they never applied (a comment line broke the `&&` chain, and the paths were
# resolved from outside the checkout), and this notebook should run upstream's
# code with its built-in checks intact.

# cuDNN 9 for CUDA 12: system packages plus the conda build used by the env.
sudo apt update
sudo apt install -y libcudnn9-cuda-12 libcudnn9-dev-cuda-12
sudo ldconfig
conda install -c nvidia cudnn=9.11 -y
# Put the env's libraries first; avoid a trailing ':' when the variable is unset.
export LD_LIBRARY_PATH="$CONDA_PREFIX/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

python3 install.py --onnxruntime cuda     # or  --onnxruntime default  for CPU

# Long-running: this call blocks the cell until the app exits.
python3 facefusion.py run --open-browser

In [None]:
# new  Python 3.11 virtual-env (optional but recommended)
!python -m venv flux-lora && source flux-lora/bin/activate

# !pip install -U diffusers transformers accelerate bitsandbytes safetensors datasets peft huggingface-hub wandb  # wandb optional, just for training charts

!hf download black-forest-labs/FLUX.1-dev --local-dir flux_base


In [None]:
%%bash
# Install and launch FluxGym (with kohya-ss sd-scripts, branch sd3) in its own
# virtual-env.
#
# All of this has to be pasted into a terminal, with a CF tunnel set up, because
# app.py is a long-running server that blocks until it is stopped.
set -e

# Clone once so the cell can be re-run against an existing checkout.
[ -d fluxgym/.git ] || git clone https://github.com/cocktailpeanut/fluxgym
cd fluxgym
pwd   # log the working directory before the blocking server start

# Create and activate the env BEFORE installing anything, so every package
# below lands inside it rather than in the outer interpreter.
python3 -m venv env
source env/bin/activate

pip install --upgrade pip
# pip install flash_attn-2.7.4.post1 --no-build-isolation --no-cache-dir
pip install https://huggingface.co/spaces/Wauplin/gradio_logsview/resolve/main/gradio_logsview-0.0.5-py3-none-any.whl
pip install gradio slugify hf-transfer timm huggingface-hub torchvision wandb  --upgrade
pip install -r requirements.txt

[ -d sd-scripts/.git ] || git clone -b sd3 https://github.com/kohya-ss/sd-scripts
# Subshell keeps the working directory at the fluxgym root afterwards.
(cd sd-scripts && pip install -r requirements.txt)

# Applied after the upgrades above so the pinned version is the one left installed.
pip install huggingface-hub==0.25.2
pip install triton bitsandbytes --upgrade

python3 app.py

In [None]:
%%capture
!pip install --user gdown
!gdown --id 1y81XF2JyHMR0PgpcbEx2tiweWjbrljbE -O dt.tar && tar -xvf dt.tar && rm dt.tar # dataset

In [None]:
%%bash
# Fetch diffusion-pipe under /root/char and install its Python requirements.
#
# Fail fast: previously a failed `cd` would have let the clone and installs run
# in whatever directory the kernel happened to be in.
set -e

mkdir -p /root/char
cd /root/char
pip install hyvideo --upgrade

# Clone once (re-runnable) and pull the git submodules in the same step; the
# training cell later in this notebook also initialises them.
[ -d diffusion-pipe/.git ] || git clone --recurse-submodules https://github.com/tdrussell/diffusion-pipe
cd diffusion-pipe
pip install -r requirements.txt   # torch, xformers, deepspeed, etc.

In [None]:
%%bash
# Draft Chroma LoRA config (author's note: unsure about this config cell).
# Writes char_chroma_lora.toml into the diffusion-pipe checkout and makes sure
# the LoRA output directory exists. The training cell further down writes a
# file with the same name in the same directory, replacing this one.
pipe_dir=/root/char/diffusion-pipe
config_file="$pipe_dir/char_chroma_lora.toml"

mkdir -p /root/char/lora/
touch "$config_file"

# Quoted delimiter: the TOML below is written verbatim, with no shell expansion.
cat > "$config_file" << 'EOF'
[model]
type             = "chroma"
transformer_path = "/root/char/models/chroma/chroma-unlocked-v48.safetensors"
dtype            = "bfloat16"            # 4090 handles bf16 natively
flux_shift       = true                  # critical for Chroma stability

[adapter]
type  = "lora"
rank  = 64                               # good trade-off  (32–128 work)
dtype = "bfloat16"

[optimizer]
type          = "adamw"
lr            = 1e-4                     # Chroma blows up above 1e-3
betas         = [0.9, 0.99]
weight_decay  = 0.01
eps           = 1e-8

[train]
resolution         = 1024
max_steps          = 6000                # 150 × 40 images
checkpoint_every   = 200
masked_loss_ratio  = 0.1                 # 10 % bg masking keeps likeness sharp
save_dir           = "/root/char/lora/"

[data]
root         = "/root/char/dataset"
image_ext    = "png"
caption_ext  = "txt"
center_pad   = true
center_pad_color = "#777777"

[sample]
# ——— WHEN  ———
sample_every      = 200        # fire after every 200 training steps
# ——— WHAT  ———
prompts = [
  "lumifawn",
  "lumifawn standing on a balcony wearing white crop top and white tight shorts",
  "selfie of lumifawn on a tropical beach"
]
negative_prompt   = "low quality, bad anatomy, extra digits, missing digits, extra limbs, missing limbs, blur, bokeh"
num_inference_steps = 20
guidance_scale      = 4.0
width  = 768
height = 768

EOF

In [None]:
%%bash

# Chroma LoRA Training Setup Script (GPT5-written; reported by the author to work)
#
# Installs the remaining dependencies, writes the dataset and training configs
# into the diffusion-pipe checkout, patches models/chroma.py to move the VAE and
# text encoders to the CPU, then launches training.
#
# The cell is safe to re-run: the chroma.py patches are applied only once and
# the backup always holds the unpatched file.

set -e

echo "🚀 Setting up Chroma LoRA training environment..."

# 1. Install missing dependencies
# NOTE(review): these pins may each require a different torch patch release;
# confirm the torch version pip resolves to after all three installs.
echo "📦 Installing dependencies..."
pip install mpi4py
pip install flash-attn==2.8.0.post2 xformers==0.0.31.post1
pip install torchvision==0.22.0

# 2. Initialize git submodules
echo "🔧 Initializing git submodules..."
cd /root/char/diffusion-pipe
git submodule update --init --recursive

# 3. Create dataset configuration
# Quoted heredoc delimiter: contents are written verbatim, no shell expansion.
echo "📝 Creating dataset configuration..."
cat > dataset.toml << 'EOF'
resolutions = [512]

[[directory]]
path = '/root/char/dataset'
root = '/root/char/dataset'
image_ext = 'png'
caption_ext = 'txt'
center_pad = true
EOF

# 4. Create main training configuration
# This replaces any char_chroma_lora.toml already present in this directory.
echo "⚙️ Creating training configuration..."
cat > char_chroma_lora.toml << 'EOF'
save_every_n_steps = 200
dataset = 'dataset.toml'
output_dir = '/root/char/char_chroma'
epochs = 10
micro_batch_size_per_gpu = 1
gradient_accumulation_steps = 1
activation_checkpointing = true
reentrant_activation_checkpointing = true

[model]
type             = 'chroma'
diffusers_path   = '/root/char/models/FLUX.1-dev'
transformer_path = '/root/char/models/chroma-unlocked-v48-detail-calibrated.safetensors'
dtype            = 'float16'
transformer_dtype = 'float16'
flux_shift       = true

[adapter]
type  = 'lora'
rank  = 16
dtype = 'float16'

[optimizer]
type          = 'adamw_optimi'
lr            = 2e-4
betas         = [0.9, 0.99]
weight_decay  = 0.01
eps           = 1e-8

[train]
# 28 images → ~150 steps per image is a good start
max_steps          = 60000
resolution         = 512
masked_loss_ratio  = 0.10          # 10% background masking (matches the Reddit note)
checkpoint_every   = 200
save_dir           = '/root/char/char_chroma'
bucket_reso        = true           # keep your mixed 1024^2 and 800x1200 set "as is"
min_bucket_reso    = 512
max_bucket_reso    = 512
bucket_step        = 64
# If you OOM on a 24 GB card, add:
gradient_checkpointing = true

[data]
root         = '/root/char/dataset'     # folder containing 0.jpg…27.jpg and 0.txt…27.txt
image_ext    = 'png'
caption_ext  = 'txt'
center_pad   = true                  # keep full 800x1200 frames; set to false to crop instead

# Optional automatic samples every checkpoint (kept minimal)
[sample]
sample_every        = 200
prompts             = [
  "lumifawn",
  "lumifawn standing on a balcony wearing white crop top and white tight shorts",
  "selfie of lumifawn on a tropical beach"
]
negative_prompt     = "extra limbs, text artifacts"
num_inference_steps = 25
guidance_scale      = 4.0
width               = 1024
height              = 1024
dtype               = 'fp16'
EOF

# 5. Modify Chroma model directly for memory optimization
echo "🔧 Modifying Chroma model for memory optimization..."

# Each patch is guarded by the marker comment it inserts. Without the guard a
# second run would append the same lines again, and the backup would be
# overwritten with an already-patched file.
if ! grep -qF 'Move VAE and text encoders to CPU to save GPU memory' models/chroma.py; then
    # Backup original file (only while it is still unpatched)
    cp models/chroma.py models/chroma.py.backup

    # Insert the CPU-offload lines right after the FluxPipeline.from_pretrained call
    sed -i '/self.diffusers_pipeline = diffusers.FluxPipeline.from_pretrained/a\
        # Move VAE and text encoders to CPU to save GPU memory\
        if hasattr(self.diffusers_pipeline, "vae"):\
            self.diffusers_pipeline.vae = self.diffusers_pipeline.vae.to("cpu")\
        if hasattr(self.diffusers_pipeline, "text_encoder"):\
            self.diffusers_pipeline.text_encoder = self.diffusers_pipeline.text_encoder.to("cpu")\
        if hasattr(self.diffusers_pipeline, "text_encoder_2"):\
            self.diffusers_pipeline.text_encoder_2 = self.diffusers_pipeline.text_encoder_2.to("cpu")\
        if hasattr(self.diffusers_pipeline, "tokenizer"):\
            self.diffusers_pipeline.tokenizer = self.diffusers_pipeline.tokenizer\
        if hasattr(self.diffusers_pipeline, "tokenizer_2"):\
            self.diffusers_pipeline.tokenizer_2 = self.diffusers_pipeline.tokenizer_2' models/chroma.py
fi

# Add transformer to GPU after the precision conversion
# NOTE(review): the inserted lines use a fixed 8-space indent; confirm that it
# matches the indentation of the matched line in the current upstream file.
if ! grep -qF 'Ensure transformer is on GPU for LoRA training' models/chroma.py; then
    sed -i '/p.data = p.data.to(transformer_dtype)/a\
        # Ensure transformer is on GPU for LoRA training\
        self.transformer = self.diffusers_pipeline.transformer.to("cuda")' models/chroma.py
fi

# 6. Create debug script for testing config
# The script loads the TOML and reports which checkpoint-interval keys it has.
echo "🐛 Creating debug script..."
cat > debug_config.py << 'EOF'
import toml
import json

with open('char_chroma_lora.toml') as f:
    config = json.loads(json.dumps(toml.load(f)))

print("Config keys:", list(config.keys()))
print("save_every_n_steps in config:", 'save_every_n_steps' in config)
print("save_every_n_epochs in config:", 'save_every_n_epochs' in config)
if 'save_every_n_steps' in config:
    print("save_every_n_steps value:", config['save_every_n_steps'])
EOF

# 7. Test the configuration
# With `set -e`, a config that fails to parse stops the cell before training.
echo "🧪 Testing configuration..."
python3 debug_config.py

# 8. Run the training
# The allocator setting applies to this one command only.
echo "🎯 Starting training..."
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True accelerate launch train.py --config char_chroma_lora.toml

# Only reached once the training command above has exited successfully.
echo "✅ Setup complete! Training should now work with memory optimizations."