In [None]:
# Cell 1: Setup and Installation
#
# All dependencies are installed in this one cell. `%pip` (rather than `!pip`)
# makes sure the packages are installed into the environment of the running kernel.

# Core training / inference libraries
%pip install -qqq accelerate transformers peft bitsandbytes
%pip install -qqq controlnet_aux opencv-python-headless
%pip install -qqq Pillow torch torchvision python-slugify

# diffusers is installed from source (main branch) so that it meets the minimum
# version required by the training script downloaded below, which is also taken
# from the main branch.
%pip install -qqq git+https://github.com/huggingface/diffusers.git

# Download the official Hugging Face SDXL DreamBooth LoRA training script.
# `-O` overwrites any earlier copy: without it, re-running this cell would save the
# new download as `train_dreambooth_lora_sdxl.py.1` and the stale script would keep
# being used by `accelerate launch`.
!wget -q -O train_dreambooth_lora_sdxl.py https://raw.githubusercontent.com/huggingface/diffusers/main/examples/dreambooth/train_dreambooth_lora_sdxl.py

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for diffusers (pyproject.toml) ... [?25l[?25hdone


In [None]:
# One-time step, to be executed after the installation cell:
# saves accelerate's default configuration with fp16 mixed precision enabled.
!accelerate config default --mixed_precision fp16

accelerate configuration saved at /root/.cache/huggingface/accelerate/default_config.yaml


In [None]:
from google.colab import drive
import os
import torch
from PIL import Image
import numpy as np
import cv2
import gc
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, StableDiffusionXLImg2ImgPipeline, DDPMScheduler
from typing import List

# Make Google Drive reachable under /content/drive
drive.mount('/content/drive')

# --- PROJECT LAYOUT: check that these folders match your Drive ---
PROJECT_DIR = "/content/drive/MyDrive/GENAI_Assignment"
DATASET_DIR, LORA_OUTPUT_DIR, TEMPLATE_DIR = (
    os.path.join(PROJECT_DIR, subfolder)
    for subfolder in ("SUV_Dataset", "LoRA_Output", "Templates")
)

# Create the LoRA output and template folders when they are missing.
# (The dataset folder is not created here.)
for _folder in (LORA_OUTPUT_DIR, TEMPLATE_DIR):
    os.makedirs(_folder, exist_ok=True)

print(f"Project path: {PROJECT_DIR}")
print(f"Dataset path: {DATASET_DIR}")

Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.
Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.


Mounted at /content/drive
Project path: /content/drive/MyDrive/GENAI_Assignment
Dataset path: /content/drive/MyDrive/GENAI_Assignment/SUV_Dataset


In [None]:
# --- CONFIGURATION (Optimized for Low VRAM) ---
INSTANCE_PROMPT = "a high-detail, realistic photograph of a Mahindra XUV700 <silverSUV> car"
OUTPUT_NAME = "sdxl-suv-lora"  # Label only; it is not passed to the training script
MAX_TRAIN_STEPS = 300  # Reduced steps for stability

# The LoRA weights are expected under the training script's default file name.
LORA_FILE_NAME = "pytorch_lora_weights.safetensors"
LORA_FILE = os.path.join(LORA_OUTPUT_DIR, LORA_FILE_NAME)

# Remember the timestamp of any weights left over from a previous run, so that an
# old file cannot be mistaken for the result of this run.
_previous_mtime = os.path.getmtime(LORA_FILE) if os.path.isfile(LORA_FILE) else None

# --- EXECUTION ---
print("Starting LoRA Fine-Tuning for XUV700 consistency...")

# NOTE: no --output_name argument is passed. Directory arguments are quoted so that
# paths containing spaces are handed to the script as a single argument.
! accelerate launch train_dreambooth_lora_sdxl.py \
    --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
    --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \
    --instance_data_dir="$DATASET_DIR" \
    --output_dir="$LORA_OUTPUT_DIR" \
    --instance_prompt="$INSTANCE_PROMPT" \
    --resolution=1024 \
    --train_batch_size=1 \
    --gradient_accumulation_steps=8 \
    --learning_rate=1e-4 \
    --snr_gamma=5.0 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --mixed_precision="fp16" \
    --max_train_steps=$MAX_TRAIN_STEPS \
    --checkpointing_steps=300 \
    --seed="42"

# Release Python-side and CUDA-cached memory before the next cells load the pipeline.
gc.collect()
torch.cuda.empty_cache()

# A `!` shell command does not raise when the process fails, so success is only
# reported when the weights file was actually written (or refreshed) by this run.
_weights_written = os.path.isfile(LORA_FILE) and os.path.getmtime(LORA_FILE) != _previous_mtime
if _weights_written:
    print(f"\n✅ LoRA training finished. Weights saved to: {LORA_FILE}")
else:
    print(f"\n❌ LoRA training did not write {LORA_FILE_NAME} to {LORA_OUTPUT_DIR}. Check the training log above.")

Starting LoRA Fine-Tuning for XUV700 consistency...
2025-09-28 17:58:49.671030: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759082329.696148    1313 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759082329.702126    1313 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1759082329.716852    1313 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1759082329.716887    1313 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1759082329.716894    13

In [None]:
# --- ESSENTIAL IMPORTS FIX AND HELPER FUNCTIONS ---

from PIL import Image  # Fixes "NameError: name 'Image' is not defined"
import numpy as np    # Ensures NumPy is available for image array manipulation
import cv2            # Ensures OpenCV is available for Canny Edge detection
import os
import gc
import torch

# --- Path constants, declared again so this cell also works on its own ---
PROJECT_DIR = "/content/drive/MyDrive/GENAI_Assignment"
OUTPUT_NAME = "sdxl-suv-lora"
LORA_FILE_NAME = "pytorch_lora_weights.safetensors"

# Sub-folders of the project directory
LORA_OUTPUT_DIR, TEMPLATE_DIR = (
    os.path.join(PROJECT_DIR, subfolder) for subfolder in ("LoRA_Output", "Templates")
)

# Full path of the LoRA weights file inside the output folder
LORA_FILE = os.path.join(LORA_OUTPUT_DIR, LORA_FILE_NAME)


# --- Helper function for Canny Control Map (Moved from Cell 4) ---
def generate_control_map(image_path, width=1024, height=1024, low_threshold=100, high_threshold=200):
    """Generate a Canny edge control map from a template image.

    Args:
        image_path: Path of the template image to read.
        width: Width in pixels the template is resized to before edge detection.
            Defaults to 1024.
        height: Height in pixels the template is resized to before edge detection.
            Defaults to 1024.
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        A ``PIL.Image.Image`` of size ``(width, height)`` with three identical
        channels, each holding the single-channel Canny edge map.
    """
    # The context manager closes the underlying file once the pixels are converted.
    with Image.open(image_path) as source:
        pixels = np.array(source.convert("RGB"))

    # cv2.resize expects the target size as (width, height).
    pixels = cv2.resize(pixels, (width, height))

    edges = cv2.Canny(pixels, low_threshold, high_threshold)

    # Replicate the single edge channel three times to obtain an H x W x 3 image.
    return Image.fromarray(np.stack([edges, edges, edges], axis=2))

In [None]:
# --- Prerequisite: LORA_FILE and LORA_FILE_NAME must already be defined by an earlier cell ---

# Step 1: custom VAE, loaded in half precision
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Step 2: Canny ControlNet for SDXL, loaded in half precision
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)

# Step 3: SDXL base pipeline wired to the ControlNet and the custom VAE
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    controlnet=controlnet,
    variant="fp16",
    torch_dtype=torch.float16,
)
pipe.enable_model_cpu_offload()

# Step 4: attach the custom LoRA weights from the local file (no Hub lookup)
# and activate them at full strength.
pipe.load_lora_weights(
    LORA_FILE,
    adapter_name="SUV_LORA",
    weight_name=LORA_FILE_NAME,
    local_files_only=True,
)
pipe.set_adapters(["SUV_LORA"], adapter_weights=[1.0])

print("Generation pipeline ready.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Fetching 17 files:   0%|          | 0/17 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/575 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

text_encoder/model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

text_encoder_2/model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

unet/diffusion_pytorch_model.fp16.safete(…):   0%|          | 0.00/5.14G [00:00<?, ?B/s]

vae_1_0/diffusion_pytorch_model.fp16.saf(…):   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!


Generation pipeline ready.


In [None]:
# --- Cell 5: FINAL GENERATION LOOP (RUNS 1 - 6) ---

# --- RUN SELECTION: edit RUN_ID by hand before each execution ---
# Valid values are 1, 2, 3, 4, 5 and 6.
RUN_ID = 2
SEED = 42 + RUN_ID  # each run gets its own fixed seed

# Output size in pixels (1024x1024, the same resolution the LoRA was trained at)
RESOLUTION_W = RESOLUTION_H = 1024


# --- SCENE TABLE: RUN_ID -> (scene name, template file, prompt) ---
_SCENES = {
    1: (
        "Mumbai_Dawn",
        "template_1.png",
        "A cinematic, low-angle photograph of a **silver Mahindra XUV700 <silverSUV>** driving alone on the **Bandra-Worli Sea Link** at **pre-dawn (blue hour)**. Wet asphalt, subtle mist. In the distance, a **silhouetted fishing boat** passes underneath. Photorealistic, Canon EOS R5.",
    ),
    2: (
        "Mumbai_Aerial",
        "template_2.png",
        "**Drone photograph, extreme top-down perspective** of a silver **Mahindra XUV700 <silverSUV>** crossing the sweeping curve of the **Bandra-Worli Sea Link**. **Mumbai skyline visible**. Traffic visible as blurred **light streaks**. Deep blue water. Ultra-HD, high-fidelity photo, cinematic shot.",
    ),
    3: (
        "Mumbai_Night",
        "template_1.png",
        "Dynamic, **long-exposure night photograph** of a silver **Mahindra XUV700 <silverSUV>** parked on a hard shoulder of the Sea Link. The bridge is illuminated with **brilliant white LED light**. Red and white light trails from passing traffic, moody atmosphere. Low-angle cinematic view.",
    ),
    4: (
        "Hawa_Mahal_Tourists",
        "template_2.png",
        "**Golden-hour photograph** of a silver **Mahindra XUV700 <silverSUV>** parked on a cobblestone street directly in front of the **pink-and-red sandstone Hawa Mahal**. **Warm, directional light**. **Tourists** in brightly colored traditional clothing taking pictures. Wide-angle lens, photorealistic.",
    ),
    5: (
        "Hawa_Mahal_Locals",
        "template_1.png",
        "**Side-street, documentary-style perspective** of a silver **Mahindra XUV700 <silverSUV>** parked near the Hawa Mahal. Foreground features a vibrant, detailed **local market stall** selling crafts, with **local vendors** in traditional attire. Diffused sunlight, documentary street photography style.",
    ),
    6: (
        "Hawa_Mahal_Artistic",
        "template_2.png",
        "**Artistic, shallow depth-of-field close-up** of the silver **Mahindra XUV700 <silverSUV>** **reflecting the intricate carved windows** of the Hawa Mahal. Silhouette of a **woman in traditional Rajasthani attire** is visible in one reflected window. Extreme detail, photorealistic texture.",
    ),
}

# Reject unknown run numbers before anything is generated.
if RUN_ID not in _SCENES:
    raise ValueError(f"RUN_ID {RUN_ID} is invalid. Please set RUN_ID between 1 and 6.")

SCENE_NAME, TEMPLATE_NAME, PROMPT = _SCENES[RUN_ID]


# --- NEGATIVE PROMPT (shared by all runs) ---
NEGATIVE_PROMPT = "blurry, low quality, cartoon, painting, sketch, visible watermark, low resolution, deformed wheels, misshapen grill, generic SUV, generic sedan, misshapen car, bad anatomy, wrong colors, dull background, noon light, boring composition"


# --- EXECUTION ---
print(f"Generating Image {RUN_ID}: {SCENE_NAME} using template: {TEMPLATE_NAME} at {RESOLUTION_W}x{RESOLUTION_H}...")

# 1. Build the Canny control map from the selected template at the output resolution
control_image = generate_control_map(os.path.join(TEMPLATE_DIR, TEMPLATE_NAME), width=RESOLUTION_W, height=RESOLUTION_H)

# 2. Generate the final image in a single pass (no refiner).
#    The seeded generator makes the result reproducible for a given RUN_ID.
generator = torch.Generator("cuda").manual_seed(SEED)
final_image = pipe(
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    image=control_image,
    controlnet_conditioning_scale=0.8,

    # Sampling settings
    num_inference_steps=20,
    guidance_scale=7.0,

    width=RESOLUTION_W,
    height=RESOLUTION_H,
    generator=generator
).images[0]

# 3. Save the result as PNG. No `quality` option is passed: it is a JPEG/WebP
#    setting that the PNG writer ignores, so it had no effect on this file.
output_filename = f"Final_Image_{RUN_ID}_{SCENE_NAME}.png"
final_image.save(os.path.join(LORA_OUTPUT_DIR, output_filename))
print(f"✅ Image saved: {output_filename}")

# Free memory for the next run: drop the image reference, collect garbage,
# then release cached CUDA memory.
del final_image
gc.collect()
torch.cuda.empty_cache()

Generating Image 2: Mumbai_Aerial using template: template_2.png...


  0%|          | 0/25 [00:00<?, ?it/s]