## Phase 1: Environment Setup
Install required libraries and set up the device (GPU/CPU).
Steps:

1.   Install dependencies quietly.
2.   Import core modules.
3.   Define global constants (e.g., models, supported formats).






In [None]:
# Install essential libraries
!pip -q install --upgrade diffusers transformers accelerate safetensors gradio pandas pillow openpyxl


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m125.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m133.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.2 which is incompatible.
dask-cudf-cu12 25.6.0 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.2 which is incompatible.
cudf-cu12 25.6.0 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.2 which is incompatible.[0m[31m
[0m

In [None]:
#Define Global Constants
import os, random, time, json
from pathlib import Path
from typing import Optional, List
import pandas as pd
from PIL import Image

import torch
from diffusers import AutoPipelineForText2Image, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Hugging Face model id used for text-to-image generation.
TXT2IMG_MODEL = "stabilityai/sdxl-turbo"
# Secondary InstructPix2Pix checkpoint for the edit pipeline. build_edit()
# looks this name up, so it has to be defined (it was previously commented out).
FALLBACK_MODEL = "timbrooks/instruct-pix2pix"
# NOTE(review): confirm this repo id exists and can be loaded by
# StableDiffusionInstructPix2PixPipeline.
EDIT_MODEL = "OpenGVLab/Step1X-Edit"
# File suffixes (lower-case, with leading dot) that discover_images() accepts.
SUPPORTED = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")


## Phase 2: Utility Functions
Define helper functions for seeding, timestamping, image discovery, and prompt loading.
Steps:

1. Implement random seed management for reproducibility.
2. Create timestamp generator for file naming.
3. Discover images recursively in directories.
4. Load prompts from CSV/Excel files.

In [None]:
# Phase 2: Utility Functions
def seed_everything(seed: Optional[int] = None) -> int:
    """Seed Python's ``random`` and PyTorch, and return the seed that was used.

    Args:
        seed: Requested seed. ``None`` or any negative value means "pick a
            random seed". Float values (such as those a numeric UI widget may
            deliver) are truncated to ``int``.

    Returns:
        The integer seed that was applied, so callers can log or reuse it.
    """
    if seed is None or seed < 0:
        seed = random.randint(0, 2**31 - 1)
    # Always hand back a real int: the result is passed on to
    # torch.Generator.manual_seed, which expects an integer.
    seed = int(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    return seed

def timestamp():
    """Return the current local time as a ``YYYYmmdd-HHMMSS`` string.

    Used to build output file and folder names.
    """
    stamp_format = "%Y%m%d-%H%M%S"
    now = time.localtime()
    return time.strftime(stamp_format, now)

def discover_images(root_dirs: List[str], exts: Optional[tuple] = None) -> List[Path]:
    """Recursively collect image files below one or more directories.

    Args:
        root_dirs: Directories to scan. A single ``str``/``Path`` is accepted
            and treated as a one-element list. Roots that are not existing
            directories are skipped silently.
        exts: File suffixes to accept (with leading dot, case-insensitive).
            Defaults to the module-level ``SUPPORTED`` tuple.

    Returns:
        A sorted list of unique file paths whose suffix is in ``exts``.
    """
    if exts is None:
        exts = SUPPORTED
    # A bare string would otherwise be iterated character by character.
    if isinstance(root_dirs, (str, Path)):
        root_dirs = [root_dirs]
    wanted = {ext.lower() for ext in exts}

    found = set()  # set: overlapping roots must not yield duplicates
    for root in root_dirs:
        base = Path(root)
        if not base.is_dir():
            continue
        for candidate in base.rglob("*"):
            # is_file() keeps out directories that merely look like images.
            if candidate.suffix.lower() in wanted and candidate.is_file():
                found.add(candidate)
    return sorted(found)

def load_prompts_table(path: Optional[str]) -> pd.DataFrame:
    """Read a prompts table from a CSV or Excel file.

    Args:
        path: Location of a ``.csv``, ``.xlsx`` or ``.xls`` file. A falsy
            value (``None`` or ``""``) yields an empty table.

    Returns:
        The loaded table, or an empty DataFrame with the columns
        ``image`` and ``prompt`` when no path is given.

    Raises:
        ValueError: If the file suffix is not one of the supported ones.
    """
    if not path:
        return pd.DataFrame(columns=["image", "prompt"])

    suffix = Path(path).suffix.lower()
    if suffix == ".csv":
        return pd.read_csv(path)
    if suffix in (".xlsx", ".xls"):
        return pd.read_excel(path)
    raise ValueError("Unsupported prompts file (use .csv or .xlsx)")



## Phase 3: Model Initialization
Load diffusion pipelines lazily (on first use).
Steps:

1. Build Text-to-Image pipeline.
2. Build Edit pipeline.
3. Run inference functions.

In [None]:
# Phase 3: Model Initialization
def build_txt2img(model_id=TXT2IMG_MODEL):
    """Load the text-to-image pipeline for ``model_id`` and move it to DEVICE.

    On CUDA the half-precision dtype and the "fp16" weight variant are
    requested; on any other device full precision is used with no variant.
    """
    if DEVICE == "cuda":
        dtype, weight_variant = torch.float16, "fp16"
    else:
        dtype, weight_variant = torch.float32, None

    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=dtype,
        variant=weight_variant,
        safety_checker=None,
    )
    return pipeline.to(DEVICE)

def build_edit(model_id=EDIT_MODEL, fallback=None):
    """Load the instruction-based image editing pipeline and move it to DEVICE.

    Args:
        model_id: Checkpoint to load first.
        fallback: Checkpoint to load if ``model_id`` cannot be loaded. When
            ``None``, the module-level ``FALLBACK_MODEL`` is used if it is
            defined, otherwise ``"timbrooks/instruct-pix2pix"``.

    Returns:
        A ``StableDiffusionInstructPix2PixPipeline`` on DEVICE using an
        Euler-ancestral scheduler.

    Raises:
        Exception: Whatever the loader raised for ``model_id`` when there is
            no distinct fallback, or whatever it raised for the fallback.
    """
    if fallback is None:
        # Resolved at call time: FALLBACK_MODEL may be undefined in this
        # session, and a signature default would then fail at definition.
        fallback = globals().get("FALLBACK_MODEL", "timbrooks/instruct-pix2pix")

    dtype = torch.float16 if DEVICE == "cuda" else torch.float32

    def _load(repo_id):
        # Single place that knows the loader arguments.
        return StableDiffusionInstructPix2PixPipeline.from_pretrained(
            repo_id,
            torch_dtype=dtype,
            safety_checker=None,
        )

    try:
        pipe = _load(model_id)
    except Exception as err:  # boundary: any load failure triggers the fallback
        if not fallback or fallback == model_id:
            raise
        print(f"[WARN] Could not load edit model {model_id!r} ({err}); "
              f"falling back to {fallback!r}")
        pipe = _load(fallback)

    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
    return pipe.to(DEVICE)



## Inference Functions

In [None]:
@torch.inference_mode()
def run_t2i(pipe, prompt, seed=-1, steps=20, guidance=2.0, width=1024, height=1024):
    """Generate a single image from ``prompt`` with the given pipeline.

    The seed is resolved through ``seed_everything`` (so a negative value
    means random) and drives a generator created on the pipeline's device.
    Returns the first image of the pipeline output.
    """
    generator = torch.Generator(device=pipe.device)
    generator.manual_seed(seed_everything(seed))
    output = pipe(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=steps,
        guidance_scale=guidance,
        generator=generator,
    )
    return output.images[0]

@torch.inference_mode()
def run_edit(pipe, init_image: Image.Image, instruction, seed=-1, steps=20, guidance=1.8, image_guidance=1.5):
    """Edit ``init_image`` according to a text ``instruction``.

    The seed is resolved through ``seed_everything`` (so a negative value
    means random) and drives a generator created on the pipeline's device.
    ``guidance`` is passed as the text guidance scale and ``image_guidance``
    as the image guidance scale. Returns the first image of the output.
    """
    generator = torch.Generator(device=pipe.device)
    generator.manual_seed(seed_everything(seed))
    output = pipe(
        image=init_image,
        prompt=instruction,
        num_inference_steps=steps,
        guidance_scale=guidance,
        image_guidance_scale=image_guidance,
        generator=generator,
    )
    return output.images[0]

## Phase 4: Data Preparation Pipeline
Purpose: dataset extraction, organization, and prompt management.

Dataset processing:

1. Extraction: Unzips CMP Facade DB Extended dataset
2. Image Discovery: Scans for all supported image formats
3. Path Resolution: Maps relative paths to absolute filesystem paths

In [None]:
#Unzips CMP Facade DB Extended dataset
import zipfile, os

# Source archive and the folder it is unpacked into.
zip_path = "/content/CMP_facade_DB_extended.zip"
out_dir = "/content/cmp_facade_extended"

# Create the target folder if needed, then unpack the whole archive into it.
os.makedirs(out_dir, exist_ok=True)
archive = zipfile.ZipFile(zip_path, 'r')
with archive:
    archive.extractall(out_dir)

print("✅ Extracted to:", out_dir)

✅ Extracted to: /content/cmp_facade_extended


In [None]:
import os, glob, zipfile


# Show which archives are available before trying to open one.
print("ZIPs in /content:", glob.glob("/content/*.zip"))

zip_path = "/content/CMP_facade_DB_extended.zip"
# Raise explicitly instead of asserting: asserts are stripped under `python -O`
# and would let a missing file slip through to a less obvious error.
if not os.path.exists(zip_path):
    raise FileNotFoundError(f"ZIP not found: {zip_path}")


# Print the entry count and the first 50 entry names for a quick sanity check.
with zipfile.ZipFile(zip_path, 'r') as z:
    names = z.namelist()
    print("Total entries in ZIP:", len(names))
    for n in names[:50]:
        print(n)

ZIPs in /content: ['/content/CMP_facade_DB_extended.zip']
Total entries in ZIP: 687
extended/
extended/cmp_x0001.jpg
extended/cmp_x0001.png
extended/cmp_x0001.xml
extended/cmp_x0002.jpg
extended/cmp_x0002.png
extended/cmp_x0002.xml
extended/cmp_x0003.jpg
extended/cmp_x0003.png
extended/cmp_x0003.xml
extended/cmp_x0004.jpg
extended/cmp_x0004.png
extended/cmp_x0004.xml
extended/cmp_x0005.jpg
extended/cmp_x0005.png
extended/cmp_x0005.xml
extended/cmp_x0006.jpg
extended/cmp_x0006.png
extended/cmp_x0006.xml
extended/cmp_x0007.jpg
extended/cmp_x0007.png
extended/cmp_x0007.xml
extended/cmp_x0008.jpg
extended/cmp_x0008.png
extended/cmp_x0008.xml
extended/cmp_x0009.jpg
extended/cmp_x0009.png
extended/cmp_x0009.xml
extended/cmp_x0010.jpg
extended/cmp_x0010.png
extended/cmp_x0010.xml
extended/cmp_x0011.jpg
extended/cmp_x0011.png
extended/cmp_x0011.xml
extended/cmp_x0012.jpg
extended/cmp_x0012.png
extended/cmp_x0012.xml
extended/cmp_x0013.jpg
extended/cmp_x0013.png
extended/cmp_x0013.xml
extended/

In [None]:
import zipfile, os

# Source archive and the folder it is unpacked into.
zip_path = "/content/CMP_facade_DB_extended.zip"
out_dir = "/content/cmp_facade_extended"
# NOTE(review): despite the name, this holds the path of the *base* dataset
# ZIP, and nothing in this cell reads it — confirm whether it is still needed.
oo_dir = "/content/CMP_facade_DB_base.zip"
os.makedirs(out_dir, exist_ok=True)

# Unpack every entry of the archive below out_dir.
archive = zipfile.ZipFile(zip_path, 'r')
with archive:
    archive.extractall(out_dir)
print("✅ Extracted to:", out_dir)


✅ Extracted to: /content/cmp_facade_extended


In [None]:
import os
# Suffixes counted as images in this scan (compared case-insensitively).
IMAGE_EXTS = (".jpg",".jpeg",".png")

# Walk out_dir recursively and keep the full path of every matching file,
# in the order os.walk yields them.
all_images = [
    os.path.join(folder, filename)
    for folder, _, filenames in os.walk(out_dir)
    for filename in filenames
    if filename.lower().endswith(IMAGE_EXTS)
]

print("Found images:", len(all_images))
print("Examples:", all_images[:10])

Found images: 456
Examples: ['/content/cmp_facade_extended/extended/cmp_x0002.png', '/content/cmp_facade_extended/extended/cmp_x0178.png', '/content/cmp_facade_extended/extended/cmp_x0103.jpg', '/content/cmp_facade_extended/extended/cmp_x0180.png', '/content/cmp_facade_extended/extended/cmp_x0185.jpg', '/content/cmp_facade_extended/extended/cmp_x0036.jpg', '/content/cmp_facade_extended/extended/cmp_x0014.png', '/content/cmp_facade_extended/extended/cmp_x0124.png', '/content/cmp_facade_extended/extended/cmp_x0216.png', '/content/cmp_facade_extended/extended/cmp_x0068.png']


In [None]:
# Input/output locations; DATA_DIRS and OUT_DIR are read by the UI cell.
DATA_DIRS = [f"{out_dir}/extended"]
# NOTE(review): the prompt-matching cell reads "/content/facade_prompts_200.xlsx"
# (without " (1)") — confirm which of the two files is the intended one.
PROMPTS_FILE = "/content/facade_prompts_200 (1).xlsx"
OUT_DIR = "/content/outputs"
# Make sure the output folder exists before anything is written to it.
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)


In [None]:
import os
# Suffixes counted as images in this scan (compared case-insensitively).
IMAGE_EXTS = (".jpg",".jpeg",".png")

# Walk out_dir recursively and keep the full path of every matching file,
# in the order os.walk yields them.
all_images = [
    os.path.join(folder, filename)
    for folder, _, filenames in os.walk(out_dir)
    for filename in filenames
    if filename.lower().endswith(IMAGE_EXTS)
]

print("Found images:", len(all_images))
print("Examples:", all_images[:10])


Found images: 456
Examples: ['/content/cmp_facade_extended/extended/cmp_x0002.png', '/content/cmp_facade_extended/extended/cmp_x0178.png', '/content/cmp_facade_extended/extended/cmp_x0103.jpg', '/content/cmp_facade_extended/extended/cmp_x0180.png', '/content/cmp_facade_extended/extended/cmp_x0185.jpg', '/content/cmp_facade_extended/extended/cmp_x0036.jpg', '/content/cmp_facade_extended/extended/cmp_x0014.png', '/content/cmp_facade_extended/extended/cmp_x0124.png', '/content/cmp_facade_extended/extended/cmp_x0216.png', '/content/cmp_facade_extended/extended/cmp_x0068.png']


In [None]:
import pandas as pd
from pathlib import Path

# Load the prompt table from the Excel file.
df = pd.read_excel("/content/facade_prompts_200.xlsx")


print(df.columns)
# Index the discovered images by bare file name so prompts can be matched to them.
by_name = {Path(p).name: p for p in all_images}
# Rows whose name matches a discovered image get its full path; any other
# value is kept exactly as it appears in the table.
fullpaths = [by_name.get(name) or name for name in df['image'].astype(str)]

# Work on a copy so the originally loaded table stays untouched.
df_fixed = df.assign(image=fullpaths)

csv_fixed = "/content/facade_prompts_fixed.csv"
df_fixed.to_csv(csv_fixed, index=False, encoding="utf-8-sig")
print("✅ Wrote fixed CSV:", csv_fixed)


Index(['image', 'prompt'], dtype='object')
✅ Wrote fixed CSV: /content/facade_prompts_fixed.csv


## Phase 5: Interactive UI (Gradio)
Launch a web UI for real-time generation and editing.
Steps:

1. Define UI functions.
2. Build Gradio blocks with tabs for T2I, Editing, and Dataset Browser.

In [None]:
# Phase 5: Interactive UI (Gradio)
import gradio as gr

# One-slot holders for the pipelines; the UI callbacks fill them on first use.
_t2i_pipe = dict(pipe=None)
_edit_pipe = dict(pipe=None)
# Scan the dataset only when DATA_DIRS has been defined in this session.
if 'DATA_DIRS' in globals():
    DATASET_IMAGES = discover_images(DATA_DIRS)
else:
    DATASET_IMAGES = []
# File names offered in the dataset browser dropdown.
DATASET_CHOICES = [img_path.name for img_path in DATASET_IMAGES]

def pick_dataset_image(name):
    """Return the dataset image with file name ``name`` as an RGB PIL image.

    Returns ``None`` when ``name`` is empty or no discovered image has that
    file name. If several images share the name, the first one wins.
    """
    if not name:
        return None
    for candidate in DATASET_IMAGES:
        if candidate.name == name:
            return Image.open(candidate).convert("RGB")
    return None

def ui_t2i(prompt, steps, guidance, width, height, seed):
    """UI callback: generate an image from ``prompt`` and save it as a PNG.

    The text-to-image pipeline is built on the first call and reused
    afterwards. Returns the image and the path it was written to, under
    ``OUT_DIR/ui_txt2img``.
    """
    pipe = _t2i_pipe["pipe"]
    if pipe is None:
        pipe = build_txt2img()
        _t2i_pipe["pipe"] = pipe

    result = run_t2i(pipe, prompt, seed, steps, guidance, width, height)

    target_dir = Path(OUT_DIR) / "ui_txt2img"
    save_path = target_dir / f"t2i_{timestamp()}.png"
    target_dir.mkdir(parents=True, exist_ok=True)
    result.save(save_path)
    return result, str(save_path)

def ui_edit(image, instruction, steps, guidance, image_guidance, seed):
    """UI callback: edit ``image`` following ``instruction`` and save the result.

    Args:
        image: PIL image from the upload widget; ``None`` if nothing was uploaded.
        instruction: Text describing the desired edit.
        steps, guidance, image_guidance, seed: Forwarded to ``run_edit``.

    Returns:
        The edited image and the path it was written to, under
        ``OUT_DIR/ui_edit``.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Check the input before the (expensive) model load, and report it in the
    # UI instead of failing with an AttributeError on None.
    if image is None:
        raise gr.Error("Please upload an image to edit first.")

    if _edit_pipe["pipe"] is None:
        _edit_pipe["pipe"] = build_edit()
    init = image.convert("RGB")
    img = run_edit(_edit_pipe["pipe"], init, instruction, seed, steps, guidance, image_guidance)
    save_path = Path(OUT_DIR)/"ui_edit"/f"edit_{timestamp()}.png"
    save_path.parent.mkdir(parents=True, exist_ok=True)
    img.save(save_path)
    return img, str(save_path)

# Build the three-tab Gradio app. Component creation order inside each
# context manager determines the on-screen layout.
with gr.Blocks(title="StepX1Edit - Facade Studio") as demo:
    gr.Markdown("## 🧱 StepX1Edit – Facade Studio (Colab)\nText→Image & Text-Guided Editing")
    # Tab 1: text-to-image generation, wired to ui_t2i.
    with gr.Tab("Text → Image"):
        prompt = gr.Textbox(label="Prompt", placeholder="e.g., modern minimalist glass facade with LED signage")
        with gr.Row():
            # Initial values match run_t2i's defaults (20 steps, guidance 2.0, 1024x1024).
            steps = gr.Slider(4, 40, value=20, step=1, label="Steps")
            guidance = gr.Slider(0.5, 7.5, value=2.0, step=0.1, label="Guidance")
            width = gr.Slider(512, 1536, value=1024, step=64, label="Width")
            height = gr.Slider(512, 1536, value=1024, step=64, label="Height")
            seed = gr.Number(value=-1, label="Seed (-1=random)")
        btn = gr.Button("Generate")
        out_img = gr.Image(label="Result", interactive=False)
        out_path = gr.Textbox(label="Saved to", interactive=False)
        # Input order must match ui_t2i's parameter order.
        btn.click(ui_t2i, [prompt, steps, guidance, width, height, seed], [out_img, out_path])

    # Tab 2: instruction-based editing of an uploaded image, wired to ui_edit.
    with gr.Tab("Edit Existing Image"):
        # type="pil" makes the callback receive a PIL image.
        image = gr.Image(type="pil", label="Upload or pick from dataset tab")
        instruction = gr.Textbox(label="Instruction", placeholder="e.g., Convert to Islamic mashrabiya style with geometric patterns")
        with gr.Row():
            # Initial values match run_edit's defaults (20 steps, guidance 1.8, image guidance 1.5).
            e_steps = gr.Slider(4, 40, value=20, step=1, label="Steps")
            e_guid = gr.Slider(0.5, 10.0, value=1.8, step=0.1, label="Guidance")
            e_img_guid = gr.Slider(0.5, 5.0, value=1.5, step=0.1, label="Image Guidance")
            e_seed = gr.Number(value=-1, label="Seed (-1=random)")
        e_btn = gr.Button("Edit")
        e_img = gr.Image(label="Edited", interactive=False)
        e_path = gr.Textbox(label="Saved to", interactive=False)
        # Input order must match ui_edit's parameter order.
        e_btn.click(ui_edit, [image, instruction, e_steps, e_guid, e_img_guid, e_seed], [e_img, e_path])

    # Tab 3: preview an image from the discovered dataset files.
    with gr.Tab("Dataset Browser"):
        ds_dd = gr.Dropdown(choices=DATASET_CHOICES, label="Dataset images (from DATA_DIRS)")
        ds_btn = gr.Button("Load to preview")
        ds_img = gr.Image(label="Preview", interactive=False)
        # The preview is shown only in this tab's image; it is not sent to the edit tab.
        ds_btn.click(pick_dataset_image, ds_dd, ds_img)

# share=True requests a public share link in addition to the local server.
# NOTE(review): anyone with that link can use the app — confirm this is intended.
demo.queue().launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e13cc3520c2a29d611.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Phase 6: Batch Processing
Automated processing of entire datasets with metadata tracking.

Steps:

1. Save with metadata.
2. Run batch edit or T2I.

In [None]:
def save_with_meta(img, out_path: Path, meta: dict):
    """Save ``img`` to ``out_path`` and write ``meta`` next to it as JSON.

    Missing parent directories are created. The metadata file has the same
    name as the image with the suffix replaced by ``.json`` and is written
    as UTF-8, keeping non-ASCII characters unescaped.
    """
    target_dir = out_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    img.save(out_path)

    sidecar = out_path.with_suffix(".json")
    payload = json.dumps(meta, ensure_ascii=False, indent=2)
    sidecar.write_text(payload, encoding="utf-8")

def batch_edit(prompts_file: str, data_dirs: List[str], out_dir: str,
               steps=20, guidance=1.8, image_guidance=1.5):
    """Apply each (image, prompt) row of a prompts table with the edit pipeline.

    Args:
        prompts_file: CSV/Excel file with ``image`` and ``prompt`` columns.
        data_dirs: Directories searched for source images; rows are matched
            to them by file name. A row whose image is not found there is
            tried as a filesystem path before being skipped with a warning.
        out_dir: Parent folder; results go to a new ``edit_<timestamp>``
            subfolder.
        steps, guidance, image_guidance: Forwarded to ``run_edit``.

    Each result is saved as ``<source stem>_edited.png`` with a JSON sidecar.
    If that name was already produced in this run (same image listed twice,
    or sources sharing a stem such as ``x.jpg``/``x.png``), a numeric suffix
    (``_2``, ``_3``, ...) is appended instead of overwriting.
    """
    df = load_prompts_table(prompts_file).fillna("")
    lookup = {p.name: p for p in discover_images(data_dirs)}
    pipe = build_edit()
    # Record the checkpoint the pipeline reports, if it exposes one; otherwise
    # fall back to the configured model id.
    model_name = getattr(pipe, "name_or_path", None) or EDIT_MODEL
    out_root = Path(out_dir)/f"edit_{timestamp()}"
    used_names = {}  # output base name -> how many times it has been used
    for _, row in df.iterrows():
        img_name = str(row.get("image","")).strip()
        instr = str(row.get("prompt","")).strip()
        if not img_name or not instr:
            continue
        src = lookup.get(Path(img_name).name)
        if src is None and Path(img_name).exists():
            src = Path(img_name)
        if src is None:
            print(f"[WARN] Not found: {img_name}")
            continue
        # Draw the seed here so it can be stored in the metadata.
        seed = seed_everything(-1)
        # convert() returns a loaded copy, so the file can be closed right away.
        with Image.open(src) as opened:
            init = opened.convert("RGB")
        img = run_edit(pipe, init, instr, seed=seed, steps=steps, guidance=guidance, image_guidance=image_guidance)
        base = f"{src.stem}_edited"
        count = used_names.get(base, 0) + 1
        used_names[base] = count
        file_name = f"{base}.png" if count == 1 else f"{base}_{count}.png"
        out_path = out_root / file_name
        meta = {"mode":"edit","instruction":instr,"src":str(src),
                "steps":steps,"guidance":guidance,"image_guidance":image_guidance,
                "seed":seed,"model":model_name}
        save_with_meta(img, out_path, meta)
    print(f"✅ Done → {out_root}")

def batch_t2i(prompts_file: str, out_dir: str, steps=20, guidance=2.0, width=1024, height=1024):
    """Generate one image per non-empty ``prompt`` row of a prompts table.

    Args:
        prompts_file: CSV/Excel file with a ``prompt`` column.
        out_dir: Parent folder; results go to a new ``t2i_<timestamp>``
            subfolder.
        steps, guidance, width, height: Forwarded to ``run_t2i``.

    Each image is saved as ``t2i_<row index>.png`` with a JSON sidecar that
    records the prompt, the settings, and the random seed that was used.
    """
    df = load_prompts_table(prompts_file).fillna("")
    pipe = build_txt2img()
    out_root = Path(out_dir)/f"t2i_{timestamp()}"
    for i, row in df.iterrows():
        prompt = str(row.get("prompt","")).strip()
        if not prompt:
            continue
        # Draw the seed here so it can be stored in the metadata.
        seed = seed_everything(-1)
        img = run_t2i(pipe, prompt, seed=seed, steps=steps, guidance=guidance, width=width, height=height)
        out_path = out_root / f"t2i_{i:04d}.png"
        meta = {"mode":"txt2img","prompt":prompt,"steps":steps,"guidance":guidance,
                "seed":seed,"model":TXT2IMG_MODEL,"size":[width,height]}
        save_with_meta(img, out_path, meta)
    print(f"✅ Done → {out_root}")
