# 5.1 Data Generation

This notebook documents the two synthetic data generation pipelines used in the project:

- **Method A:** SDXL Turbo (text-to-image) for generating full Zoom-like webcam images.
- **Method B:** Inpainting background replacement using Stable Diffusion Inpainting + person segmentation mask.


## Output structure (recommended)
- `data/synthetic/sdxl_turbo/images/`
- `data/synthetic/sdxl_turbo/labels.csv`

- `data/synthetic/inpainting_bg_replace/images/`
- `data/synthetic/inpainting_bg_replace/labels.csv`


In [None]:
# Optional (Colab): install dependencies if you want to run the notebook.
# !pip install -q diffusers transformers accelerate torch pillow pandas
# !pip install -q mediapipe==0.10.13 opencv-python-headless

## Method A: SDXL Turbo (Text-to-Image)

Generates full synthetic Zoom-like webcam images from a randomly sampled behavior vector:
`gaze, headphones, location, privacy, object`.

**Key idea:** the prompt is built directly from the behavior vector, and the same vector is saved to `labels.csv` as the image's ground-truth label.


In [None]:
import os, random
import pandas as pd

# --- Generation settings (tune these before a run) ---
RANDOM_SEED = 42               # seeds Python's `random`, which drives label sampling
NUM_IMAGES_TO_GENERATE = 1000  # how many images the generation loop produces
IMAGE_SIZE = 512               # used as both height and width of each image

# --- Output layout for Method A ---
METHOD_A_BASE = "data/synthetic/sdxl_turbo"
METHOD_A_IMAGES, METHOD_A_LABELS = (
    os.path.join(METHOD_A_BASE, leaf) for leaf in ("images", "labels.csv")
)

random.seed(RANDOM_SEED)
# Creating the images folder also creates METHOD_A_BASE, where labels.csv goes.
os.makedirs(METHOD_A_IMAGES, exist_ok=True)

In [None]:
# Each table maps a label value (the string written to labels.csv) to the
# phrase inserted into the text prompt for that value.
# Key order matters: values are sampled from the ordered key list, so
# reordering entries changes which labels a given random seed produces.

# Where the subject's eyes are directed.
GAZE_OPTIONS = {
    "looking_at_camera": "eyes looking at camera",
    "looking_away": "eyes looking away",
    "eyes_closed": "eyes closed",
}

# Kind of headphones worn, if any.
HEADPHONES_OPTIONS = {
    "with_wired": "wired headphones",
    "with_wireless": "wireless earbuds",
    "without": "no headphones",
}

# Indoor vs. outdoor setting.
LOCATION_OPTIONS = {
    "indoor": "indoor",
    "outdoor": "outdoor",
}

# Whether other people are visible behind the subject.
PRIVACY_OPTIONS = {
    "private": "alone",
    "public": "people behind",
}

# What the subject is holding.
OBJECT_OPTIONS = {
    "phone": "holding phone clearly visible",
    "pen": "holding pen clearly visible",
    "cup": "holding cup clearly visible",
    "nothing": "empty hands clearly visible",
    "unknown": "hands hidden",
    "other": "holding notebook clearly visible",
}

In [None]:
def generate_random_behavior_vector():
    """Sample one label value per behavior field, uniformly at random.

    Returns:
        dict: keys ``gaze``, ``headphones``, ``location``, ``privacy`` and
        ``object`` (in that order); each value is a key of the matching
        ``*_OPTIONS`` table.
    """
    # Fields are drawn in this fixed order so that a given `random` seed
    # always produces the same sequence of vectors.
    field_tables = (
        ("gaze", GAZE_OPTIONS),
        ("headphones", HEADPHONES_OPTIONS),
        ("location", LOCATION_OPTIONS),
        ("privacy", PRIVACY_OPTIONS),
        ("object", OBJECT_OPTIONS),
    )
    return {field: random.choice(list(table)) for field, table in field_tables}

def build_prompt(bv):
    """Turn a behavior vector into a (prompt, negative_prompt) pair.

    Args:
        bv: dict with keys ``gaze``, ``object``, ``headphones``, ``location``
            and ``privacy``; each value must be a key of the matching
            ``*_OPTIONS`` table.

    Returns:
        tuple[str, str]: the comma-separated positive prompt and a fixed
        negative prompt.

    Raises:
        KeyError: if a field is missing from ``bv`` or its value is not a key
            of the corresponding table.
    """
    # Fixed scene prefix, one phrase per behavior field, fixed quality suffix.
    fragments = [
        "webcam zoom call, student face",
        GAZE_OPTIONS[bv["gaze"]],
        OBJECT_OPTIONS[bv["object"]],
        HEADPHONES_OPTIONS[bv["headphones"]],
        LOCATION_OPTIONS[bv["location"]],
        PRIVACY_OPTIONS[bv["privacy"]],
        "sharp focus, realistic",
    ]
    negative_prompt = "blurry eyes, blurry hands, blurry object, cartoon, distorted"
    return ", ".join(fragments), negative_prompt

# Show a few prompt examples.
# The negative prompt is deliberately NOT bound to `np`: this notebook imports
# numpy under that alias for Method B, and re-running this cell afterwards
# would replace the module with a string and break the mask code.
for _ in range(5):
    bv = generate_random_behavior_vector()
    example_prompt, example_negative = build_prompt(bv)
    print("Behavior Vector:", bv)
    print("Prompt:", example_prompt)
    print("Negative:", example_negative)
    print("-" * 70)

### Generation loop (optional to run)

The following code block performs the full SDXL Turbo generation and writes:
- images to `data/synthetic/sdxl_turbo/images/`
- labels to `data/synthetic/sdxl_turbo/labels.csv`

Running it is optional for the interim submission, which is documentation-focused; the cell can be kept purely as a record of the procedure.


In [None]:
# Optional: run full generation (requires GPU + diffusers/torch)
# If you do not want to run, keep this cell as documentation.

import torch
from diffusers import AutoPipelineForText2Image
from PIL import Image

# Load SDXL Turbo in half precision and move it to the GPU.
pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16"
)
pipe = pipe.to("cuda")

label_fields = ("gaze", "headphones", "location", "privacy", "object")
rows = []  # one label record per generated image

for image_number in range(1, NUM_IMAGES_TO_GENERATE + 1):
    # Sample the labels first, then derive the prompt from them.
    bv = generate_random_behavior_vector()
    prompt, negative_prompt = build_prompt(bv)

    # A fresh per-image seed, stored in the labels so any single image can be
    # regenerated later.
    seed = random.randint(0, 1_000_000)
    generator = torch.Generator(device="cuda").manual_seed(seed)

    # NOTE(review): guidance_scale=0.0 follows the SDXL Turbo usage guidance;
    # with guidance disabled the negative prompt is likely ignored by the
    # pipeline. Confirm against the model card.
    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=4,
        guidance_scale=0.0,
        height=IMAGE_SIZE,
        width=IMAGE_SIZE,
        generator=generator,
    )
    image = result.images[0]

    # 1-based, zero-padded file names: sdxl_000001.jpg, sdxl_000002.jpg, ...
    filename = f"sdxl_{image_number:06d}.jpg"
    image.save(os.path.join(METHOD_A_IMAGES, filename), quality=95)

    record = {"filename": filename}
    record.update((field, bv[field]) for field in label_fields)
    record["seed"] = seed
    rows.append(record)

# Labels are written once, after all images have been generated.
df_a = pd.DataFrame(rows)
df_a.to_csv(METHOD_A_LABELS, index=False)
df_a.head()

## Method B: Inpainting Background Replacement

Starting from real images (foreground person), we:
1. Create a background mask using selfie segmentation.
2. Use Stable Diffusion Inpainting to generate a new background.
3. Save the resulting image and write metadata to `labels.csv`.

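We generate backgrounds for four privacy/environment categories, named `<location type>_<people present>`:
- `pub_ppl` (public place, people in the background)
- `priv_no_ppl` (private place, no people)
- `pub_no_ppl` (public place, no people)
- `priv_ppl` (private place, people in the background)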


In [None]:
import os, random, zipfile
import numpy as np
import pandas as pd
from PIL import Image

# --- Output layout for Method B ---
METHOD_B_BASE = "data/synthetic/inpainting_bg_replace"
METHOD_B_IMAGES, METHOD_B_LABELS = (
    os.path.join(METHOD_B_BASE, leaf) for leaf in ("images", "labels.csv")
)
# Creating the images folder also creates METHOD_B_BASE, where labels.csv goes.
os.makedirs(METHOD_B_IMAGES, exist_ok=True)

# Re-seed Python's `random` so the category/location sampling in this section
# starts from a known state.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

In [None]:
import cv2
import mediapipe as mp

def create_background_mask(image_pil: Image.Image, threshold: float = 0.5) -> Image.Image:
    """Build an inpainting mask: white (255) on background, black (0) on the person.

    Args:
        image_pil: Input picture. It is converted to RGB before segmentation,
            so other PIL modes (RGBA, L, ...) are accepted too.
        threshold: Pixels whose segmentation score is below this value are
            marked as background. Defaults to 0.5.

    Returns:
        A single-channel 8-bit PIL image with the same width and height as the
        input, holding 255 where the background should be repainted and 0
        elsewhere.
    """
    mp_segmentation = mp.solutions.selfie_segmentation
    # The segmenter is fed an RGB array directly; the earlier RGB -> BGR -> RGB
    # round trip through OpenCV was a no-op and has been dropped.
    image_rgb = np.array(image_pil.convert("RGB"))
    # NOTE(review): a new segmenter is built on every call; if many images are
    # processed, reusing one instance would avoid repeated model set-up.
    with mp_segmentation.SelfieSegmentation(model_selection=1) as segmenter:
        results = segmenter.process(image_rgb)
        # Low score -> not the person -> background -> paint it (255).
        mask = (results.segmentation_mask < threshold).astype(np.uint8) * 255
        return Image.fromarray(mask)

In [None]:
import torch
from diffusers import StableDiffusionInpaintPipeline

# Load the Stable Diffusion inpainting pipeline in half precision on the GPU.
# The safety checker is disabled through the loader arguments, so its weights
# are never loaded (previously it was loaded and then discarded), and the
# pipeline configuration records that no checker is required.
# NOTE(review): the "runwayml/..." repository may no longer be available on the
# Hugging Face Hub; if loading fails with a 404, switch to a maintained mirror
# of the same checkpoint.
pipe_inpaint = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    torch_dtype=torch.float16,
    variant="fp16",
    safety_checker=None,
    requires_safety_checker=False,
).to("cuda")

# Candidate scenes for the replacement background, grouped by location type.
public_locs = ["restaurant", "coffee shop", "beach", "public garden", "university hallway"]
private_locs = ["bedroom", "clean home office", "living room", "minimalist wall background"]

def sample_privacy_category_and_prompt():
    """Pick a random privacy category and build a matching background prompt.

    Two draws are made from Python's ``random`` module, in this order: the
    category, then a location from the list that matches the category's
    ``pub_`` / ``priv_`` prefix.

    Returns:
        tuple[str, str]: ``(category, prompt)`` where ``category`` is one of
        ``pub_ppl``, ``priv_no_ppl``, ``pub_no_ppl``, ``priv_ppl``.
    """
    templates = {
        "pub_ppl": "A busy public {loc} background, many people walking behind, blurry crowd, realistic, detailed",
        "priv_no_ppl": "A quiet private {loc} background, empty room, no people, realistic lighting",
        "pub_no_ppl": "An empty public {loc} area, no people present, realistic, high quality",
        "priv_ppl": "A private {loc} at home, blurred family members in background, realistic",
    }
    category = random.choice(["pub_ppl", "priv_no_ppl", "pub_no_ppl", "priv_ppl"])
    candidate_locs = public_locs if category.startswith("pub_") else private_locs
    loc = random.choice(candidate_locs)
    return category, templates[category].format(loc=loc)

In [None]:
from google.colab import files

# Ask the user for source photos; Colab saves each upload into the working
# directory under its own file name.
uploaded = files.upload()

rows = []  # one metadata record per generated image
for fname in uploaded.keys():
    # Close the file handle as soon as the pixels have been copied out.
    with Image.open(fname) as source_image:
        init_image = source_image.convert("RGB")

    # Snap both sides down to a multiple of 8, as the pipeline requires.
    w, h = init_image.size
    new_w, new_h = (w // 8) * 8, (h // 8) * 8
    if new_w == 0 or new_h == 0:
        print(f"Skipping {fname}: image is smaller than 8x8 pixels")
        continue
    init_image = init_image.resize((new_w, new_h))

    # White = background to repaint, black = person to keep.
    mask_image = create_background_mask(init_image)

    category, prompt = sample_privacy_category_and_prompt()

    # Per-image seed (same scheme as Method A) so a result can be reproduced;
    # it is stored in labels.csv below.
    seed = random.randint(0, 1_000_000)
    generator = torch.Generator(device="cuda").manual_seed(seed)

    # height/width are passed explicitly: when omitted, the pipeline falls back
    # to its default resolution and rescales image and mask to it, which
    # discards the source aspect ratio prepared above.
    # NOTE(review): large uploads now run at their own resolution and need
    # more GPU memory; downscale beforehand if that becomes a problem.
    out = pipe_inpaint(
        prompt=prompt,
        image=init_image,
        mask_image=mask_image,
        height=new_h,
        width=new_w,
        num_inference_steps=35,
        guidance_scale=6.5,
        generator=generator,
    ).images[0]

    # Output name: bg_<source stem>_<category>.png
    out_name = f"bg_{os.path.splitext(os.path.basename(fname))[0]}_{category}.png"
    out_path = os.path.join(METHOD_B_IMAGES, out_name)
    out.save(out_path)

    rows.append({
        "source_filename": fname,
        "output_filename": out_name,
        "privacy_category": category,
        "prompt": prompt,
        "seed": seed,
    })

df_b = pd.DataFrame(rows)
df_b.to_csv(METHOD_B_LABELS, index=False)
df_b.head()

### Notes
If you need to download the generated results from Colab, you can zip the output folder (optional).

In [None]:
# Optional: zip results for download.
# The archive holds every generated image at its root plus labels.csv, so the
# images are never separated from their metadata.
zip_path = "inpainting_bg_replace_results.zip"
with zipfile.ZipFile(zip_path, 'w') as zipf:
    for file in sorted(os.listdir(METHOD_B_IMAGES)):
        image_path = os.path.join(METHOD_B_IMAGES, file)
        # Skip sub-directories (e.g. notebook checkpoint folders).
        if os.path.isfile(image_path):
            zipf.write(image_path, arcname=file)
    # labels.csv lives next to the images folder, not inside it.
    if os.path.isfile(METHOD_B_LABELS):
        zipf.write(METHOD_B_LABELS, arcname=os.path.basename(METHOD_B_LABELS))

# from google.colab import files
# files.download(zip_path)

## Summary
- Method A (SDXL Turbo): generates full synthetic webcam images + behavior-vector labels.
- Method B (Inpainting BG replacement): keeps the person, replaces the background, and saves metadata labels.