In [None]:
import zipfile

# Unpack img_align_celeba.zip into the raw_images folder.
archive_path = 'datasets/celeba-hq/img_align_celeba.zip'
extract_dir = 'datasets/celeba-hq/raw_images'

with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(extract_dir)


In [None]:
from PIL import Image
import os

input_dir = 'datasets/celeba-hq/raw_images'
output_dir = 'datasets/celeba-hq/val_source_256'
os.makedirs(output_dir, exist_ok=True)

# Convert every .jpg found directly inside input_dir to RGB, resize it to
# 256x256 and save it under the same file name in output_dir. A file that
# cannot be processed is reported and skipped so that one bad image does not
# abort the whole run.
# NOTE: resize((256, 256)) stretches the image to a square; the aspect ratio
# of a non-square input is not preserved.
failed = []

for fname in os.listdir(input_dir):
    if fname.lower().endswith('.jpg'):
        input_path = os.path.join(input_dir, fname)
        output_path = os.path.join(output_dir, fname)

        try:
            # The context manager closes the source file deterministically,
            # including when decoding fails part-way through.
            with Image.open(input_path) as src:
                img = src.convert('RGB')
            img = img.resize((256, 256), Image.LANCZOS)
            img.save(output_path)
        except Exception as e:
            print(f"Error processing {fname}: {e}")
            failed.append(fname)

# Only printed when something went wrong, so a clean run produces no output.
if failed:
    print(f"Skipped {len(failed)} file(s) that could not be processed.")


In [1]:
import os

# Set PYTHONPATH to the current directory in this process's environment before
# launching the script below.
# NOTE(review): presumably this lets the script import project-local packages
# when the notebook runs from the repository root — confirm.
os.environ['PYTHONPATH'] = "."

# Run the mask generator through the shell. The arguments are a YAML config,
# the folder of resized images and the folder the later cells read masks from.
# NOTE(review): the argument roles are inferred from the paths — verify
# against bin/gen_mask_dataset.py.
!python bin/gen_mask_dataset.py configs/data_gen/random_thick_256.yaml datasets/celeba-hq/val_source_256 datasets/celeba-hq/val_masks_thick


Detectron v2 is not installed


2025-04-08 18:20:42.222961: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-08 18:20:50.548182: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [4]:
import os

mask_dir = 'datasets/celeba-hq/val_masks_thick'

# Collect every entry whose name does not end in '_mask000.png', then delete
# those entries so that only the first mask of each image is left.
non_mask_entries = [
    entry for entry in os.listdir(mask_dir)
    if not entry.endswith('_mask000.png')
]
for entry in non_mask_entries:
    os.remove(os.path.join(mask_dir, entry))

print("Cleaned: Only mask files remain.")


Cleaned: Only mask files remain.


In [6]:
import os
import shutil

# Paths
mask_dir = 'datasets/celeba-hq/val_masks_thick'
img_dir = 'datasets/celeba-hq/val_source_256'

# Step 1: Rename masks to imagenum_mask.png
#
# This cell is destructive, so it must be safe to run more than once:
#   * renamed_masks holds only the IDs renamed during this run (for the report);
#   * mask_ids holds the ID of every mask present afterwards, including masks
#     that an earlier run already renamed. Step 2 uses mask_ids, so re-running
#     the cell no longer treats every image as "unmatched".
renamed_masks = set()
mask_ids = set()

for fname in os.listdir(mask_dir):
    if fname.endswith('_mask000.png'):
        base = fname.split('_')[0]  # e.g., 000001
        new_name = f"{base}_mask.png"
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows when the target already exists.
        os.replace(os.path.join(mask_dir, fname), os.path.join(mask_dir, new_name))
        renamed_masks.add(base)
        mask_ids.add(base)
    elif fname.endswith('_mask.png'):
        # Already in the final naming scheme: keep its image.
        mask_ids.add(fname[:-len('_mask.png')])

# Step 2: Delete images without corresponding mask
deleted_imgs = []

if mask_ids:
    for img_file in os.listdir(img_dir):
        if img_file.endswith('.jpg'):
            img_id = os.path.splitext(img_file)[0]  # remove .jpg
            if img_id not in mask_ids:
                os.remove(os.path.join(img_dir, img_file))
                deleted_imgs.append(img_file)
else:
    # With no masks at all, "delete unmatched images" would wipe the whole
    # image folder; that is almost certainly a wrong path or a skipped step.
    print(f"No mask files found in {mask_dir}; skipping image deletion.")

print(f"✅ Renamed {len(renamed_masks)} mask files.")
print(f"🗑️ Removed {len(deleted_imgs)} images without matching masks.")


✅ Renamed 201481 mask files.
🗑️ Removed 1118 images without matching masks.


In [7]:
import os

img_dir = "datasets/celeba-hq/val_source_256"
mask_dir = "datasets/celeba-hq/val_masks_thick"

# Sanity check: list both folders (sorted in place) and report their sizes.
images = os.listdir(img_dir)
images.sort()
masks = os.listdir(mask_dir)
masks.sort()

image_count = len(images)
mask_count = len(masks)
print(f"Number of images: {image_count}")
print(f"Number of masks: {mask_count}")


Number of images: 201481
Number of masks: 201481


In [8]:
import os
from sklearn.model_selection import train_test_split

# Define dataset paths
dataset_root = 'datasets/celeba-hq'
image_dir = os.path.join(dataset_root, 'val_source_256')
mask_dir = os.path.join(dataset_root, 'val_masks_thick')

# List the images, then derive each image's mask name (<id>_mask.png) from the
# image name itself. Pairing by name instead of by position in two separately
# sorted directory listings means a stray file in either folder cannot
# silently shift every image onto the wrong mask.
images = sorted(f for f in os.listdir(image_dir) if f.lower().endswith('.jpg'))
masks = [f"{os.path.splitext(f)[0]}_mask.png" for f in images]

# Fail loudly rather than write file lists that reference non-existent masks.
missing_masks = sorted(set(masks) - set(os.listdir(mask_dir)))
if missing_masks:
    raise FileNotFoundError(
        f"{len(missing_masks)} image(s) have no mask in {mask_dir}, "
        f"e.g. {missing_masks[:5]}"
    )

# Split into train/validation (90% train, 10% val). Both lists are passed in
# one call so they are split consistently and image/mask pairs stay together;
# random_state makes the split reproducible.
train_imgs, val_imgs, train_masks, val_masks = train_test_split(
    images, masks, test_size=0.1, random_state=42
)

# Function to save lists into files
def save_flist(file_list, file_path):
    """Write the entries of ``file_list`` to ``file_path``, one per line.

    The target file is opened in text write mode, so existing content is
    replaced. Each entry is formatted with an f-string and followed by a
    newline.
    """
    with open(file_path, 'w') as handle:
        handle.writelines(f"{entry}\n" for entry in file_list)

# Make sure the output folder for the .flist files exists
flist_dir = os.path.join(dataset_root, 'flists')
os.makedirs(flist_dir, exist_ok=True)

# Write one .flist per split; each tuple is (file name, entries to write)
flist_outputs = (
    ('train_images.flist', train_imgs),
    ('train_masks.flist', train_masks),
    ('val_images.flist', val_imgs),
    ('val_masks.flist', val_masks),
)
for flist_name, entries in flist_outputs:
    save_flist(entries, os.path.join(flist_dir, flist_name))

print("File lists (.flist) generated successfully in:", flist_dir)


File lists (.flist) generated successfully in: datasets/celeba-hq\flists
