In [1]:
import os
import shutil
import random

# Paths
# Alternative dataset roots from earlier runs (swap in for base_dir if needed):
#   "/home/chia/bdd_multimodal-main/Pre_opt_post_SAR"
#   "/home/chia/bdd_multimodal-main/Pre_GANSAR_post_SAR"
# base_dir is read from its "images" and "target" subfolders; output_dir
# receives the <split>/<category> folder tree.
base_dir = "/home/chia/bdd_multimodal-main/pre_SAR_gan_whole"
output_dir = "/home/chia/bdd_multimodal-main/BDD_dataset_GAN-whole"

# Define splits (fractions of the whole dataset)
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# The test split is taken as "whatever is left" after train and val, so
# test_ratio only has an effect through this check. Fail early if the three
# fractions are inconsistent instead of silently producing a skewed split.
assert min(train_ratio, val_ratio, test_ratio) >= 0, "Split ratios must be non-negative."
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "Split ratios must sum to 1."

In [14]:
# NOTE: disabled earlier draft of the next cell (output-directory setup and
# file listing). Kept for reference only; nothing in this cell executes.
#
# Ensure output directories exist
# for split in ["train", "val", "test"]:
#     for category in ["pre_disaster", "post_disaster", "target"]:
#         os.makedirs(os.path.join(output_dir, split, category), exist_ok=True)
#
# List all files in the mixed images and target directories
# all_files = sorted(os.listdir(os.path.join(base_dir, "images")))  # All images are in one folder
# target_files = sorted(os.listdir(os.path.join(base_dir, "target")))  # All masks are in one folder

In [3]:

# Ensure output directories exist
for split in ["train", "val", "test"]:
    for category in ["pre_disaster", "post_disaster", "target"]:
        os.makedirs(os.path.join(output_dir, split, category), exist_ok=True)

# List all files in the mixed images and target directories
all_files = sorted(os.listdir(os.path.join(base_dir, "images")))  # All images are in one folder
target_files = sorted(os.listdir(os.path.join(base_dir, "target")))  # All masks are in one folder

# Filename markers that identify each modality
PRE_SAR_MARKER = "_pre_disaster_sar"
PRE_OPTICAL_MARKER = "_pre_disaster"
POST_SAR_MARKER = "_post_disaster_sar"
MASK_MARKER = "_building_damage"


def _sample_id(filename, marker):
    """Return the part of `filename` that precedes the first `marker`.

    Assumes the text before the modality marker identifies the sample and is
    shared by all four files of that sample -- TODO confirm against the
    dataset's naming scheme.
    """
    return filename.partition(marker)[0]


# Separate images based on filename patterns
pre_disaster_sar_files = [f for f in all_files if PRE_SAR_MARKER in f]
pre_disaster_optical_files = [f for f in all_files if PRE_OPTICAL_MARKER in f and "_sar" not in f]  # Exclude SAR
post_disaster_sar_files = [f for f in all_files if POST_SAR_MARKER in f]
building_damage_masks = [f for f in target_files if MASK_MARKER in f]  # Assuming these represent damage

# Ensure dataset consistency
assert len(pre_disaster_sar_files) == len(pre_disaster_optical_files) == len(post_disaster_sar_files) == len(building_damage_masks), \
    "File mismatch! Some disaster types have missing images."

# Equal lengths are trivially satisfied by an empty listing (for example when
# the files were already moved by a previous run), so reject that explicitly.
assert pre_disaster_sar_files, \
    f"No dataset files found under {base_dir!r}; were they already moved?"

# The lists are combined by position later on. Sorting by full filename does
# not guarantee the same sample order in every list, because the text after
# the sample ID differs per modality ("a_c_pre_..." sorts before "a_pre_...",
# but "a_building_..." sorts before "a_c_building_..."). Order every list by
# sample ID instead, then verify the IDs line up one-to-one.
pre_disaster_sar_files.sort(key=lambda f: _sample_id(f, PRE_SAR_MARKER))
pre_disaster_optical_files.sort(key=lambda f: _sample_id(f, PRE_OPTICAL_MARKER))
post_disaster_sar_files.sort(key=lambda f: _sample_id(f, POST_SAR_MARKER))
building_damage_masks.sort(key=lambda f: _sample_id(f, MASK_MARKER))

assert (
    [_sample_id(f, PRE_SAR_MARKER) for f in pre_disaster_sar_files]
    == [_sample_id(f, PRE_OPTICAL_MARKER) for f in pre_disaster_optical_files]
    == [_sample_id(f, POST_SAR_MARKER) for f in post_disaster_sar_files]
    == [_sample_id(f, MASK_MARKER) for f in building_damage_masks]
), "File mismatch! The same samples are not present in every modality."

In [4]:


# Group the files of each sample into one tuple:
# (pre-disaster SAR, pre-disaster optical, post-disaster SAR, damage mask)
data_pairs = list(zip(pre_disaster_sar_files, pre_disaster_optical_files, post_disaster_sar_files, building_damage_masks))

# Shuffle with a fixed seed on a private generator so the same directory
# listing always yields the same train/val/test partition, independent of any
# other use of the global `random` state in the kernel.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(data_pairs)

# Compute split indices; the test set is whatever remains after train and val
total_files = len(data_pairs)
train_split = int(total_files * train_ratio)
val_split = int(total_files * (train_ratio + val_ratio))

train_files = data_pairs[:train_split]
val_files = data_pairs[train_split:val_split]
test_files = data_pairs[val_split:]

# The three slices must partition the data exactly (no sample lost or duplicated)
assert len(train_files) + len(val_files) + len(test_files) == total_files, \
    "Split sizes do not add up to the dataset size; check the split ratios."

In [5]:


# Relocate the files of every sample into the folder tree of one split
def move_files(file_set, split):
    """Move each sample's four files from `base_dir` into `output_dir/<split>/`.

    Args:
        file_set: iterable of 4-tuples
            (pre-disaster SAR, pre-disaster optical, post-disaster SAR,
            damage mask), each entry being a bare filename.
        split: name of the destination split folder (e.g. "train").

    Both pre-disaster images go to "pre_disaster", the post-disaster SAR image
    goes to "post_disaster", and the mask goes to "target". Images are read
    from `base_dir/images`, masks from `base_dir/target`. The files are moved,
    not copied, so they no longer exist in `base_dir` afterwards.
    """
    for pre_sar, pre_optical, post_sar, damage_mask in file_set:
        # (source subfolder, filename, destination category), in move order
        relocation_plan = (
            ("images", pre_sar, "pre_disaster"),
            ("images", pre_optical, "pre_disaster"),
            ("images", post_sar, "post_disaster"),
            ("target", damage_mask, "target"),
        )
        for source_folder, filename, category in relocation_plan:
            origin = os.path.join(base_dir, source_folder, filename)
            destination = os.path.join(output_dir, split, category, filename)
            shutil.move(origin, destination)

# Send each partition to its split folder, in train -> val -> test order
for split_name, split_files in (("train", train_files), ("val", val_files), ("test", test_files)):
    move_files(split_files, split_name)

print("Dataset successfully sorted and split into train, val, and test sets!")


Dataset successfully sorted and split into train, val, and test sets!
