In [None]:
import os
import shutil
import random
from pathlib import Path


# Input: the real-image dataset. Output: root of the merged dataset built by this notebook.
real_data_path = Path(r"C:/Users/Pulpit/Desktop/ml space/MarsData-MarsData-V2")
output_path = Path(r"C:/Users/Pulpit/Desktop/ml space/MarsData-combined")

# Destination layout is <output_path>/<split>/<kind>.
train_img_path, train_mask_path = (output_path / "train" / kind for kind in ("img", "mask"))
test_img_path, test_mask_path = (output_path / "test" / kind for kind in ("img", "mask"))

# Create all four destination folders up front; existing folders are left untouched.
for folder in (train_img_path, train_mask_path, test_img_path, test_mask_path):
    folder.mkdir(parents=True, exist_ok=True)

def split_and_copy(data_path, is_real, train_ratio=0.85, seed=None, output_root=None):
    """Shuffle image/mask pairs and copy them into flat train/test folders.

    Images are collected recursively from ``<data_path>/img`` and masks from
    ``<data_path>/label`` (``*.png`` only). Both listings are sorted and paired
    by position, so the two trees must sort into the same order.

    Files are copied under their bare file name, so same-named files coming
    from different sub-folders overwrite each other in the destination.

    Args:
        data_path: Dataset root containing the ``img`` and ``label`` folders.
        is_real: When true, the pairs left after the training slice are copied
            to the test folders. When false they are not copied anywhere, i.e.
            only the first ``train_ratio`` share of the shuffled pairs is used.
        train_ratio: Fraction of the shuffled pairs that goes to training.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            original behaviour of shuffling with the global ``random`` state.
        output_root: Optional destination root; files go to
            ``<output_root>/{train,test}/{img,mask}``. ``None`` uses the
            notebook-level ``train_img_path`` / ``train_mask_path`` /
            ``test_img_path`` / ``test_mask_path`` variables.

    Raises:
        AssertionError: If the number of images and masks differs.
    """
    data_path = Path(data_path)
    images = sorted(data_path.glob("img/**/*.png"))
    masks = sorted(data_path.glob("label/**/*.png"))

    print(len(images), len(masks))
    assert len(images) == len(masks), "Mismatch between images and masks"

    if output_root is None:
        # Wrap in Path: another cell of this notebook rebinds these globals to
        # plain strings, which would break the "/" joins below.
        destinations = {
            "train": (Path(train_img_path), Path(train_mask_path)),
            "test": (Path(test_img_path), Path(test_mask_path)),
        }
    else:
        root = Path(output_root)
        destinations = {
            split: (root / split / "img", root / split / "mask")
            for split in ("train", "test")
        }

    for img_dir, mask_dir in destinations.values():
        img_dir.mkdir(parents=True, exist_ok=True)
        mask_dir.mkdir(parents=True, exist_ok=True)

    data_pairs = list(zip(images, masks))
    if seed is None:
        random.shuffle(data_pairs)
    else:
        # Private generator: reproducible without touching the global random state.
        random.Random(seed).shuffle(data_pairs)

    train_size = int(len(data_pairs) * train_ratio)
    splits = {
        "train": data_pairs[:train_size],
        "test": data_pairs[train_size:] if is_real else [],
    }

    for split, pairs in splits.items():
        img_dir, mask_dir = destinations[split]
        for img_path, mask_path in pairs:
            shutil.copy(img_path, img_dir / img_path.name)
            shutil.copy(mask_path, mask_dir / mask_path.name)

# Real data is the only source that also feeds the test split.
split_and_copy(data_path=real_data_path, is_real=True)

print("Done")


In [None]:
import os
import shutil

# Copy every synthetic route (route1 .. route6) into the combined *training* set.
base_path = r"C:\Users\Pulpit\Desktop\ml space\SynMars-master"
output_path = r"C:\Users\Pulpit\Desktop\ml space\MarsData-combined"

train_img_path = os.path.join(output_path, "train", "img")
train_mask_path = os.path.join(output_path, "train", "mask")

os.makedirs(train_img_path, exist_ok=True)
os.makedirs(train_mask_path, exist_ok=True)

routes = [f"route{i}" for i in range(1, 7)]

for route in routes:
    img_folder = os.path.join(base_path, route, "img")
    mask_folder = os.path.join(base_path, route, "mask")

    # Keep regular files only: os.listdir also returns sub-directories,
    # which shutil.copy cannot copy.
    img_files = sorted(
        name for name in os.listdir(img_folder)
        if os.path.isfile(os.path.join(img_folder, name))
    )
    mask_files = sorted(
        name for name in os.listdir(mask_folder)
        if os.path.isfile(os.path.join(mask_folder, name))
    )

    assert len(img_files) == len(mask_files), f"Mismatch in {route}: {len(img_files)} images, {len(mask_files)} masks"

    # Images and masks are paired by sorted position within the route.
    for img, mask in zip(img_files, mask_files):
        # All routes land in the same flat folder, so the destination name is
        # prefixed with the route. Without the prefix, a file name that occurs
        # in several routes (or already exists in the destination) would be
        # silently overwritten by shutil.copy.
        shutil.copy(os.path.join(img_folder, img), os.path.join(train_img_path, f"{route}_{img}"))
        shutil.copy(os.path.join(mask_folder, mask), os.path.join(train_mask_path, f"{route}_{mask}"))

print("Done")

In [None]:
import os

def rename_files_in_directory(directory, prefix):
    """Rename every entry of ``directory`` to ``<prefix>_<NNNN>.png``.

    Entries are numbered from 1 in sorted-name order, zero-padded to four
    digits. The ``.png`` extension is applied regardless of the original name.

    The rename runs in two passes through unique temporary names. Renaming
    directly could hit a target name that still belongs to an entry not yet
    processed (e.g. when the directory already holds some ``<prefix>_NNNN.png``
    files); depending on the platform that either overwrites the file or raises.

    If the function is interrupted, entries may be left under their temporary
    ``.rename-<token>-<index>`` names.

    Args:
        directory: Folder whose entries are renamed in place.
        prefix: Text placed before the running number.
    """
    import uuid  # local import keeps this cell self-contained

    files = sorted(os.listdir(directory))
    token = uuid.uuid4().hex

    # Pass 1: move everything out of the way, remembering the sorted order.
    staged = []
    for i, filename in enumerate(files):
        temp_name = f".rename-{token}-{i}"
        os.rename(os.path.join(directory, filename), os.path.join(directory, temp_name))
        staged.append(temp_name)

    # Pass 2: no final name is occupied any more, so assigning them is safe.
    for i, temp_name in enumerate(staged):
        new_name = f"{prefix}_{i+1:04d}.png"
        os.rename(os.path.join(directory, temp_name), os.path.join(directory, new_name))

base_dir = r"C:\Users\Pulpit\Desktop\ml space\MarsData-combined"
sub_dirs = ["train/img", "train/mask", "test/img", "test/mask"]

# Each folder gets its own prefix derived from its relative path,
# e.g. "train/img" -> "train_img".
for sub_dir in sub_dirs:
    full_path = os.path.join(base_dir, sub_dir)
    prefix = "_".join(sub_dir.split("/"))
    rename_files_in_directory(full_path, prefix)

print("Renaming Done")

In [None]:
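# Disabled cell, kept for reference only: it padded images smaller than 1920x1080 with black
# borders and overwrote the files in place. Padding is not a good approach, so it is no longer used.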

# import os
# from PIL import Image, ImageOps

# folder_path_1 = r"C:\Users\Pulpit\Desktop\ml space\MarsData-combined\train\img"
# folder_path_2 = r"C:\Users\Pulpit\Desktop\ml space\MarsData-combined\train\mask"
# folder_path_3 = r"C:\Users\Pulpit\Desktop\ml space\MarsData-combined\test\img"
# folder_path_4 = r"C:\Users\Pulpit\Desktop\ml space\MarsData-combined\test\mask"

# folder_paths = [folder_path_1, folder_path_2, folder_path_3, folder_path_4]

# target_width = 1920
# target_height = 1080

# for folder in folder_paths:
#     for filename in os.listdir(folder):
#         if filename.endswith(".png"):
#             file_path = os.path.join(folder, filename)
            
#             img = Image.open(file_path)
#             original_width, original_height = img.size

#             if original_width < target_width or original_height < target_height:
#                 left_padding = (target_width - original_width) // 2
#                 top_padding = (target_height - original_height) // 2
#                 right_padding = target_width - original_width - left_padding
#                 bottom_padding = target_height - original_height - top_padding

#                 padding = (left_padding, top_padding, right_padding, bottom_padding)
#                 img_with_padding = ImageOps.expand(img, border=padding, fill=(0, 0, 0))
                
#                 img_with_padding.save(file_path)
#                 print(f"Padded: {filename}")
#             else:
#                 print(f"Already sufficient size: {filename}")


In [None]:
import os
import pickle
from PIL import Image
import torchvision.transforms as tv

def serialize_dataset(dataset_path, target_path):
    """Convert every image/mask pair of a dataset split to tensors and pickle it.

    Images are read from ``<dataset_path>/img`` and masks from
    ``<dataset_path>/mask``. Both listings are sorted and paired by position.
    Each pair is written to ``<target_path>/<image file name>.pkl`` as a dict
    with the keys ``"image"`` and ``"mask"``.

    Args:
        dataset_path: Split folder containing the ``img`` and ``mask`` folders.
        target_path: Output folder for the ``.pkl`` files; created if missing.

    Raises:
        AssertionError: If the number of images and masks differs (pairing by
            position would otherwise silently drop the surplus files).
    """
    images_path = os.path.join(dataset_path, "img")
    masks_path = os.path.join(dataset_path, "mask")
    image_files = sorted(os.listdir(images_path))
    mask_files = sorted(os.listdir(masks_path))

    assert len(image_files) == len(mask_files), (
        f"Mismatch in {dataset_path}: {len(image_files)} images, {len(mask_files)} masks"
    )

    os.makedirs(target_path, exist_ok=True)

    # The pipelines do not depend on the sample, so build them once.
    # No Resize step: images of different sizes are mixed, so every sample
    # keeps its own resolution.
    # `tv` is already torchvision.transforms, which exports these classes directly.
    transform_image = tv.Compose([
        tv.ToTensor(),
        tv.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    transform_mask = tv.Compose([
        tv.ToTensor()
    ])

    for img_file, mask_file in zip(image_files, mask_files):
        # convert() returns an independent, fully loaded copy, so the source
        # file handle can be closed right away instead of leaking until GC.
        with Image.open(os.path.join(images_path, img_file)) as img_src:
            img = img_src.convert("RGB")
        with Image.open(os.path.join(masks_path, mask_file)) as mask_src:
            mask = mask_src.convert("L")

        data = {
            "image": transform_image(img),
            "mask": transform_mask(mask)
        }

        # NOTE: pickle files must only ever be loaded from trusted locations.
        with open(os.path.join(target_path, f"{img_file}.pkl"), "wb") as f:
            pickle.dump(data, f)
main_path = r"C:\Users\Pulpit\Desktop\ml space\MarsData-combined"

# Split folders produced by the earlier cells.
train_dataset_path, test_dataset_path = (
    os.path.join(main_path, split) for split in ("train", "test")
)

# Serialize train first, then test, each into its own folder under main_path.
for split_path, target_name in (
    (train_dataset_path, "serialized_train"),
    (test_dataset_path, "serialized_test"),
):
    serialize_dataset(split_path, os.path.join(main_path, target_name))