In [17]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import cv2
from PIL import Image

from tqdm.notebook import tqdm

In [3]:
# Root of the raw dataset. Set the SPACECRAFTS_DIR environment variable to run
# the notebook on another machine; without it the original location is used.
spacecrafts_dir = Path(
    os.environ.get(
        "SPACECRAFTS_DIR",
        "C:\\Users\\nikhi\\Documents\\Projects\\NASA_segmentation_F24\\data\\spacecrafts",
    )
)
images_dir = spacecrafts_dir / "images"
masks_dir = spacecrafts_dir / "mask"

# os.listdir returns entries in arbitrary order; sorting makes every run walk
# the files in the same sequence.
train_images = sorted(os.listdir(images_dir / "train"))
train_masks = sorted(os.listdir(masks_dir / "train"))

val_images = sorted(os.listdir(images_dir / "val"))
val_masks = sorted(os.listdir(masks_dir / "val"))

In [9]:
# Every listed file name, image or mask, must end with the .png extension.
for file_name in [*train_images, *val_images]:
    assert file_name.endswith(".png"), f"{file_name} is not a png file"
print("All images are in png format")

for file_name in [*train_masks, *val_masks]:
    assert file_name.endswith(".png"), f"{file_name} is not a png file"
print("All masks are in png format")

# Check that every image has the same height and width as its mask.
# Both splits go through the same loop so the two checks cannot drift apart.
for split, image_names in (("train", train_images), ("val", val_images)):
    for image_name in tqdm(image_names, desc=f"Checking {split} image sizes"):
        img_path = images_dir / split / image_name
        # The mask for "<stem>.png" is stored as "<stem>_mask.png".
        mask_path = masks_dir / split / f"{os.path.splitext(image_name)[0]}_mask.png"

        # Separate names for the arrays keep `image_name` usable in messages.
        image = cv2.imread(str(img_path))
        mask = cv2.imread(str(mask_path))

        # cv2.imread reports failure by returning None rather than raising, so
        # stop here with a clear message instead of failing later on `.shape`.
        assert image is not None, f"Could not read image: {img_path}"
        assert mask is not None, f"Could not read mask: {mask_path}"

        image_size = image.shape[:2]
        mask_size = mask.shape[:2]
        assert image_size == mask_size, (
            f"[{split}] Image and mask sizes do not match for {image_name}: "
            f"{image_size} vs {mask_size}"
        )
    print(f"All {split} images and masks are of the same size")

# Each split must list exactly as many masks as images.
train_count = len(train_images)
assert train_count == len(train_masks), "Number of training images and masks are not the same"
assert len(val_masks) == len(val_images), "Number of validation images and masks are not the same"
print("Number of training images and masks:", train_count)

# Check that every image "<stem>.png" has a mask named "<stem>_mask.png".
for split, image_names, mask_names in (
    ("train", train_images, train_masks),
    ("val", val_images, val_masks),
):
    # A set gives constant-time membership tests instead of rescanning the
    # whole mask list for every image.
    available_masks = set(mask_names)
    for image_name in image_names:
        expected_mask = f"{os.path.splitext(image_name)[0]}_mask.png"
        assert expected_mask in available_masks, f"Mask for {image_name} not found"
    print(f"All {split} images have a corresponding mask")

All images are in png format
All masks are in png format


Checking train image sizes:   0%|          | 0/2517 [00:00<?, ?it/s]

All train images and masks are of the same size


Checking val image sizes:   0%|          | 0/600 [00:00<?, ?it/s]

All val images and masks are of the same size
Number of training images and masks: 2517
All train images have a corresponding mask
All val images have a corresponding mask


In [10]:
# Output tree for the processed dataset: <dest>/{images,masks}/{train,val}.
dest_dir = Path("C:\\Users\\nikhi\\Documents\\Projects\\NASA_segmentation_F24\\data\\spacecrafts_processed")
images_dest_dir = dest_dir / "images"
masks_dest_dir = dest_dir / "masks"

# Create any missing folders; folders that already exist are left alone.
for split_name in ("train", "val"):
    for kind_dir in (images_dest_dir, masks_dest_dir):
        (kind_dir / split_name).mkdir(parents=True, exist_ok=True)

In [31]:
# Copy and resize train images and masks
for img in tqdm(train_images, desc="Copying train images"):
    img_path = images_dir / "train" / img
    mask_path = masks_dir / "train" / img.replace(".png", "_mask.png")
    
    img_dest_path = images_dest_dir / "train" / img
    mask_dest_path = masks_dest_dir / "train" / img
        
    # Resize image
    img = cv2.imread(str(img_path))
    img = cv2.resize(img, (1280, 1024), interpolation=cv2.INTER_LANCZOS4)
    img = Image.fromarray(img)
    img.save(img_dest_path)
    
    # Merge classes and Resize mask
    mask = cv2.imread(str(mask_path))    
    mask = cv2.resize(mask, (1280, 1024), interpolation=cv2.INTER_LANCZOS4)
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    mask[mask > 0] = 255
    
    mask = Image.fromarray(mask)
    mask.save(mask_dest_path)

Copying train images:   0%|          | 0/2517 [00:00<?, ?it/s]

In [32]:
# Copy and resize val images and masks   
for img in tqdm(val_images, desc="Copying val images"):
    img_path = images_dir / "val" / img
    mask_path = masks_dir / "val" / img.replace(".png", "_mask.png")
    
    img_dest_path = images_dest_dir / "val" / img
    mask_dest_path = masks_dest_dir / "val" / img
        
    # Resize image
    img = cv2.imread(str(img_path))
    img = cv2.resize(img, (1280, 1024), interpolation=cv2.INTER_LANCZOS4)
    img = Image.fromarray(img)
    img.save(img_dest_path)
    
    # Merge classes and Resize mask
    mask = cv2.imread(str(mask_path))    
    mask = cv2.resize(mask, (1280, 1024), interpolation=cv2.INTER_LANCZOS4)
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    mask[mask > 0] = 255
    
    mask = Image.fromarray(mask)
    mask.save(mask_dest_path)

Copying val images:   0%|          | 0/600 [00:00<?, ?it/s]