Imports

In [None]:
# Importing necessary libraries
import os
import glob
import random
import shutil
from datetime import datetime

import cv2
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split

from patchify import patchify

In [None]:
# Source dataset folders; each one is read through its IMGS and MSKS sub-folders.
data_folders = ["A", "B"]

# Sub-folder names used under every source folder and every destination root.
IMGS, MSKS = "images", "masks"

# File-name markers that tell the two versions of a scene apart.
PRE_PNG, POST_PNG = "_pre_disaster", "_post_disaster"

# Destination roots: OUTPUT for patches that are kept, RMVD for the set-aside ones.
OUTPUT, RMVD = "output", "removed"

# Fully joined destination directories for image and mask patches.
IMGS_PATCHIFY, MSKS_PATCHIFY = (os.path.join(OUTPUT, sub) for sub in (IMGS, MSKS))
IMGS_REMOVIFY, MSKS_REMOVIFY = (os.path.join(RMVD, sub) for sub in (IMGS, MSKS))

Patchify Images from two folders A and B

In [None]:
def pathizer(paths):
    """Make sure every directory in ``paths`` exists, creating missing ones.

    A line is printed per path saying whether it was created or was already
    there.

    Args:
        paths: Iterable of directory paths. Missing parent directories are
            created as well.

    Raises:
        FileExistsError: If a path exists but is a regular file rather than
            a directory, so it cannot be used as an output folder.
    """
    for path in paths:
        # isdir (not exists): a plain file sitting at `path` must not be
        # reported as an existing directory.
        if os.path.isdir(path):
            print(f"Directory already exists: {path}")
            continue

        # exist_ok=True tolerates the directory appearing between the check
        # above and this call; it still raises if `path` is a file.
        os.makedirs(path, exist_ok=True)
        print(f"Created directory: {path}")

In [None]:
def _write_patch_set(img_dir, mask_dir, pre_name, post_name, patch_set):
    """Write one grid position's pre/post image and mask patches.

    ``patch_set`` is ``(img_pre, mask_pre, img_post, mask_post)``. Image
    patches go to ``img_dir`` and mask patches to ``mask_dir``; the pre and
    post versions are stored under ``pre_name`` and ``post_name``.
    """
    img_pre, mask_pre, img_post, mask_post = patch_set
    cv2.imwrite(os.path.join(img_dir, pre_name), img_pre)
    cv2.imwrite(os.path.join(mask_dir, pre_name), mask_pre)
    cv2.imwrite(os.path.join(img_dir, post_name), img_post)
    cv2.imwrite(os.path.join(mask_dir, post_name), mask_post)


def patchifizer(path_from_folders, keep=True):
    """Cut pre/post-disaster image and mask pairs into 256x256 patches.

    For every folder in ``path_from_folders`` the ``IMGS`` sub-folder is
    scanned for files ending in ``PRE_PNG + '.png'``. Each such image, the
    same-named mask from the ``MSKS`` sub-folder, and their ``POST_PNG``
    counterparts are tiled into non-overlapping 256x256 patches (step 256).

    A grid position is written to ``IMGS_PATCHIFY`` / ``MSKS_PATCHIFY`` when
    its *pre* mask patch has at least one non-zero pixel. Every other
    position counts as removed and, if ``keep`` is true, is written to
    ``IMGS_REMOVIFY`` / ``MSKS_REMOVIFY`` instead.

    Scenes for which any of the four files cannot be read are reported and
    skipped. After each folder the running total of removed positions
    (cumulative across folders) is printed.

    Args:
        path_from_folders: Iterable of dataset folder paths.
        keep: When true, also save the removed patches; when false they are
            only counted.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # Create the destinations up front: cv2.imwrite does not create missing
    # directories and reports failure only through its return value.
    destinations = [IMGS_PATCHIFY, MSKS_PATCHIFY]
    if keep:
        destinations += [IMGS_REMOVIFY, MSKS_REMOVIFY]
    for destination in destinations:
        os.makedirs(destination, exist_ok=True)

    removed = 0
    for d in path_from_folders:

        image_directory = os.path.join(d, IMGS)
        mask_directory = os.path.join(d, MSKS)

        # Only pre-disaster files drive the loop; post names are derived.
        all_images = sorted(
            im for im in os.listdir(image_directory) if im.endswith(PRE_PNG + '.png')
        )

        print(f"Loaded images from Path: {image_directory}")
        print(f"Loaded masks from Path: {mask_directory}")

        for image_name_pre in tqdm(all_images, desc='Processing Images', leave=False):
            image_name_post = image_name_pre.replace(PRE_PNG, POST_PNG)

            # Images are read as 3-channel colour, masks as single-channel.
            image_pre = cv2.imread(os.path.join(image_directory, image_name_pre), cv2.IMREAD_COLOR)
            mask_pre = cv2.imread(os.path.join(mask_directory, image_name_pre), cv2.IMREAD_GRAYSCALE)
            image_post = cv2.imread(os.path.join(image_directory, image_name_post), cv2.IMREAD_COLOR)
            mask_post = cv2.imread(os.path.join(mask_directory, image_name_post), cv2.IMREAD_GRAYSCALE)

            # cv2.imread signals a missing/unreadable file by returning None.
            if any(loaded is None for loaded in (image_pre, mask_pre, image_post, mask_post)):
                print(f"Failed to load image or mask for {image_name_pre} and {image_name_post}...")
                continue

            patches_img_pre = patchify(image_pre, (256, 256, 3), step=256)
            patches_mask_pre = patchify(mask_pre, (256, 256), step=256)
            patches_img_post = patchify(image_post, (256, 256, 3), step=256)
            patches_mask_post = patchify(mask_post, (256, 256), step=256)

            # splitext keeps any extra dots in the name; splitting on the
            # first '.' would truncate such names and make patches collide.
            stem_pre = os.path.splitext(image_name_pre)[0]

            for i in range(patches_mask_pre.shape[0]):
                for j in range(patches_mask_pre.shape[1]):
                    patch_set = (
                        patches_img_pre[i, j, 0, :, :],
                        patches_mask_pre[i, j, :, :],
                        patches_img_post[i, j, 0, :, :],
                        patches_mask_post[i, j, :, :],
                    )

                    # NOTE(review): row and column indices are concatenated
                    # without a separator, so names are unambiguous only while
                    # each side yields at most 10 patches. Left unchanged
                    # because other code may rely on this naming.
                    pre_name = f"{stem_pre}_patch_{i}{j}.png"
                    # Swap the marker on the file name only, never on the
                    # directory part of the path.
                    post_name = pre_name.replace(PRE_PNG, POST_PNG)

                    # The pre-disaster mask alone decides the fate of all
                    # four patches at this position.
                    if np.sum(patch_set[1]) > 0:
                        _write_patch_set(IMGS_PATCHIFY, MSKS_PATCHIFY, pre_name, post_name, patch_set)
                    else:
                        # Count every dropped position, including when the
                        # patches are not saved (keep=False).
                        removed += 1
                        if keep:
                            _write_patch_set(IMGS_REMOVIFY, MSKS_REMOVIFY, pre_name, post_name, patch_set)

        print("Total images removed:", removed)

# Run the patch extraction over every folder listed in data_folders.
# `keep` is left at its default (True), so positions whose pre-disaster mask
# patch is empty are written to the "removed" directories as well as counted.
patchifizer(data_folders)

Loaded images from Path: A\images
Loaded masks from Path: A\masks


Processing Images:   0%|          | 0/3279 [00:00<?, ?it/s]

                                                                       

Total images removed: 21659
Loaded images from Path: B\images
Loaded masks from Path: B\masks


                                                                          

Total images removed: 121460




In [None]:
def check_matching_files(img_folder, mask_folder):
    """Report naming mismatches between an image folder and a mask folder.

    Only ``.png`` files are considered. Each file name is normalized by
    removing the ``_pre_disaster`` / ``_post_disaster`` marker and the
    extension, so the pre and post versions of one scene (or patch) share
    a single normalized name.

    Two checks are printed:

    1. Normalized names present in only one of the two folders.
    2. For every normalized name, whether the image folder and the mask
       folder each hold *both* the pre and the post version.

    Args:
        img_folder: Directory containing the image files.
        mask_folder: Directory containing the mask files.

    Returns:
        None. Findings are printed.
    """
    suffixes = ('_pre_disaster', '_post_disaster')

    def normalize_name(name):
        """Strip the pre/post marker and the extension from a file name."""
        for suffix in suffixes:
            name = name.replace(suffix, '')
        return os.path.splitext(name)[0]

    def collect_versions(folder):
        """Map each normalized name to the set of markers found for it."""
        versions = {}
        for file_name in os.listdir(folder):
            if not file_name.endswith('.png'):
                continue
            seen = versions.setdefault(normalize_name(file_name), set())
            # The marker can sit in the middle of the name (patch files put
            # it before the patch index), so test by containment.
            seen.update(suffix for suffix in suffixes if suffix in file_name)
        return versions

    img_versions = collect_versions(img_folder)
    mask_versions = collect_versions(mask_folder)

    # Check that both folders hold the same set of normalized names
    unmatched_files = set(img_versions).symmetric_difference(mask_versions)
    if unmatched_files:
        print("Unmatched files found:", unmatched_files)
    else:
        print("All files in images and masks match.")

    # Check that both the pre and the post version exist on each side.
    # Sorted so the report order is stable between runs.
    required = set(suffixes)
    for file_base in sorted(set(img_versions) | set(mask_versions)):
        if not required <= img_versions.get(file_base, set()):
            print(f"Missing pre or post version for image: {file_base}")
        if not required <= mask_versions.get(file_base, set()):
            print(f"Missing pre or post version for mask: {file_base}")
# check_matching_files(path_to_maskify, path_to_imagify)
# check_matching_files(path_to_maskify_rem, path_to_imagify_rem)