In [1]:
import os
import shutil

# Define the paths
data_folder = "data"
data_no_mask_folder = "data_no_mask"
subfolders = ["benign", "malignant", "normal"]
# Extensions (matched case-sensitively) that count as image files.
image_extensions = (".jpg", ".jpeg", ".png")

# Create data_no_mask_folder if it doesn't exist.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(data_no_mask_folder, exist_ok=True)

# Copy every image whose name does not contain "mask" from
# data/<subfolder> into data_no_mask/<subfolder>.
for subfolder in subfolders:
    subfolder_path = os.path.join(data_folder, subfolder)
    data_no_mask_subfolder_path = os.path.join(data_no_mask_folder, subfolder)

    # Create subfolder in data_no_mask_folder if it doesn't exist
    os.makedirs(data_no_mask_subfolder_path, exist_ok=True)

    # Loop through files in subfolder
    for filename in os.listdir(subfolder_path):
        source_path = os.path.join(subfolder_path, filename)

        # Only regular files with an image extension are candidates; a
        # directory that happens to be named like an image is skipped
        # instead of making shutil.copy fail.
        if not filename.endswith(image_extensions) or not os.path.isfile(source_path):
            continue

        # The "mask" check is case-insensitive.
        if 'mask' not in filename.lower():
            # Copy the image to data_no_mask_folder
            shutil.copy(source_path, data_no_mask_subfolder_path)


In [3]:
import os
import cv2
import numpy as np
from collections import defaultdict
import shutil

# Function to concatenate multiple mask images into one
def concatenate_masks(mask_paths):
    """Merge several mask images into one by taking the element-wise maximum.

    Each path is read as a grayscale image with ``cv2.imread``.

    Args:
        mask_paths: Iterable of paths to the mask image files to merge.

    Returns:
        A numpy array holding the element-wise maximum of all masks.

    Raises:
        ValueError: If ``mask_paths`` is empty, or if the masks do not all
            have the same shape.
        OSError: If one of the paths cannot be read as an image.
    """
    concatenated_mask = None
    for mask_path in mask_paths:
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if mask is None:
            raise OSError(f"Could not read mask image: {mask_path}")

        if concatenated_mask is None:
            concatenated_mask = mask
            continue

        # Refuse mismatched shapes explicitly: np.maximum would otherwise
        # either broadcast silently or fail with an unrelated-looking error.
        if mask.shape != concatenated_mask.shape:
            raise ValueError(
                f"Mask {mask_path} has shape {mask.shape}, "
                f"expected {concatenated_mask.shape}"
            )
        concatenated_mask = np.maximum(concatenated_mask, mask)

    if concatenated_mask is None:
        raise ValueError("mask_paths must contain at least one path")
    return concatenated_mask

# Define the paths
data_folder = "data"
mask_data_folder = "mask_data"
subfolders = ["benign", "malignant", "normal"]
# Extensions (matched case-sensitively) that count as image files.
image_extensions = (".jpg", ".jpeg", ".png")

# Create mask_data_folder if it doesn't exist
os.makedirs(mask_data_folder, exist_ok=True)

# Maps (subfolder, image name) -> paths of every mask file that belongs to
# that image. The subfolder is part of the key so identically named images
# in different classes are never merged together.
image_mask_dict = defaultdict(list)

# Loop through each subfolder
for subfolder in subfolders:
    subfolder_path = os.path.join(data_folder, subfolder)
    mask_subfolder_path = os.path.join(mask_data_folder, subfolder)
    os.makedirs(mask_subfolder_path, exist_ok=True)

    for filename in os.listdir(subfolder_path):
        if not filename.endswith(image_extensions):
            continue
        if 'mask' not in filename.lower():
            # Original (non-mask) image: nothing to do in this step.
            continue

        # Every mask file is still copied unchanged into mask_data/<subfolder>.
        mask_path = os.path.join(subfolder_path, filename)
        shutil.copy(mask_path, mask_subfolder_path)

        # Group the mask under the image it belongs to: the image name is
        # the part of the filename before the first "_mask".
        image_name, marker, _ = filename.partition('_mask')
        if marker and image_name:
            image_mask_dict[(subfolder, image_name)].append(mask_path)

# For images with more than one mask, overwrite "<image>_mask.png" in the
# class subfolder with the element-wise maximum of all of its masks.
for (subfolder, image_name), mask_paths in image_mask_dict.items():
    if len(mask_paths) > 1:
        concatenated_mask = concatenate_masks(mask_paths)
        merged_mask_path = os.path.join(mask_data_folder, subfolder, f"{image_name}_mask.png")
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(merged_mask_path, concatenated_mask):
            raise OSError(f"Could not write merged mask: {merged_mask_path}")



In [4]:
import os
import cv2
import numpy as np

# Function to concatenate multiple mask images into one
def concatenate_masks(mask_paths):
    """Merge several mask images into one by taking the element-wise maximum.

    Each path is read as a grayscale image with ``cv2.imread``.

    Args:
        mask_paths: Iterable of paths to the mask image files to merge.

    Returns:
        A numpy array holding the element-wise maximum of all masks.

    Raises:
        ValueError: If ``mask_paths`` is empty, or if the masks do not all
            have the same shape.
        OSError: If one of the paths cannot be read as an image.
    """
    concatenated_mask = None
    for mask_path in mask_paths:
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if mask is None:
            raise OSError(f"Could not read mask image: {mask_path}")

        if concatenated_mask is None:
            concatenated_mask = mask
            continue

        # Refuse mismatched shapes explicitly: np.maximum would otherwise
        # either broadcast silently or fail with an unrelated-looking error.
        if mask.shape != concatenated_mask.shape:
            raise ValueError(
                f"Mask {mask_path} has shape {mask.shape}, "
                f"expected {concatenated_mask.shape}"
            )
        concatenated_mask = np.maximum(concatenated_mask, mask)

    if concatenated_mask is None:
        raise ValueError("mask_paths must contain at least one path")
    return concatenated_mask

# Define the paths
mask_data_folder = "mask_data"
subfolders = ["benign", "malignant", "normal"]

# For every "<image>_mask.png" that has companion "<image>_mask_*" files,
# replace the whole group by a single merged "<image>_mask.png".
for subfolder in subfolders:
    subfolder_path = os.path.join(mask_data_folder, subfolder)

    # List the directory once and keep the set in sync with the files removed
    # below, instead of re-listing the folder for every mask file.
    remaining = set(os.listdir(subfolder_path))

    # Loop through files in subfolder
    for filename in sorted(remaining):
        # A file may already have been consumed as part of an earlier group.
        if filename not in remaining or not filename.endswith("_mask.png"):
            continue

        image_name = filename.split('_mask')[0]
        extra_prefix = image_name + '_mask_'
        other_masks = sorted(f for f in remaining if f.startswith(extra_prefix))
        if not other_masks:
            continue

        mask_paths = [os.path.join(subfolder_path, filename)]
        mask_paths.extend(os.path.join(subfolder_path, mask) for mask in other_masks)

        # Concatenate masks
        concatenated_mask = concatenate_masks(mask_paths)

        # Save concatenated mask BEFORE deleting anything, so a failed write
        # cannot lose the source masks. cv2.imwrite reports failure through
        # its return value.
        merged_name = f"{image_name}_mask.png"
        merged_path = os.path.join(subfolder_path, merged_name)
        if not cv2.imwrite(merged_path, concatenated_mask):
            raise OSError(f"Could not write merged mask: {merged_path}")

        # Remove individual mask files (never the merged file just written).
        for mask_path in mask_paths:
            if mask_path != merged_path:
                os.remove(mask_path)

        remaining.difference_update(other_masks)
        remaining.discard(filename)
        remaining.add(merged_name)

In [9]:
import splitfolders
# Split the mask-free images into three subsets in a 0.7 / 0.2 / 0.1 ratio.
# Files are copied (move=False); the fixed seed keeps the split reproducible.
splitfolders.ratio(
    "./data_no_mask",
    output="./data-split",
    seed=1337,
    ratio=(0.7, 0.2, 0.1),
    group_prefix=None,
    move=False,
)

Copying files: 780 files [00:01, 731.97 files/s]


In [10]:
import splitfolders
# Split the mask images into three subsets in a 0.7 / 0.2 / 0.1 ratio.
# Files are copied (move=False); the fixed seed keeps the split reproducible.
splitfolders.ratio(
    "./mask_data",
    output="./data-split_mask",
    seed=1337,
    ratio=(0.7, 0.2, 0.1),
    group_prefix=None,
    move=False,
)

Copying files: 780 files [00:00, 1087.97 files/s]
