# 1. Convert lesion annotations to masks

In [None]:
# Download the executable file from https://github.com/computationalpathologygroup/ASAP/releases, and append the binary folder to the path.
import sys
sys.path.append('/opt/ASAP/bin')

# Import the multiresolutionimageinterface.py file from the bin folder.
import multiresolutionimageinterface as mir
import cv2
from tqdm import tqdm_notebook
import os
import colorsys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.image import imread
import itertools
from tqdm import tqdm


In [None]:
# To load tif files and xml format annotation files
# `reader` opens whole-slide images; `annotation_list` is the container that
# `xml_repository` fills when an XML annotation file is loaded (see CreateAnnotationMask).
reader = mir.MultiResolutionImageReader()
annotation_list = mir.AnnotationList()
xml_repository = mir.XmlRepository(annotation_list)

In [None]:
# Addresses for 'YOUR' annotation files and training WSI (Whole Slide Images) files
# dirAnnotations: searched recursively for .xml annotation files
# dirData:        searched recursively for .tif slides; generated masks are written next to them
# dirHome:        receives the per-slide CSVs (dirHome/dataframes) and the sampled patch lists
dirAnnotations = '/home/team1/ddrive/team1/camelyon16/annotations'
dirData = '/home/team1/ddrive/team1/camelyon16/raw_data'
dirHome = '/home/team1/ddrive/team1/camelyon16'

In [None]:
# Sorted list of every slide path below dirData, e.g.
# '~/data/training/center_0/patient_001_node_1.tif'.
# A file qualifies when its name contains '.tif' and does not contain 'mask',
# which keeps the generated '*_mask.tif' files out of the list.
ImageFiles = sorted(
    os.path.join(root, name)
    for root, _subdirs, names in os.walk(dirData)
    for name in names
    if '.tif' in name and 'mask' not in name
)

In [None]:
# Convert one XML annotation file into a mask TIFF stored next to its slide
def CreateAnnotationMask(annotationPath):
    """Write `<slide>_mask.tif` for the slide that belongs to *annotationPath*.

    The slide is looked up in the global ``ImageFiles`` list by the annotation's
    path relative to ``dirAnnotations`` with the extension swapped to ``.tif``.
    The function prints a message and returns without writing anything when
    no slide matches, the slide file is gone, the mask already exists, or the
    slide cannot be opened.

    Args:
        annotationPath (str): path of an ``.xml`` annotation file.

    Returns:
        None
    """
    # Path of the annotation relative to dirAnnotations, extension swapped to .tif
    # (e.g. '/tumor_001.tif' for '<dirAnnotations>/tumor_001.xml')
    tifName = annotationPath.replace('.xml', '').replace(dirAnnotations, "") + '.tif'

    # First slide whose path contains that relative name, if any
    tifPath = next((candidate for candidate in ImageFiles if tifName in candidate), None)
    if tifPath is None:
        print('Warning - This file is missing from the file list: {0} - skipping.'.format(tifName))
        return

    # The list may be stale: make sure the slide is still on disk
    if not os.path.isfile(tifPath):
        print('Warning - Could not locate {0} - skipping this annotation file.'.format(tifPath))
        return

    # Never overwrite a mask produced by an earlier run
    maskPath = tifPath.replace('.tif', '_mask.tif')
    if os.path.isfile(maskPath):
        print('Info - Mask file of {0} already exists - skipping'.format(tifPath))
        return

    # Load the polygons of this annotation file into the shared annotation_list
    xml_repository.setSource(annotationPath)
    xml_repository.load()

    # The slide supplies the dimensions and pixel spacing of the mask to write
    converter = mir.AnnotationToMask()
    mr_image = reader.open(tifPath)
    if mr_image is None:
        print('Warning - Could not read {0} - skipping'.format(tifPath))
        return

    # Annotation group name -> pixel value written into the mask.
    # NOTE(review): presumably conversion_order is the order in which groups are
    # rasterised - confirm against the ASAP AnnotationToMask documentation.
    group_labels = {'metastases': 1, 'normal': 2}
    group_order = ['metastases', 'normal']
    converter.convert(annotation_list,
                      maskPath,
                      mr_image.getDimensions(),
                      mr_image.getSpacing(),
                      group_labels,
                      group_order)


In [None]:
# Paths of every file below dirAnnotations whose name contains '.xml'
AnnotationFiles = [
    os.path.join(root, name)
    for root, _subdirs, names in os.walk(dirAnnotations)
    for name in names
    if '.xml' in name
]


In [None]:
# Sort in place so the annotations are processed in a stable order,
# then preview the first five paths as the cell output
AnnotationFiles.sort()
AnnotationFiles[:5]

In [None]:
from tqdm.notebook import tqdm

# Convert every annotation file into a mask TIFF; CreateAnnotationMask prints
# why a file was skipped (no matching slide, mask already present, unreadable slide)
for f in tqdm(AnnotationFiles, 'Creating masks...'):
    print('Annotation file: ' + f)
    CreateAnnotationMask(f)


# 2. Making tissue masks

In [None]:
## This function is adapted from a digital pathology pipeline code of Mikko Tukiainen
# Functions identical to those in the wsi2tissueMask.py file in the utils folder.
# Background is set to 0, and tissue parts to 255
def make_tissue_mask(slide, mask_level=4, morpho=None, morpho_kernel_size=5, morpho_iter=1, median_filter=False, return_original=False):
    ''' Build a binary tissue mask for a whole slide.

    The slide is read in full at ``mask_level``, converted to HSV, and channel 1
    of the HSV image (saturation in OpenCV's H, S, V order) is binarised with
    Otsu's threshold. Optional clean-up steps follow.

    Args:
        slide (MultiResolutionImage): slide to process
        mask_level (int): pyramid level at which the mask is created (default 4)
        morpho (cv2.MORPH_*): OpenCV morphology flag such as cv2.MORPH_OPEN or
            cv2.MORPH_CLOSE; None disables the step (default None)
        morpho_kernel_size (int): side of the square all-ones kernel (default 5)
        morpho_iter (int): number of morphology iterations (default 1)
        median_filter (bool): apply a median blur with aperture 15 (default False)
        return_original (bool): also return the patch the mask was computed from

    Returns:
        uint8 array (0 -> empty, 255 -> tissue), or the tuple
        (tissue_mask, original_tissue) when ``return_original`` is true.
    '''
    # Size of the whole slide expressed in pixels of the requested level
    downsample = float(slide.getLevelDownsample(mask_level))
    level_width = int(slide.getDimensions()[0] / downsample)
    level_height = int(slide.getDimensions()[1] / downsample)
    original_tissue = slide.getUCharPatch(0, 0, level_width, level_height, mask_level)

    # Keep channel 1 of the HSV conversion (saturation) and let Otsu choose the cut-off
    three_channel = cv2.cvtColor(np.array(original_tissue), cv2.COLOR_RGBA2RGB)
    saturation = cv2.cvtColor(three_channel, cv2.COLOR_BGR2HSV)[:, :, 1]
    tissue_mask = cv2.threshold(saturation, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Optional morphological clean-up with a square kernel
    if morpho is not None:
        structuring = np.ones((morpho_kernel_size, morpho_kernel_size), np.uint8)
        tissue_mask = cv2.morphologyEx(tissue_mask, morpho, structuring, iterations=morpho_iter)

    # Optional removal of small speckles
    if median_filter:
        tissue_mask = cv2.medianBlur(tissue_mask, 15)

    # Hand back a plain uint8 numpy array
    tissue_mask = np.array(tissue_mask, dtype=np.uint8)

    return (tissue_mask, original_tissue) if return_original else tissue_mask


In [None]:
def CreateTissueMask(tifPath):
    """Compute the tissue mask of one slide and cache it as a ``.npy`` file.

    The mask is stored next to the slide as ``<slide>_tissue_mask_ds16.npy``.
    Nothing is written when that file already exists or when the slide cannot
    be opened; a message is printed in both cases.

    Args:
        tifPath (str): path of the ``.tif`` slide.

    Returns:
        None
    """
    # Skip if this mask is already found
    maskPath = tifPath.replace('.tif', '_tissue_mask_ds16.npy')
    if os.path.isfile(maskPath):
        print('Info - Tissue mask file of {0} already exists - skipping'.format(tifPath))
        return

    # Create tissue mask
    mr_image = reader.open(tifPath)
    if mr_image is None:
        print('Warning - Could not read {0} - skipping'.format(tifPath))
        return

    # NOTE(review): the mask is built at pyramid level 1, whereas the '_ds16'
    # file name and the mask_downscale=16 used by CreateDF/sample_centers assume
    # a 16x downsample. Confirm that level 1 of these slides really is 16x, or
    # use mr_image.getBestLevelForDownSample(16) here instead.
    tissue_mask = make_tissue_mask(mr_image,
                                   1,
                                   morpho=cv2.MORPH_CLOSE,
                                   morpho_kernel_size=7,
                                   morpho_iter=2,
                                   median_filter=True)

    # tissue_mask is a binary uint8 array (0 = background, 255 = tissue)
    np.save(maskPath, tissue_mask)


In [None]:
# Build and cache a tissue mask for every slide in ImageFiles;
# CreateTissueMask skips slides whose mask file already exists
for f in tqdm_notebook(ImageFiles, 'Creating tissue masks...'):
    print('WSI: ' + f)
    CreateTissueMask(f)

# 3. Create DataFrames to record patch information

In [None]:
def getTissueMask(tifPath):
    """Load the cached tissue mask of the slide at *tifPath*.

    Returns the array stored in ``<slide>_tissue_mask_ds16.npy``, or None when
    that file does not exist.
    """
    maskPath = tifPath.replace('.tif', '_tissue_mask_ds16.npy')
    return np.load(maskPath) if os.path.isfile(maskPath) else None

# Function to create the center positions of samples (patches)
def sample_centers(tissue_mask, mask_downscale=16, sample_side=512, focus_width_percentage=0.25, padding_percentage=0.01):
    """Return candidate patch centres that lie on tissue.

    The mask is traversed on a grid whose step is one patch side expressed in
    mask pixels; every cell contributes its corner and the point half a step
    further along both axes. A candidate is kept when the square "focus" window
    around it stays outside the padding border and contains at least one
    non-zero mask pixel.

    Note that "x" indexes axis 0 of ``tissue_mask`` and "y" indexes axis 1.

    Args:
        tissue_mask (np.ndarray): 2-D mask, non-zero where tissue is present.
        mask_downscale (int): factor between mask pixels and returned coordinates.
        sample_side (int): patch side in returned (upscaled) coordinates.
        focus_width_percentage (float): half-width of the focus window as a
            fraction of ``sample_side``.
        padding_percentage (float): border ignored on every side, as a fraction
            of the corresponding mask extent.

    Returns:
        np.ndarray: rows of (x, y) centres multiplied by ``mask_downscale``;
        an empty array when nothing qualifies.
    """
    extent_x, extent_y = tissue_mask.shape[:2]

    # Patch side and focus half-width measured in mask pixels
    step = sample_side / mask_downscale
    reach = int(sample_side * focus_width_percentage / mask_downscale)

    # Border widths in which no focus window may start or end
    border_x = extent_x * padding_percentage
    border_y = extent_y * padding_percentage

    def _window(center, extent):
        # Focus window around `center`, clipped to the mask
        return int(max(0, center - reach)), int(min(center + reach, extent - 1))

    kept = []
    for grid_x in range(int(extent_x // step)):
        for grid_y in range(int(extent_y // step)):
            for offset in (0, 0.5):
                x = int((grid_x + offset) * step)
                y = int((grid_y + offset) * step)
                lo_x, hi_x = _window(x, extent_x)
                lo_y, hi_y = _window(y, extent_y)

                # Candidates whose window touches the padding border are dropped
                clear_of_border = (
                    lo_x >= border_x and hi_x <= extent_x - border_x
                    and lo_y >= border_y and hi_y <= extent_y - border_y
                )
                if not clear_of_border:
                    continue

                # Keep the candidate only when its window contains tissue
                if tissue_mask[lo_x:hi_x, lo_y:hi_y].sum() > 0:
                    kept.append(np.array([x, y]))

    # Scale mask coordinates back up by the downscale factor
    return np.array(kept) * mask_downscale

# Check if there is a tumor in the patch
def isTumor(mask_level_0):
    """Return True when the mask patch contains at least one value above zero."""
    highest_label = mask_level_0.max()
    return highest_label > 0

# Calculate the percentage of tumor in the patch
def tumorPercentage(mask_level_0):
    """Return the fraction (0..1) of non-zero entries in a 3-channel mask patch.

    The count of non-zero entries over the whole array is divided by
    rows * columns * 3, so the result is a true fraction only when the patch
    carries three channels (as the buffers built by getSamplesWithAnnotations do).
    """
    rows, cols = mask_level_0.shape[0], mask_level_0.shape[1]
    n_channels = 3
    return np.count_nonzero(mask_level_0) / (rows * cols * n_channels)


In [None]:
# Reader shared by getImage and getAnnoMask below (rebinds the global `reader`)
reader = mir.MultiResolutionImageReader()

# Load image
def getImage(tifPath):
    """Open the slide at *tifPath* with the shared reader; None if the file is absent."""
    if os.path.isfile(tifPath):
        return reader.open(tifPath)
    return None

# Load mask file (only the tumorous parts)
def getAnnoMask(tifPath):
    """Open the annotation mask ``<slide>_mask.tif`` that belongs to *tifPath*.

    Returns None when no such mask file exists.
    """
    maskPath = tifPath.replace('.tif', '_mask.tif')
    if os.path.isfile(maskPath):
        return reader.open(maskPath)
    return None


def getSamplesWithAnnotations(mr_image, mr_mask, x_cent, y_cent, width=512, height=512):
    """Read one patch and the matching annotation-mask patch around a centre.

    Both patches are read at the level returned by
    ``mr_image.getBestLevelForDownSample(1)`` and share the same top-left corner,
    which is the centre shifted by half the patch size scaled by that level's
    downsample.

    Args:
        mr_image: opened slide.
        mr_mask: opened annotation mask of the same slide.
        x_cent, y_cent: patch centre passed to the reader as x and y.
        width, height (int): patch size in pixels of the chosen level.

    Returns:
        tuple: ``(imgs, masks)``, two int32 arrays of shape
        ``(1, height, width, 3)``. A mask patch with fewer channels is broadcast
        across the three channels by the assignment.
    """
    channels = 3
    # Buffers are (rows, columns) = (height, width) to match the arrays assigned
    # below (ASAP returns patches in that order); allocating (width, height)
    # only worked for square patches.
    imgs = np.zeros((1, height, width, channels), dtype=np.int32)
    masks = np.zeros((1, height, width, channels), dtype=np.int32)

    lev = mr_image.getBestLevelForDownSample(1)
    ds = mr_image.getLevelDownsample(lev)

    # Top-left corner shared by the image patch and the mask patch
    x_start = int(x_cent - (ds * width / 2))
    y_start = int(y_cent - (ds * height / 2))

    imgs[0] = mr_image.getUCharPatch(x_start, y_start, width, height, lev)
    masks[0] = mr_mask.getUCharPatch(x_start, y_start, width, height, lev)
    return imgs, masks


In [None]:
# Split a WSI file into patches and create a CSV file storing annotations for each patch
def CreateDF(tifPath, overrideExisting=False, size=256):
    """Sample tissue patches of one slide and record them in a per-slide CSV.

    The CSV is written to ``dirHome/dataframes/<slide name>.csv`` with one row
    per patch: patchId, fileName, centerX, centerY, isTumor and tumorPercentage
    (a percentage truncated to one decimal). The row index is written as well.

    The slide is skipped with a printed message when the CSV already exists
    (unless ``overrideExisting`` is true) or when the slide, its annotation mask
    or its cached tissue mask is missing.

    Args:
        tifPath (str): path of the ``.tif`` slide.
        overrideExisting (bool): rebuild the CSV even if it already exists.
        size (int): patch side in level-0 pixels.

    Returns:
        None
    """
    # Factor by which sample_centers scales tissue-mask coordinates back up
    mask_downscale = 16

    # Only store the file name
    fileNamePart = tifPath.replace('.tif', '').replace(dirData, "")
    df_path = dirHome + '/dataframes/' + fileNamePart.split('/')[-1] + '.csv'

    if os.path.isfile(df_path) and not overrideExisting:
        print('Info - Dataframe file of {0} already exists - skipping'.format(df_path))
        return

    # Load the current image file/mask file and the cached tissue mask. Each
    # getter returns None for a missing file; without this guard the slide
    # failed later with an unrelated AttributeError.
    mr_image = getImage(tifPath)
    mr_mask = getAnnoMask(tifPath)
    tissue_mask = getTissueMask(tifPath)
    if mr_image is None or mr_mask is None or tissue_mask is None:
        print('Warning - Slide, annotation mask or tissue mask of {0} is missing - skipping'.format(tifPath))
        return

    patch_centers = sample_centers(tissue_mask, mask_downscale=mask_downscale, sample_side=size)
    print("Sliced WSI {1} to {0} patches.".format(len(patch_centers), tifPath))

    # If the directory is different, it needs to be changed.
    # splitext removes exactly the extension; str.strip('.tif') would strip any
    # of the characters '.', 't', 'i', 'f' from both ends of the name.
    wsi_name = tifPath.split('/')[-1]
    tumor_idx = os.path.splitext(wsi_name)[0].split('_')[-1]

    columns = ['patchId',
               'fileName',
               'centerX',
               'centerY',
               'isTumor',
               'tumorPercentage']

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per row.
    rows = []
    for c in tqdm_notebook(patch_centers, 'Patches...'):
        # c[0] indexes axis 0 of the tissue mask and c[1] axis 1, hence the
        # swap when handing them to the slide reader as x and y
        imgs, masks = getSamplesWithAnnotations(mr_image, mr_mask, x_cent=c[1], y_cent=c[0], width=size, height=size)

        # getSamplesWithAnnotations returns a single patch per call
        patch_mask = masks[0]
        rows.append({'patchId': str(tumor_idx) + '_' + str(c[0]).zfill(7) + str(c[1]).zfill(7),
                     'fileName': tifPath,
                     'centerX': c[0],
                     'centerY': c[1],
                     'isTumor': isTumor(patch_mask),
                     'tumorPercentage': int(tumorPercentage(patch_mask) * 1000) / 10})

    df = pd.DataFrame(rows, columns=columns)

    # Create the output folder on first use. The index column is written on
    # purpose: the merge step drops it again as 'Unnamed: 0'.
    os.makedirs(os.path.dirname(df_path), exist_ok=True)
    df.to_csv(df_path)


In [None]:
# Build the per-slide CSV for every slide. A failure on one slide must not stop
# the run, but the real cause is reported, and Ctrl-C still interrupts the loop
# because only Exception (not KeyboardInterrupt/SystemExit) is caught.
for f in tqdm_notebook(ImageFiles, 'Creating dataframes...'):
    try:
        CreateDF(f)
    except Exception as exc:
        print('Cannot create dataframe for {0}: {1!r}'.format(f, exc))

## Merging dataframes

In [None]:
# Set directory path
directory = dirHome + '/dataframes'

# Load every per-slide CSV in the directory. os.listdir returns names in
# arbitrary order, so they are sorted: the row order of the merged frame decides
# which rows the seeded sample() calls further down pick.
dfs = [
    pd.read_csv(os.path.join(directory, filename))
    for filename in sorted(os.listdir(directory))
    if filename.endswith('.csv')
]


In [None]:
# Stack the per-slide frames under a fresh 0..n-1 index and discard
# 'Unnamed: 0', the row index that was saved into each CSV
merged_df = pd.concat(dfs, axis=0, ignore_index=True).drop(columns='Unnamed: 0')

# Slide id = text before the first underscore of the patch id
merged_df['wsi_id'] = merged_df['patchId'].map(lambda patch_id: patch_id.split('_')[0])
merged_df

# 4. Creating dataset

In [None]:
# Positive patches: tumour coverage strictly between 20 % and 90 %.
# The filter starts from merged_df; `positive_df` was previously read before it
# had ever been assigned, which raises NameError on a fresh kernel. Any patch
# above 20 % necessarily has isTumor set, so no separate isTumor filter is needed.
positive_df = merged_df[(merged_df['tumorPercentage'] > 20) & (merged_df['tumorPercentage'] < 90)]
positive_df

In [None]:
# Slide ids (as they appear in the wsi_id column) assigned to each split
train_comb = [
    '061', '018', '039', '031', '035', '051', '064', '040', '030', '007', '025', '041',
    '054', '065', '005', '046', '023', '004', '022', '008', '006', '068', '001', '009',
    '015', '042', '016', '056', '027', '057', '038', '063', '019', '045', '037', '062',
    '058', '033', '010', '002', '060', '055', '069', '052', '066', '011', '024',
]

val_comb = ['048', '013', '067', '044', '043', '036']

test_comb = [
    '032', '047', '049', '028', '050', '053', '034', '012', '059', '014', '020', '003',
    '026', '029', '017',
]

In [None]:
# Split Dataframe: one frame per split, selected by slide id
df_train, df_valid, df_test = (
    positive_df[positive_df['wsi_id'].isin(slide_ids)]
    for slide_ids in (train_comb, val_comb, test_comb)
)

In [None]:
# isTumor counts of the train, validation and test splits, in that order
for split_df in (df_train, df_valid, df_test):
    print(split_df['isTumor'].value_counts())

## Training - Positive Set

In [None]:
# Bin edges 0, 5, 10, ..., 100
bins = np.arange(0, 105, 5)

# Copy of the training rows, each tagged with its half-open 5-point interval [a, a+5)
df_train_copy = df_train.copy()
df_train_copy['range'] = pd.cut(df_train_copy['tumorPercentage'], bins, right=False)


def sample_per_group(x):
    """Return at most 430 rows of group *x*, drawn with a fixed seed."""
    n_samples = 430
    if len(x) > 0:
        return x.sample(n=min(len(x), n_samples), random_state=42)
    return x


# Sample every occupied interval and flatten the result to a plain index.
# include_groups is not passed, so the 'range' column survives apply() and is
# removed explicitly afterwards.
df_train_sampled = (
    df_train_copy
    .groupby('range', as_index=False, observed=True)
    .apply(sample_per_group)
    .reset_index(drop=True)
)
df_train_sampled = df_train_sampled.drop('range', axis=1)

# 20 histogram bins for the sampled tumour percentages
df_train_sampled['tumorPercentage'].plot(kind='hist', bins=20)
df_train_sampled.to_csv(dirHome + '/sample_patches_train_6020.csv', index=False)
print(len(df_train_sampled))


## Train - Negative Set

In [None]:
# Tumour-free patches of the training slides
df_train_neg_all = merged_df[(merged_df['wsi_id'].isin(train_comb)) & (merged_df['isTumor'] == False)]
# Draw up to 6000 of them; the cap avoids the ValueError that sample() raises
# when asked for more rows than exist (same guard as the positive sampling)
df_train_neg = df_train_neg_all.sample(n=min(len(df_train_neg_all), 6000), random_state=42)
print(df_train_neg.shape)

In [None]:
# Persist the sampled negative training patches (row index omitted)
df_train_neg.to_csv(dirHome + '/sample_patches_negative_train_6020.csv', index=False)

## Valid - Positive Set

In [None]:
# Bin edges 0, 5, 10, ..., 100
bins = np.arange(0, 105, 5)

# Copy of the validation rows, each tagged with its half-open 5-point interval [a, a+5)
df_valid_copy = df_valid.copy()
df_valid_copy['range'] = pd.cut(df_valid_copy['tumorPercentage'], bins, right=False)


def sample_per_group(x):
    """Return at most 50 rows of group *x*, drawn with a fixed seed."""
    n_samples = 50
    if len(x) > 0:
        return x.sample(n=min(len(x), n_samples), random_state=42)
    return x


# Sample every occupied interval and flatten the result to a plain index.
# include_groups is not passed, so the 'range' column survives apply() and is
# removed explicitly afterwards.
df_valid_sampled = (
    df_valid_copy
    .groupby('range', as_index=False, observed=True)
    .apply(sample_per_group)
    .reset_index(drop=True)
)
df_valid_sampled = df_valid_sampled.drop('range', axis=1)

# 20 histogram bins for the sampled tumour percentages
df_valid_sampled['tumorPercentage'].plot(kind='hist', bins=20)
df_valid_sampled.to_csv(dirHome + '/sample_patches_valid_6020.csv', index=False)
print(len(df_valid_sampled))


## Valid - Negative Set

In [None]:
# Tumour-free patches of the validation slides
df_valid_neg_all = merged_df[(merged_df['wsi_id'].isin(val_comb)) & (merged_df['isTumor'] == False)]
# Draw up to 700 of them; the cap avoids the ValueError that sample() raises
# when asked for more rows than exist (same guard as the positive sampling)
df_valid_neg = df_valid_neg_all.sample(n=min(len(df_valid_neg_all), 700), random_state=42)
print(df_valid_neg.shape)

In [None]:
# Persist the sampled negative validation patches (row index omitted)
df_valid_neg.to_csv(dirHome + '/sample_patches_negative_valid_6020.csv', index=False)

## Test - Positive Set

In [None]:
# Bin edges 0, 5, 10, ..., 100
bins = np.arange(0, 105, 5)

# Copy of the test rows, each tagged with its half-open 5-point interval [a, a+5)
df_test_copy = df_test.copy()
df_test_copy['range'] = pd.cut(df_test_copy['tumorPercentage'], bins, right=False)


def sample_per_group(x):
    """Return at most 143 rows of group *x*, drawn with a fixed seed."""
    n_samples = 143
    if len(x) > 0:
        return x.sample(n=min(len(x), n_samples), random_state=42)
    return x


# Sample every occupied interval and flatten the result to a plain index.
# include_groups is not passed, so the 'range' column survives apply() and is
# removed explicitly afterwards.
df_test_sampled = (
    df_test_copy
    .groupby('range', as_index=False, observed=True)
    .apply(sample_per_group)
    .reset_index(drop=True)
)

# Number of sampled test patches
print(len(df_test_sampled))

df_test_sampled = df_test_sampled.drop('range', axis=1)

# 20 histogram bins for the sampled tumour percentages
df_test_sampled['tumorPercentage'].plot(kind='hist', bins=20)
df_test_sampled.to_csv(dirHome + '/sample_patches_test_6020.csv', index=False)


## Test - Negative Set

In [None]:
# Tumour-free patches of the test slides
df_test_neg_all = merged_df[(merged_df['wsi_id'].isin(test_comb)) & (merged_df['isTumor'] == False)]
# Draw up to 2000 of them; the cap avoids the ValueError that sample() raises
# when asked for more rows than exist (same guard as the positive sampling)
df_test_neg = df_test_neg_all.sample(n=min(len(df_test_neg_all), 2000), random_state=42)
print(df_test_neg.shape)

In [None]:
# Persist the sampled negative test patches (row index omitted)
df_test_neg.to_csv(dirHome + '/sample_patches_negative_test_6020.csv', index=False)

# 5. Creating Patches

In [None]:
def getSamples(mr_image, x_cent, y_cent, levels, sz):
    """Read a square patch around one centre at each requested pyramid level.

    For every level the top-left corner is the centre shifted by half the patch
    side scaled with that level's downsample.

    Args:
        mr_image: opened slide.
        x_cent, y_cent: patch centre passed to the reader as x and y.
        levels (sequence of int): pyramid levels to read.
        sz (int): patch side in pixels of the level being read.

    Returns:
        np.ndarray: uint8 array of shape ``(len(levels), sz, sz, 3)``.
    """
    n_channels = 3
    stack = np.zeros((len(levels), sz, sz, n_channels), dtype=np.uint8)
    for slot, level in enumerate(levels):
        scale = mr_image.getLevelDownsample(level)
        left = int(x_cent - (scale * sz / 2))
        top = int(y_cent - (scale * sz / 2))
        stack[slot] = mr_image.getUCharPatch(left, top, sz, sz, level)
    return stack

def getMaskedSamples(mr_mask, x_cent, y_cent, levels, sz):
    """Read a square single-channel mask patch at each requested pyramid level.

    Mirrors getSamples, but keeps only channel 0 of every patch that is read.

    Args:
        mr_mask: opened annotation mask.
        x_cent, y_cent: patch centre passed to the reader as x and y.
        levels (sequence of int): pyramid levels to read.
        sz (int): patch side in pixels of the level being read.

    Returns:
        np.ndarray: uint8 array of shape ``(len(levels), sz, sz)``.
    """
    stack = np.zeros((len(levels), sz, sz), dtype=np.uint8)
    for slot, level in enumerate(levels):
        scale = mr_mask.getLevelDownsample(level)
        left = int(x_cent - (scale * sz / 2))
        top = int(y_cent - (scale * sz / 2))
        patch = mr_mask.getUCharPatch(left, top, sz, sz, level)
        # Only the first channel of the patch is kept
        stack[slot] = patch[:, :, 0]
    return stack


## Extract Positive Patches

In [None]:
# Reload the sampled positive patch lists written in section 4
df_train = pd.read_csv(dirHome + '/sample_patches_train_6020.csv')
df_valid = pd.read_csv(dirHome + '/sample_patches_valid_6020.csv')
df_test = pd.read_csv(dirHome + '/sample_patches_test_6020.csv')

In [None]:
# Re-derive the slide id as the text before the first underscore of each patch id.
# NOTE(review): presumably needed because read_csv parses a stored wsi_id such as
# '061' as the integer 61 - confirm against the saved CSVs.
for split_df in (df_train, df_valid, df_test):
    split_df['wsi_id'] = split_df['patchId'].map(lambda patch_id: patch_id.split('_')[0])

In [None]:
# Display the training patch list as the cell output
df_train

In [None]:
# Enter the directory where you want to finally save the dataset
dirRoot = '/home/team1/ddrive/team1/dataset_dest/camelyon16'
os.makedirs(dirRoot, exist_ok=True)
df_list = [df_train, df_valid, df_test]
dirName_list = ['train', 'val', 'test']

# Patches are read from pyramid level 0 only
levels = [0]

reader = mir.MultiResolutionImageReader()

for WSI in tqdm(ImageFiles, desc='Processing WSIs'):
    # Slide id = second underscore-separated token of the file name,
    # e.g. 'tumor_001.tif' -> '001'.
    # NOTE(review): slides that share this token (for instance normal_001.tif and
    # tumor_001.tif) match the same rows, and the first one processed wins because
    # existing files are skipped - confirm ImageFiles holds one slide per id.
    split = WSI.split('/')
    wsi_id = split[-1].split('_')[1].split('.')[0]

    # Select this slide's rows per split first, so that slides without any
    # sampled patch are never opened
    subsets = [(df[df.wsi_id == wsi_id], dirName) for df, dirName in zip(df_list, dirName_list)]
    if all(df_sub.empty for df_sub, _ in subsets):
        continue

    mr_image = reader.open(WSI)
    if mr_image is None:
        print('Warning - Could not read {0} - skipping'.format(WSI))
        continue
    mask_image_path = WSI.replace('.tif', '_mask.tif')
    mask_image = reader.open(mask_image_path)

    for df_sub, dirName in subsets:
        if df_sub.empty:
            continue

        # Output folders are created once per slide and split, not once per patch
        image_dir = os.path.join(dirRoot, dirName, 'image')
        mask_dir = os.path.join(dirRoot, dirName, 'mask')
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(mask_dir, exist_ok=True)

        for row in df_sub.itertuples(index=False):
            patch_id = str(row.patchId)
            label = 1 if row.isTumor else 0
            center_x = row.centerX
            center_y = row.centerY

            # centerX/centerY are handed to the reader swapped, mirroring how
            # CreateDF passed them when the patches were sampled.
            # The existence check comes first so that a re-run does not read
            # patches it is going to discard.
            image_file_name = f"{os.path.join(image_dir, patch_id)}_{label}.png"
            if os.path.exists(image_file_name):
                print(f"File {image_file_name} already exists, skipping.")
            else:
                imgs = getSamples(mr_image, center_y, center_x, levels, 512)
                # NOTE(review): cv2.imwrite interprets the array as BGR; if the
                # reader returns RGB the stored PNG has red and blue swapped -
                # confirm against the loader that consumes these files.
                cv2.imwrite(image_file_name, imgs[0])

            if not label:
                continue

            # Tumour patches additionally get their annotation mask saved
            if mask_image is None:
                print('Warning - Could not read {0} - no mask written for patch {1}'.format(mask_image_path, patch_id))
                continue
            mask_file_name = f"{os.path.join(mask_dir, patch_id)}_{label}.png"
            if os.path.exists(mask_file_name):
                print(f"File {mask_file_name} already exists, skipping.")
            else:
                masks = getMaskedSamples(mask_image, center_y, center_x, levels, 512)
                cv2.imwrite(mask_file_name, masks[0])


## Extract Negative Patches

In [None]:
# Reload the sampled negative patch lists written in section 4
df_train_neg = pd.read_csv(dirHome + '/sample_patches_negative_train_6020.csv')
df_valid_neg = pd.read_csv(dirHome + '/sample_patches_negative_valid_6020.csv')
df_test_neg = pd.read_csv(dirHome + '/sample_patches_negative_test_6020.csv')

In [None]:
# Re-derive the slide id as the text before the first underscore of each patch id.
# NOTE(review): presumably needed because read_csv parses a stored wsi_id such as
# '061' as the integer 61 - confirm against the saved CSVs.
for split_df in (df_train_neg, df_valid_neg, df_test_neg):
    split_df['wsi_id'] = split_df['patchId'].map(lambda patch_id: patch_id.split('_')[0])

In [None]:
# Enter the directory where you want to finally save the dataset
dirRoot = '/home/team1/ddrive/team1/dataset_dest/camelyon16'
os.makedirs(dirRoot, exist_ok=True)
df_list = [df_train_neg, df_valid_neg, df_test_neg]
dirName_list = ['train', 'val', 'test']

# Patches are read from pyramid level 0 only
levels = [0]

reader = mir.MultiResolutionImageReader()

for WSI in tqdm(ImageFiles, desc='Processing WSIs'):
    # Slide id = second underscore-separated token of the file name,
    # e.g. 'tumor_001.tif' -> '001'.
    # NOTE(review): slides that share this token (for instance normal_001.tif and
    # tumor_001.tif) match the same rows, and the first one processed wins because
    # existing files are skipped - confirm ImageFiles holds one slide per id.
    split = WSI.split('/')
    wsi_id = split[-1].split('_')[1].split('.')[0]

    # Select this slide's rows per split first, so that slides without any
    # sampled patch are never opened
    subsets = [(df[df.wsi_id == wsi_id], dirName) for df, dirName in zip(df_list, dirName_list)]
    if all(df_sub.empty for df_sub, _ in subsets):
        continue

    mr_image = reader.open(WSI)
    if mr_image is None:
        print('Warning - Could not read {0} - skipping'.format(WSI))
        continue
    mask_image_path = WSI.replace('.tif', '_mask.tif')
    mask_image = reader.open(mask_image_path)

    for df_sub, dirName in subsets:
        if df_sub.empty:
            continue

        # Output folders are created once per slide and split, not once per patch
        image_dir = os.path.join(dirRoot, dirName, 'image')
        mask_dir = os.path.join(dirRoot, dirName, 'mask')
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(mask_dir, exist_ok=True)

        for row in df_sub.itertuples(index=False):
            patch_id = str(row.patchId)
            label = 1 if row.isTumor else 0
            center_x = row.centerX
            center_y = row.centerY

            # centerX/centerY are handed to the reader swapped, mirroring how
            # CreateDF passed them when the patches were sampled.
            # The existence check comes first so that a re-run does not read
            # patches it is going to discard.
            image_file_name = f"{os.path.join(image_dir, patch_id)}_{label}.png"
            if os.path.exists(image_file_name):
                print(f"File {image_file_name} already exists, skipping.")
            else:
                imgs = getSamples(mr_image, center_y, center_x, levels, 512)
                # NOTE(review): cv2.imwrite interprets the array as BGR; if the
                # reader returns RGB the stored PNG has red and blue swapped -
                # confirm against the loader that consumes these files.
                cv2.imwrite(image_file_name, imgs[0])

            if not label:
                continue

            # Rows flagged as tumour additionally get their annotation mask saved
            if mask_image is None:
                print('Warning - Could not read {0} - no mask written for patch {1}'.format(mask_image_path, patch_id))
                continue
            mask_file_name = f"{os.path.join(mask_dir, patch_id)}_{label}.png"
            if os.path.exists(mask_file_name):
                print(f"File {mask_file_name} already exists, skipping.")
            else:
                masks = getMaskedSamples(mask_image, center_y, center_x, levels, 512)
                cv2.imwrite(mask_file_name, masks[0])