### Script to Read WSIs and XML(Masks) of each nuclei type
### Created by Taimoor Shakeel

This code will create separate folder for each patient and subfolders for annotated images.

Each sub-folder corresponding to sub-images under each patient will contain 4 sub-sub-folders (Epithelial, Lymphocyte, Neutrophil and Macrophage) to save indviudal corresponding binary-masks with value 255 for each cell-type and background 0

### Input
data_path: Specify the path of downloaded images

destination_path = Specify the path to save corresponding binary masks

### Output
MoNuSAC_masks directory in the destination_path

Binary masks will be saved in each sub-sub-folder

Folder -- Patient name

Sub-folder -- Sub-images under each patient

Sub-Sub-folder -- Annotated cell-type on each sub-image


In [1]:
#Process whole slide images

import os
os.environ['PATH'] = "C:/Users/Administrator/Downloads/openslide-win64-20160612/openslide-win64-20160612/bin" + ";" + os.environ['PATH']
import openslide
#from openslide import open_slide # http://openslide.org/api/python/

import os
from xml.dom import minidom
import numpy as np
from openslide import open_slide  
from glob import glob
import cv2
import matplotlib.pyplot as plt
import scipy.io as sio
from PIL import Image
import scipy
import scipy.ndimage
from shapely.geometry import Polygon
from skimage import draw
import xml.etree.ElementTree as ET

In [2]:
def create_directory(directory):
    '''
    Creates a new folder in the specified directory if the folder doesn't exist.
    INPUT
        directory: Folder to be created, called as "folder/".
    OUTPUT
        New folder in the current directory.
    '''
    if not os.path.exists(directory):
        os.makedirs(directory)

In [57]:
# Read svs files from the desired path
count = 0

Organ = "Breast"
# Organ = "Lung"
# Organ = "Kidney"
# Organ = "Prostate"

main = "D://Taimoor_Datasets/Thesis/Nuclei/MoNuSAC/Training_Data/"

destination_path = os.path.join(main) # Path to save binary masks corresponding to xml files

# Target path
# Path to annotations
data_path = os.path.join(main, "Images_&_Annotations_Organ/", str(Organ)) #Path to read data from

try:
    # Make directory
    create_directory(data_path)
    # Change root path
    os.chdir(data_path)#Create folder named as MoNuSAC_masks
except OSError:
    print ("Creation of the images and annotations directory %s failed" % data_path)

    
    
# Target path
# Path to annotations
mask_path = os.path.join(main, "Masks_Organ/", str(Organ)) #Path to read data from and Path to save binary masks corresponding to xml files

try:
    # Make directory
    create_directory(mask_path)
    # Change root path
    os.chdir(mask_path)#Create folder named as MoNuSAC_masks
except OSError:
    print ("Creation of the mask directory %s failed" % mask_path)
    

    
patients = [x[0] for x in os.walk(data_path)]#Total patients in the data_path

print('\n')
print('---'*35)
print('Main Folders')

print(destination_path)
print(data_path)
print(mask_path)
print(len(patients))
print('---'*35)




---------------------------------------------------------------------------------------------------------
Main Folders
D://Taimoor_Datasets/Thesis/Nuclei/MoNuSAC/Training_Data/
D://Taimoor_Datasets/Thesis/Nuclei/MoNuSAC/Training_Data/Images_&_Annotations_Organ/Breast
D://Taimoor_Datasets/Thesis/Nuclei/MoNuSAC/Training_Data/Masks_Organ/Breast
11
---------------------------------------------------------------------------------------------------------


In [55]:
for patient_loc in patients[:]:
# for patient_loc in patients[9:]:
    patient_name = patient_loc[len(data_path)+1:]#Patient name
    print(patient_name)
    
    ## To make patient's name directory in the destination folder
    try:
        os.mkdir(patient_name)
    except OSError:
        print ("\n Creation of the patient's directory %s failed" % patient_name)
        
    ## Read sub-images of each patient in the data path        
    sub_images = glob(patient_loc+'/*.svs')
    for sub_image_loc in sub_images:
        sub_image_name = sub_image_loc[len(data_path)+len(patient_name)+1:-4]        
        print(sub_image_name)
        
        ## To make sub_image directory under the patient's folder
        sub_image = './'+patient_name+'/'+sub_image_name #Destination path
        try:
            os.mkdir(sub_image)
        except OSError:
            print ("\n Creation of the patient's directory %s failed" % sub_image)
            
        image_name = sub_image_loc
        img = openslide.OpenSlide(image_name)
                                  
        # Read xml file
        xml_file_name  = image_name[:-4]
        xml_file_name = xml_file_name+'.xml'
        tree = ET.parse(xml_file_name)
        root = tree.getroot()
        
        #Generate binary mask for each cell-type                         
        for k in range(len(root)):
            label = [x.attrib['Name'] for x in root[k][0]]
            label = label[0]
            
            for child in root[k]:
                for x in child:
                    r = x.tag
                    if r == 'Attribute':
                        count = count+1
                        print(count)
                        label = x.attrib['Name']
                        binary_mask = np.transpose(np.zeros((img.read_region((0,0),0,img.level_dimensions[0]).size))) 
                        print(label)
                        
                        # Create directory for each label
                        sub_path = sub_image+'/'+label
                        
                        try:
                            os.mkdir(sub_path)
                        except OSError:
                            print ("Creation of the directory %s failed" % label)
                        else:
                            print ("Successfully created the directory %s " % label) 
                                          
                        
                    if r == 'Region':
                        regions = []
                        vertices = x[1]
                        coords = np.zeros((len(vertices), 2))
                        for i, vertex in enumerate(vertices):
                            coords[i][0] = vertex.attrib['X']
                            coords[i][1] = vertex.attrib['Y']        
                        regions.append(coords)
                        poly = Polygon(regions[0])  
                        
                        vertex_row_coords = regions[0][:,0]
                        vertex_col_coords = regions[0][:,1]
                        fill_row_coords, fill_col_coords = draw.polygon(vertex_col_coords, vertex_row_coords, binary_mask.shape)
                        binary_mask[fill_row_coords, fill_col_coords] = 255
                        mask_path = sub_path+'/'+str(count)+'_mask.tif'
                        cv2.imwrite(mask_path, binary_mask)
                        
    


TCGA-A2-A0CV-01Z-00-DX1
\TCGA-A2-A0CV-01Z-00-DX1_1
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_1
461
Epithelial
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_1/Epithelial
462
Lymphocyte
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_1/Lymphocyte
463
Neutrophil
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_1/Neutrophil
464
Macrophage
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_1/Macrophage
\TCGA-A2-A0CV-01Z-00-DX1_2
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_2
465
Epithelial
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_2/Epithelial
466
Lymphocyte
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_2/Lymphocyte
467
Neutrophil
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_2/Neutrophil
468
Macrophage
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_2/Macrophage
\TCGA-A2-A0CV-01Z-00-DX1_3
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_3
469
Epithelial
./TCGA-A2-A0CV-01Z-00-DX1/\TCGA-A2-A0CV-01Z-00-DX1_3/Epithelial
470
Lymphocyte
./TCGA-A2

545
Epithelial
./TCGA-BH-A18T-01Z-00-DX1/\TCGA-BH-A18T-01Z-00-DX1_6/Epithelial
546
Lymphocyte
./TCGA-BH-A18T-01Z-00-DX1/\TCGA-BH-A18T-01Z-00-DX1_6/Lymphocyte
547
Neutrophil
./TCGA-BH-A18T-01Z-00-DX1/\TCGA-BH-A18T-01Z-00-DX1_6/Neutrophil
548
Macrophage
./TCGA-BH-A18T-01Z-00-DX1/\TCGA-BH-A18T-01Z-00-DX1_6/Macrophage
TCGA-D8-A1X5-01Z-00-DX2
\TCGA-D8-A1X5-01Z-00-DX2_1
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_1
549
Epithelial
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_1/Epithelial
550
Lymphocyte
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_1/Lymphocyte
551
Neutrophil
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_1/Neutrophil
552
Macrophage
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_1/Macrophage
\TCGA-D8-A1X5-01Z-00-DX2_2
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_2
553
Epithelial
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_2/Epithelial
554
Lymphocyte
./TCGA-D8-A1X5-01Z-00-DX2/\TCGA-D8-A1X5-01Z-00-DX2_2/Lymphocyte
555
Neutrophil
./TCGA-D8-A

Epithelial
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_1/Epithelial
630
Lymphocyte
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_1/Lymphocyte
631
Neutrophil
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_1/Neutrophil
632
Macrophage
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_1/Macrophage
\TCGA-E9-A22G-01Z-00-DX1_2
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_2
633
Epithelial
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_2/Epithelial
634
Lymphocyte
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_2/Lymphocyte
635
Neutrophil
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_2/Neutrophil
636
Macrophage
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_2/Macrophage
\TCGA-E9-A22G-01Z-00-DX1_3
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_3
637
Epithelial
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_3/Epithelial
638
Lymphocyte
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G-01Z-00-DX1_3/Lymphocyte
639
Neutrophil
./TCGA-E9-A22G-01Z-00-DX1/\TCGA-E9-A22G