In [1]:
import os 
import cv2
from PIL import Image 
from tqdm import tqdm
import numpy as np 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Constants

# Dataset root; the loader below expects "Images" and "Masks" sub-folders in it.
# Set the BREAST_CANCER_DATA_PATH environment variable to point at another
# location without editing the notebook; the original path stays the default.
DATA_PATH = os.environ.get(
    "BREAST_CANCER_DATA_PATH",
    "E:/ML_Datasets/Breast_Cancer_Histopathology_Dataset/",
)
# Target size handed to cv2.resize for every image and mask.
RESIZE_SHAPE=(256,256)

# Idea


Semi-supervised data preparation

1. Build a classifier, or train a ResNet without ImageNet weights as the classifier.
2. Freeze the classifier weights and build a segmentation model 


Data preparation for the classifier: select two classes, stroma and tumor.

To prepare the classifier data, use each image's mask to check whether a class is present in that image. If it is, keep the pixels in the region corresponding to that class's mask unchanged and alter the pixels in the rest of the image (the code below sets them to black).

Create a dataset of, for example, 500 or 100 images each for stroma and tumor, stored in their own directories.

## Feature Preparation

In [19]:
def load_data_from_directory(data_path,resize_shape):
    """Load every image in ``<data_path>/Images`` together with its mask.

    The mask for an image is looked up under ``<data_path>/Masks`` using the
    same file name as the image.

    Parameters
    ----------
    data_path : str
        Dataset root containing the ``Images`` and ``Masks`` sub-folders.
    resize_shape : tuple of int
        Target size passed to ``cv2.resize`` as ``dsize`` (OpenCV interprets
        this as ``(width, height)``).

    Returns
    -------
    image_list : list
        Resized images, converted from OpenCV's BGR order to RGB.
    mask_list : list
        Resized single-channel masks (read with ``cv2.imread(..., 0)``).
    file_list : list of str
        File names, sorted, in the same order as the two lists above.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot read an image or its mask.
    """
    images_path = os.path.join(data_path, "Images")
    masks_path = os.path.join(data_path, "Masks")
    # os.listdir returns entries in arbitrary order; sort so that repeated
    # runs process (and return) the files in the same order.
    file_list = sorted(os.listdir(images_path))
    image_list = []
    mask_list = []
    for file_name in file_list:
        image_path = os.path.join(images_path, file_name)
        mask_path = os.path.join(masks_path, file_name)

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor/resize.
        image = cv2.imread(image_path)
        if image is None:
            raise OSError("cv2.imread could not read image: {}".format(image_path))
        mask = cv2.imread(mask_path, 0)
        if mask is None:
            raise OSError("cv2.imread could not read mask: {}".format(mask_path))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, resize_shape, interpolation=cv2.INTER_LINEAR)
        # Mask pixels are class ids, not intensities. Linear interpolation
        # would blend neighbouring ids into values that belong to no class
        # (or to the wrong one), so masks use nearest-neighbour instead.
        mask = cv2.resize(mask, resize_shape, interpolation=cv2.INTER_NEAREST)

        image_list.append(image)
        mask_list.append(mask)
    return image_list, mask_list, file_list

def save_class_labels_for_current_image(image,mask,file_name,save_folder='Processed_Classifier_Dataset'):
    """Save one class-isolated copy of ``image`` per class present in ``mask``.

    For each class id found in ``mask``, a copy of ``image`` is written in
    which pixels belonging to that class keep their original values and all
    other pixels are zero. The copy is saved to
    ``<save_folder>/<class_name>/<file_name>``.

    Parameters
    ----------
    image : numpy.ndarray
        Source image. Its first two dimensions must match ``mask`` because
        the mask is used to index it.
    mask : numpy.ndarray
        2-D array of per-pixel class ids.
    file_name : str
        File name (including extension) used for the saved copies.
    save_folder : str, optional
        Root output folder; created if missing, even when no class is present.
    """
    # NOTE(review): the public BCSS label table appears to list 1 as tumor and
    # 2 as stroma -- confirm this mapping matches the masks actually in use.
    class_labels = ((1, 'stroma'), (2, 'tumor'))

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_folder, exist_ok=True)

    for label, class_name in class_labels:
        region = mask == label
        if not region.any():
            continue
        # Boolean-mask indexing copies all matching pixels in one vectorised
        # step instead of a Python-level loop over coordinates.
        isolated = np.zeros_like(image)
        isolated[region] = image[region]

        class_folder = os.path.join(save_folder, class_name)
        os.makedirs(class_folder, exist_ok=True)
        Image.fromarray(isolated).save(os.path.join(class_folder, file_name))

def apply_preprocessing(data_path,resize_shape):
    """Load the dataset and write the per-class classifier images.

    Every image/mask pair under ``data_path`` is loaded via
    ``load_data_from_directory`` at ``resize_shape`` and then passed, with its
    file name, to ``save_class_labels_for_current_image``. A tqdm progress
    bar tracks the loop and a confirmation line is printed once it finishes.
    """
    images, masks, names = load_data_from_directory(data_path, resize_shape)

    # Walk the three parallel lists together instead of indexing each one.
    samples = zip(images, masks, names)
    for image, mask, name in tqdm(samples, total=len(images)):
        save_class_labels_for_current_image(image, mask, name)

    print('Saved Classifier Processed Dataset')

In [20]:
# Run the whole preprocessing pass on the configured dataset. The class-isolated
# images are written under 'Processed_Classifier_Dataset', a relative path that
# resolves against the notebook's current working directory.
apply_preprocessing(DATA_PATH,RESIZE_SHAPE)

100%|████████████████████████████████████████████████████████████████████████████████| 151/151 [00:08<00:00, 17.07it/s]

Saved Classifier Processed Dataset



