In [3]:
#Dataset
import torch
import numpy as np
import openslide
import cv2
from fastai.vision import *
from shapely import geometry
from pytorch_helper import *
from pytorch_augmentations import *
# from slide.pytorch_helper import *
# from slide.pytorch_augmentations import *

class BuildingsDataset(torch.utils.data.Dataset):
    """Canine Mammary Tumor patch dataset for semantic segmentation.

    Each item is one randomly positioned patch read from a whole-slide image
    together with a one-hot encoded label mask rasterised from the polygon
    annotations of that slide. Optional staining, augmentation and
    preprocessing steps are applied afterwards.

    Args:
        image_paths (list): paths of the slide files; one dataset item per entry.
        annotations_file (str): path to the JSON annotation database. It is read
            through the keys ``images``, ``annotations`` and ``categories``.
        class_rgb_values (list): class values handed to ``one_hot_encode``.
        augmentation (callable): data transformation pipeline
            (e.g. flip, scale, etc.), called as ``augmentation(image=..., mask=...)``.
        preprocessing (callable): data preprocessing
            (e.g. normalization, shape manipulation, etc.), called the same way.
        staining (bool): if truthy, the image is passed through ``get_stained_patch``.
        down_factor (int): fallback factor used by ``get_patch`` to convert patch
            coordinates to level-0 coordinates when no factor is passed explicitly.
        level (int): slide pyramid level patches are read from.
        patch_size (int): forwarded to ``get_stained_patch``.
        num_patches (int): forwarded to ``get_stained_patch``.
        width (int): patch width in pixels at ``level``.
        height (int): patch height in pixels at ``level``.
        white_mask_application: if truthy, pixels brighter than the slide's Otsu
            threshold are set to label 0 in the mask.
        label_dict (dict): maps annotation class names to integer mask labels.
    """

    # ``category_id`` value that marks a tumor polygon in the annotation file.
    TUMOR_CATEGORY_ID = 2

    def __init__(
            self,
            image_paths: list,  # list with image files
            annotations_file,  # json file
            class_rgb_values=None,
            augmentation=None,  # todo
            preprocessing=None,
            staining = True,
            down_factor = 4,  # new
            level = 2,
            patch_size = 320,
            num_patches = 5,
            width: int = 320, height: int = 320,
            white_mask_application = None, label_dict=None

    ):
        super().__init__()
        self.image_paths = image_paths
        self.annotations_file = annotations_file

        # Keys used to look things up in the annotation JSON.
        self.poly_klasse = "tumor_id"
        self.name = 'supercategory_1'
        self.id = 'id_super'

        # Patch geometry and pyramid level.
        self.width = width
        self.height = height
        self.patch_size = patch_size
        self.num_patches = num_patches
        self.down_factor = down_factor
        self.level = level
        self._level = level

        # Label handling and transformation pipeline.
        self.white_mask_application = white_mask_application
        self.label_dict = label_dict
        self.class_rgb_values = class_rgb_values
        self.augmentation = augmentation
        self.preprocessing = preprocessing
        self.staining = staining

        # Per-item state, filled in by ``__getitem__``.
        self.slide = None
        self._annotations = None

    def get_patch(self, x: int = 0, y: int = 0, down_factor=None):
        """Read an RGB patch of ``width`` x ``height`` pixels from the open slide.

        Args:
            x: left edge of the patch, in pixel coordinates of the read level.
            y: top edge of the patch, in pixel coordinates of the read level.
            down_factor: factor that converts ``x``/``y`` to level-0 coordinates.
                Defaults to ``self.down_factor`` when omitted.

        Returns:
            The region as an array with the alpha channel dropped.
        """
        factor = self.down_factor if down_factor is None else down_factor
        # OpenSlide expects the location in level-0 coordinates.
        region = self.slide.read_region(
            location=(int(x * factor), int(y * factor)),
            level=self._level,
            size=(self.width, self.height),
        )
        return np.array(region)[:, :, :3]

    def get_y_patch(self, x: int = 0, y: int = 0, down_factor: int = 4):
        """Rasterise the annotation polygons of the current slide into a label mask.

        Args:
            x: left edge of the patch, in pixel coordinates of the read level.
            y: top edge of the patch, in pixel coordinates of the read level.
            down_factor: factor by which the polygon coordinates are divided to
                bring them into the coordinate frame of ``x``/``y``.

        Returns:
            An ``int8`` array of shape ``(height, width)``; pixels not covered by
            any polygon keep the value -1.
        """
        # Start from an "unlabelled" mask.
        y_patch = -1 * np.ones(shape=(self.height, self.width), dtype=np.int8)
        inv_map = {v: k for k, v in self.tissue_classes.items()}

        tumor_contours = []
        for poly in self.polygons:
            contour = self._polygon_contour(poly, x, y, down_factor)
            label = self.label_dict[inv_map[poly[self.poly_klasse]]]

            if poly["category_id"] == self.TUMOR_CATEGORY_ID:
                print("contains tumor")
                tumor_contours.append((contour, label))

            y_patch = cv2.drawContours(
                y_patch, [contour], -1, label, -1, maxLevel=0)

        # Tumor polygons are drawn a second time, after everything else, so
        # that they win wherever they overlap with other annotations.
        for contour, label in tumor_contours:
            y_patch = cv2.drawContours(y_patch, [contour], -1, label, -1)

        # White mask for bright regions: everything above the Otsu threshold
        # of the slide is relabelled as 0.
        if self.white_mask_application:
            print("white mask is applied")

            gray = cv2.cvtColor(
                self.get_patch(x, y, down_factor=down_factor),
                cv2.COLOR_RGB2GRAY)
            y_patch[gray > self.white] = 0
        return y_patch

    def get_new_train_coordinates(self, down_factor, slide_w, slide_h):
        """Draw a random top-left patch corner inside the downsampled slide.

        Args:
            down_factor: downsample factor of the level the patch is read from.
            slide_w: slide width at level 0.
            slide_h: slide height at level 0.

        Returns:
            ``(x, y)`` in coordinates of the downsampled level, clamped so that
            a ``width`` x ``height`` patch does not run past the right or bottom
            edge. If the slide is smaller than a patch the corner is 0.
        """
        level_w = slide_w // down_factor
        level_h = slide_h // down_factor

        x = random.uniform(0, level_w)
        y = random.uniform(0, level_h)

        # Pull the corner back when the patch would overshoot the slide, and
        # never go below 0 (slides smaller than one patch).
        xmin = max(min(x, level_w - self.width), 0)
        ymin = max(min(y, level_h - self.height), 0)
        return xmin, ymin

    def _load_annotations(self):
        """Return the parsed annotation JSON, reading the file only once per path."""
        cached = getattr(self, "_annotations", None)
        if cached is None or cached[0] != self.annotations_file:
            with open(self.annotations_file) as f:
                cached = (self.annotations_file, json.load(f))
            self._annotations = cached
        return cached[1]

    def _polygon_contour(self, poly, x, y, down_factor):
        """Convert one annotation polygon into an OpenCV contour relative to the patch."""
        coordinates = np.array(poly['segmentation']).reshape((-1, 2)) / down_factor
        # Shift into the patch's own coordinate frame.
        coordinates = coordinates - (x, y)
        return coordinates.reshape((-1, 1, 2)).astype(int)

    def _estimate_white_threshold(self):
        """Compute the Otsu grey-value threshold on the lowest-resolution slide level."""
        lowest_level = self.slide.level_count - 1
        overview = np.array(self.slide.read_region(
            (0, 0), lowest_level, self.slide.level_dimensions[-1]))[:, :, :3]
        gray = cv2.cvtColor(overview, cv2.COLOR_RGB2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        threshold, _ = cv2.threshold(
            blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return threshold

    def __getitem__(self, i):
        """Return ``(image, mask)`` for a random patch of slide ``i``.

        Raises:
            IndexError: if the annotation file has no image entry whose
                ``file_name`` equals the file name of ``image_paths[i]``.
        """
        data = self._load_annotations()

        # Map the super-category name of every category to its id.
        self.tissue_classes = {
            cat[self.name]: cat[self.id] for cat in data["categories"]}

        # Find the image id that belongs to this slide file.
        name = Path(self.image_paths[i]).name
        image_ids = [img.get('id') for img in data.get("images")
                     if img.get("file_name") == name]
        if not image_ids:
            raise IndexError(
                f"no image entry named {name!r} in {self.annotations_file}")
        image_id = image_ids[0]

        # Annotations of this slide and the labels that occur in them.
        self.polygons = [anno for anno in data['annotations']
                         if anno["image_id"] == image_id]
        self.labels = {poly[self.poly_klasse] for poly in self.polygons}
        print("labels", self.labels)

        # Report slides that carry no labels at all.
        if not self.labels:
            print("der path hat keine Labels", self.image_paths[i])

        # Release the handle of the previously served slide before opening
        # the next one, so handles do not pile up over an epoch.
        previous = getattr(self, "slide", None)
        if previous is not None:
            previous.close()
        self.slide = openslide.open_slide(str(self.image_paths[i]))

        self.white = self._estimate_white_threshold()

        # Use the slide's own downsample for this level everywhere, so the
        # image patch and the mask refer to the same region.
        level_downsample = self.slide.level_downsamples[self.level]
        x, y = self.get_new_train_coordinates(
            down_factor=level_downsample,
            slide_w=self.slide.dimensions[0],
            slide_h=self.slide.dimensions[1])

        image = self.get_patch(x, y, down_factor=level_downsample)
        mask = self.get_y_patch(x, y, down_factor=level_downsample)

        # one-hot-encode the mask
        mask = one_hot_encode(mask, self.class_rgb_values).astype('float')

        # apply staining
        if self.staining:
            image = get_stained_patch(
                image, patch_size=self.patch_size, num_patches=self.num_patches)

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        """Return the number of slide files, i.e. the number of items."""
        return len(self.image_paths)




In [5]:
%matplotlib notebook

In [9]:
# Smoke test -----------------------------
# Tumor / non-tumor segmentation: maps annotation class names to mask label values.
label_dict = {'Other': -1, 'Bg':0, 'Tumor': 1, 'KeinTumor': 2}

# Slide container helpers. CAUTION: the annotation database path may need adjusting.
from process_slides import *
# Input locations
image_folder = "/home/klose/Data/crops_rescaled"
annotations_file = "/home/klose/Data/CMC_Tumor_NonTumor.json"
# Load the train/valid slides and create 5 patch entries per slide.
train_files, valid_files, _ = load_slides(["train","valid"], patch_size = 320,label_dict = label_dict, level=2, target_folder = image_folder, annotation_file = annotations_file)
train_data = create_patches(train_files, patches_per_slide=5)
valid_data = create_patches(valid_files, patches_per_slide=5)
#
# Class values used for one-hot encoding; they are the label values of label_dict, not RGB triples.
class_names = [-1,0, 1, 2]
# select_classes = ['Bg', 'Tumor', 'KeinTumor']
select_classes = [-1,0, 1, 2]
class_rgb_values = []
class_rgb_values.append([-1]) # 'Other' in label_dict (the part that should not be trained on)
class_rgb_values.append([0])  # 'Bg' in label_dict
# remaining tissue classes
class_rgb_values.append([1]) # 'Tumor' in label_dict
class_rgb_values.append([2]) # 'KeinTumor' (non-tumor) in label_dict

# Pick the class values of the selected classes
select_class_indices = [class_names.index(cls) for cls in select_classes]
select_class_rgb_values =  np.array(class_rgb_values)[select_class_indices]

# Dataset without augmentation or staining.
sc = BuildingsDataset(image_paths = train_data, annotations_file = annotations_file,class_rgb_values=select_class_rgb_values, width=320, height=320, label_dict=label_dict, augmentation = False, staining=False)


# NOTE: despite its name this holds the (image, mask) tuple returned by the dataset.
y_patch = sc[0]
import imageio


# Test with augmentation and staining enabled and the white mask disabled.
dataset = BuildingsDataset(image_paths = train_data, annotations_file = annotations_file,class_rgb_values=select_class_rgb_values, width=320, height=320, label_dict=label_dict, augmentation = get_training_augmentation(), staining=True, white_mask_application =False)
image, mask = dataset[0]
maske = mask[0]
print("image shape ausgabe", image.shape )
print("shape mask", mask.shape)
# print(mask)

ground_truth_mask = colour_code_segmentation(reverse_one_hot(mask), select_class_rgb_values)
print("shape ground_truth_mask", ground_truth_mask.shape)
one_hot_encoded_mask = reverse_one_hot(mask)
print("shape one_hot_encoded_mask",one_hot_encoded_mask.shape)

# print("farben",reverse_one_hot(ground_truth_mask))

visualize(
    original_image = image,
    ground_truth_mask = colour_code_segmentation(reverse_one_hot(mask), select_class_rgb_values), # holds the label values -1, 0, 1, 2
    one_hot_encoded_mask = reverse_one_hot(mask) # single-channel matrix of class indices 0-3; probably not needed because the mask starts out in that form
)

# Keys of one annotation entry: dict_keys(['segmentation', 'iscrowd', 'image_id', 'tumor_id', 'category_id', 'id', 'bbox', 'area'])

100%|████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 5410.61it/s]

labels {1, 2, -1}
contains tumor
labels {1, 2, -1}
contains tumor
image shape ausgabe (320, 320, 3)
shape mask (320, 320, 4)
shape ground_truth_mask (320, 320, 1)
shape one_hot_encoded_mask (320, 320)





<IPython.core.display.Javascript object>

In [59]:
# Largest label value present in the colour-coded mask of the sampled patch.
print(ground_truth_mask.max())

1


In [60]:
# Smallest label value present in the colour-coded mask of the sampled patch.
print(ground_truth_mask.min())

-1
