# Prepare image and volume crops
Here we create simple tools for cropping input imagery and for validating the produced annotations.

Every section has a safety mechanism in the form of a flag, which has to be changed manually to True before the section will produce or change anything.

## Extensions
It is highly recommended to enable a few Jupyter notebook extensions, which make this toolset much easier to work with:
* Table of Contents (2) - makes it easy to navigate the many sections of this notebook
* Initialization cells - makes sure that all setup cells are run so that you can go straight to cropping

## Contents:
1. Loading and setting up data paths
2. Crop new data for annotation (dapi, membrane)
3. Crop new random data for CLB
4. Recrop using previous crops
5. Establish voxel size
6. Validate annotations
7. Compare annotation data
8. Prepare summary of all annotations

In [None]:
from notebook_utils import *

import imageio
import skimage.segmentation
import skimage.filters

import scipy.stats

import random

import os
import shutil

from tqdm import tqdm_notebook as tqdm

%matplotlib inline

## Setup paths and common methods
Here each new user can add their local paths to the dataset and code required by this toolset. For each location, the path that actually exists on the current machine is chosen.

In [None]:
import clb.utils
from functools import lru_cache
from clb.dataprep.readers import get_volume_reader

In [None]:
# Each root below lists one candidate location per user machine; per the notebook
# text above, pick_path selects the candidate that actually exists.

# Local copy of the full-volume .tif files (used for the path_to_full_* paths).
data_root = pick_path([r'D:\Fafa\MIT\CellDx', # fmroz
                       r'C:\MIT\CLDX\Data', # fmroz
                       r'C:\Annotations\CLDX\Crop_dataset', # patryk
                       '/home/bartlomiej/clb_data/Crop_dataset' # bkuchnowski
                      ])
# Location of the raw .lif files (used for the raw_lif_file_* paths).
raw_data_root = pick_path([r'G:\Dyski zespołu\CLB\Public', # fmroz
                          r'G:\Shared drives\CLB\Public', # fmroz
                           '/home/bartlomiej/clb_data/raw_data' #bkuchnowski
                          ])
# NOTE(review): presumably the shared, raw annotation datasets - not used in the cells visible here.
raw_annotation_root = pick_path([r"G:\Dyski zespołu\CLB\Public\Datasets annotation", # fmroz
                                 r"G:\Shared drives\CLB\Public\Datasets annotation", # fmroz
                                 r"C:\Annotations\CLDX\New_crops", # patryk
                                 r"/home/bartlomiej/clb_data/New_crops" # bkuchnowski
                                ])
# Root under which every cropping set keeps its own output folder (annotation_dir_*).
annotation_root = pick_path([r"G:\Dyski zespołu\CLB_Team\Private\_annotations",  # fmroz
                             r"G:\Shared drives\CLB_Team\Private\_annotations",  # fmroz
                             r"C:\Annotations\CLDX\New_crops", # patryk
                             r"/home/bartlomiej/clb_data/New_crops" # bkuchnowski
                            ])
# Checkout of the cldx-pilot repository.
code_root = pick_path([r"O:\Code\cldx-pilot", # fmroz
                       r"C:\MIT\CLDX\Code\cldx-pilot", # fmroz
                       r"C:\Mit_Projects\cldx-pilot", # patryk
                       r"/home/bartlomiej/repos/cldx-pilot" # bkuchnowski
                      ])

In [None]:
import clb.evaluate.evaluator_segment
import clb.classify.utils
def get_standard_datasets(annotation_root, series, in_gd=True):
    """Collect the segmentation datasets of one series, or of every series.

    @param annotation_root: directory that contains one sub-directory per series
    @param series: name of the series sub-directory; None means "all series
                   found directly under annotation_root"
    @param in_gd: selects the sub-folder naming: "Input"/"Annotations" when True,
                  "images"/"labels" when False
    @return: list of datasets as returned by
             clb.evaluate.evaluator_segment.get_all_datasets
    """
    if series is None:
        # Only directories can be series. Stray files in the root (notes,
        # desktop.ini, ...) are skipped instead of being treated as a series.
        # Sorting makes the result order independent of the file system.
        series_names = sorted(
            name for name in os.listdir(annotation_root)
            if os.path.isdir(os.path.join(annotation_root, name))
        )
        # Flatten in a single pass; sum(lists, []) copies the accumulator on every step.
        return [
            dataset
            for name in series_names
            for dataset in get_standard_datasets(annotation_root, name, in_gd)
        ]

    annotation_base_dir = os.path.join(annotation_root, series)
    input_dir = os.path.join(annotation_base_dir, "Input" if in_gd else "images")
    labels_dir = os.path.join(annotation_base_dir, "Annotations" if in_gd else "labels")
    return clb.evaluate.evaluator_segment.get_all_datasets(input_dir, labels_dir, input_dir, input_dir)

def get_class_datasets(annotation_root, series, class_name):
    """Collect the classification datasets of one series for the given class.

    The series folder is expected to use the lower-case "input" / "annotations"
    sub-folders; the result is whatever clb.classify.utils.get_all_datasets returns.
    """
    series_dir = os.path.join(annotation_root, series)
    images_dir, labels_dir = [os.path.join(series_dir, sub) for sub in ("input", "annotations")]
    return clb.classify.utils.get_all_datasets(images_dir, labels_dir, images_dir, images_dir, class_name)

## Definition of cropping sets
Each cropping set represents one series of the data from which we crop. It should correspond to the folder in which the crops of that series are stored.

In [None]:
class CroppingSet:
    """One series of source data together with the folder that holds its crops.

    The source volume is read either from per-channel .tif files (derived from
    full_path_template by replacing "CX" with "C1".."C4") or, once set_lif_file
    has been called, from one series of a .lif file.
    """

    def __init__(self, full_path_template, annotation_prefix, output_root_dir, upper_case=True):
        """
        @param full_path_template: path of the full-volume .tif with "CX" as the channel placeholder
        @param annotation_prefix: file name prefix used for crops of this set
        @param output_root_dir: folder that contains the input and annotations sub-folders
        @param upper_case: use "Input"/"Annotations" (True) or "input"/"annotations" (False)
        """
        self.full_path_template = full_path_template
        self.annotation_prefix = annotation_prefix
        self.output_root_dir = output_root_dir
        self.input_name = "Input" if upper_case else "input"
        self.annotations_name = "Annotations" if upper_case else "annotations"
        self.raw_lif_path = None
        self.raw_series = None
        self.internal_name = os.path.basename(output_root_dir)
        # Per-instance cache for get_data_shape(); None means "not computed yet".
        self._data_shape = None

    def set_lif_file(self, lif_path, series):
        """
        Read the data from a .lif file instead of the per-channel .tif files.

        @param lif_path: path to the .lif file
        @param series: zero-based index of series in lif file
        """
        self.raw_lif_path = lif_path
        self.raw_series = series
        # The data source changed, so a previously cached shape may be wrong.
        self._data_shape = None

    def get_multi_channels(self):
        """Return the four per-channel .tif paths (C1..C4) derived from the template."""
        return tuple(
            self.full_path_template.replace("CX", "C{0}".format(channel))
            for channel in (1, 2, 3, 4)
        )

    def get_raw_data(self):
        """Return a volume reader for the .lif series, or None when no .lif file is set."""
        if self.raw_lif_path is None:
            return None
        return get_volume_reader(self.raw_lif_path, series=self.raw_series)

    def get_voxel_size(self):
        """Return the (z, y, x) voxel size rounded to 2 decimals, or None without a .lif file."""
        if self.raw_lif_path is None:
            return None
        z, y, x = self.get_raw_data().voxel_size
        return round(z, 2), round(y, 2), round(x, 2)

    def get_channel_data(self, channel_index, max_z=None):
        """
        Load one channel of the source volume.

        @param channel_index: zero-based channel index
        @param max_z: when given, only the first max_z slices are returned
        """
        if self.raw_lif_path is None:
            path_to_full = self.get_multi_channels()[channel_index]
            volume = imageio.volread(path_to_full)
            if max_z is not None:
                volume = volume[:max_z]
            return volume
        # A stop of None selects every slice, so one expression covers both cases.
        return self.get_raw_data()[:max_z, channel_index].to_numpy()

    def get_datasets(self, include_non_annotated=False):
        """
        List the datasets stored in this set's output folder.

        @param include_non_annotated: when True the input folder is also passed as the
                                      labels folder, so crops without annotations are listed too
        """
        input_dir = self.cropped_path_input
        labels_dir = input_dir if include_non_annotated else self.cropped_path_annotation
        return clb.evaluate.evaluator_segment.get_all_datasets(input_dir, labels_dir, input_dir, input_dir)

    @property
    def cropped_path_input(self):
        """Folder with the cropped input images."""
        return os.path.join(self.output_root_dir, self.input_name)

    @property
    def cropped_path_annotation(self):
        """Folder with the annotations of the crops."""
        return os.path.join(self.output_root_dir, self.annotations_name)

    def get_data_shape(self):
        """Return the first three dimensions of channel 0 of the source volume.

        Computing it loads the whole channel, so the result is cached on the
        instance. A cache shared through lru_cache on the method would keep
        instances alive and evict entries once more than a few sets are used.
        """
        cached = getattr(self, "_data_shape", None)
        if cached is None:
            cached = tuple(self.get_channel_data(0).shape)[:3]
            self._data_shape = cached
        return cached

    def set_voxel_size(self, infos):
        """Set voxel_size on every info to the voxel size of this set, if it is known.

        Prints one message for each distinct (old, new) pair that was overwritten.
        """
        dataset_voxel_size = self.get_voxel_size()
        warning = []
        if dataset_voxel_size is not None:
            for info in infos:
                info_voxel_size = getattr(info, 'voxel_size', None)
                if info_voxel_size is not None and info_voxel_size != dataset_voxel_size:
                    warning.append("Changed voxel size from {0} to {1}.".format(info_voxel_size, dataset_voxel_size))
                info.voxel_size = dataset_voxel_size
        for warn in set(warning):
            print (warn)

    def crop_by_infos(self, infos, channel_index=0):
        """Crop one channel of the source volume according to the given crop infos.

        As a side effect it also sets the proper voxel size on the infos.
        """
        self.set_voxel_size(infos)
        # NOTE(review): takes the z of the last info as the deepest slice needed,
        # i.e. assumes infos are ordered by ascending z - confirm.
        volume = self.get_channel_data(channel_index, max_z=infos[-1].z+1).copy()
        return clb.cropping.VolumeROI.from_absolute_crop_with_padding(infos, volume).crop_volume

    def get_all_infos(self):
        """Return the crop infos of every dataset of this set (annotated or not) as one flat list."""
        list_of_files = [clb.cropping.CropInfo.load(d.crop_info) for d in self.get_datasets(include_non_annotated=True)]
        return [d for ds in list_of_files for d in ds]

    def get_overlap(self, infos):
        """Return the fraction of the given crop that overlaps the crops already stored in this set."""
        return clb.cropping.CropInfo.overlap_volume_fraction(infos, self.get_all_infos())

    def crop_by_infos_merged(self, infos, channel_indices=(0,)):
        """Crop several channels and stack them along a new last axis.

        Singleton axes are squeezed away, so a single channel yields no channel axis.
        """
        res = [self.crop_by_infos(infos, index) for index in channel_indices]
        return np.squeeze(np.stack(res, -1))

    def crop_dataset(self, dataset, channel_index=0):
        """Crop one channel using the crop infos stored with the dataset."""
        infos = clb.cropping.CropInfo.load(dataset.crop_info)
        return self.crop_by_infos(infos, channel_index)

    def duplicate_set(self, output_root_dir, annotation_prefix=None):
        """Create a set over the same source data that stores its crops in another folder.

        @param output_root_dir: output folder of the new set
        @param annotation_prefix: prefix of the new set; defaults to the prefix of this set
        """
        annotation_prefix = annotation_prefix or self.annotation_prefix
        new_set = CroppingSet(self.full_path_template, annotation_prefix, output_root_dir, True)
        # Folder naming is copied explicitly, so the upper_case argument above does not matter.
        new_set.input_name = self.input_name
        new_set.annotations_name = self.annotations_name

        new_set.raw_lif_path = self.raw_lif_path
        new_set.raw_series = self.raw_series
        return new_set

In [None]:
# Raw .lif files; a single file can hold several series (see set_lif_file).
raw_lif_file_T3_T5_T6 = os.path.join(raw_data_root, "060718 To MIT.lif")
raw_lif_file_NewClass = os.path.join(raw_data_root, "110718 more markers to test classification.lif")
raw_lif_file_512DenoisingData_S1_S2 = os.path.join(raw_data_root, "Ab feasibility - Internal R-D_07022019.lif")
raw_lif_file_512DenoisingData_S3_S4_S5 = os.path.join(raw_data_root, "Lectin feasiblity-expression_ETC cleared.lif")
raw_lif_file_Tonsil_CD = os.path.join(raw_data_root, "Tonsil Ab test-1-23-19.lif")

# Full-volume .tif templates; "CX" is the channel placeholder that
# CroppingSet.get_multi_channels replaces with C1..C4.
path_to_full_T8_S1 = os.path.join(data_root, "T8 S1 CX -#8T 2048 1024 0.5um more images.tif")
path_to_full_T8_S2 = os.path.join(data_root, "T8 S2 CX -#8T 2048 1024 0.5um more images.tif")
path_to_full_T8_S3 = os.path.join(data_root, "T8 S3 CX -#8T 2048 1024 0.5um more images.tif")
path_to_full_T3_S1 = os.path.join(data_root, "T3 S1 CX -#3T 1024 step 0.5.tif")
path_to_full_T5_S1 = os.path.join(data_root, "T5 S1 CX -#5T 1024 step 0.5.tif")
path_to_full_T6_S1 = os.path.join(data_root, "T6 S1 CX -#6T 1024 step 0.5.tif")
path_to_full_NewClass_S1 = os.path.join(data_root, "TestNewClasses S1 CX - Tonsil 175-2 1024.tif")
path_to_full_NewClass_S2 = os.path.join(data_root, "TestNewClasses S2 CX - Tonsil 130 1024.tif")
path_to_full_NewClass_S3 = os.path.join(data_root, "TestNewClasses S3 CX - FFPE 2048-1024.tif")
path_to_full_512DenoisingData_S1 = os.path.join(data_root, "512DenoisingData S1 CX - Ab feasibility S8.tif")
path_to_full_512DenoisingData_S2 = os.path.join(data_root, "512DenoisingData S2 CX - Ab feasibility S18.tif")
path_to_full_512DenoisingData_S3 = os.path.join(data_root, "512DenoisingData S3 CX - Lectin feasiblity S2.tif")
path_to_full_512DenoisingData_S4 = os.path.join(data_root, "512DenoisingData S4 CX - Lectin feasiblity S8.tif")
path_to_full_512DenoisingData_S5 = os.path.join(data_root, "512DenoisingData S5 CX - Lectin feasiblity S12.tif")

# File name prefixes of the crops of each set; the crop number/description is appended to them.
annotation_prefix_T8_S1 = "#8T S1 2048_1024 crop_"
annotation_prefix_T8_S2 = "#8T S2 2048_1024 crop_"
annotation_prefix_T8_S3 = "#8T S3 2048_1024 crop_"
annotation_prefix_T3_S1 = "#3T S1 1024 crop_"
annotation_prefix_T5_S1 = "#5T S1 1024 crop_"
annotation_prefix_T6_S1 = "#6T S1 1024 crop_"
annotation_prefix_T3_S1_count = "#3T S1 1024 crop_C_class_"
annotation_prefix_T5_S1_count = "#5T S1 1024 crop_C_class_"
annotation_prefix_T6_S1_count = "#6T S1 1024 crop_C_class_"
annotation_prefix_NewClass_S1 = "TestNewClasses S1 1024 crop_"
annotation_prefix_NewClass_S2 = "TestNewClasses S2 1024 crop_"
annotation_prefix_NewClass_S3 = "TestNewClasses S3 2048_1024 crop_"
annotation_prefix_512DenoisingData_S1 = "512DenoisingData S1 C1 crop_"
annotation_prefix_512DenoisingData_S2 = "512DenoisingData S2 C1 crop_"
annotation_prefix_512DenoisingData_S3 = "512DenoisingData S3 C1 crop_"
annotation_prefix_512DenoisingData_S4 = "512DenoisingData S4 C1 crop_"
annotation_prefix_512DenoisingData_S5 = "512DenoisingData S5 C1 crop_"

# The Tonsil sets have no .tif template; they are read from the .lif file only.
annotation_prefix_Tonsil_CD3_S2 = "Tonsil_CD3 S2 crop_"
annotation_prefix_Tonsil_CD8_S3 = "Tonsil_CD8 S3 crop_"

### Segmentation

In [None]:
# Output folders of the segmentation cropping sets.
annotation_dir_T3_S1 = os.path.join(annotation_root, "T3_S1")
annotation_dir_T5_S1 = os.path.join(annotation_root, "T5_S1")
annotation_dir_T6_S1 = os.path.join(annotation_root, "T6_S1")
annotation_dir_T8_S1 = os.path.join(annotation_root, "T8_S1")
annotation_dir_T8_S2 = os.path.join(annotation_root, "T8_S2")
annotation_dir_T8_S3 = os.path.join(annotation_root, "T8_S3")
annotation_dir_NewClass_S1_Seg = os.path.join(annotation_root, "TestNewClasses_S1")
annotation_dir_NewClass_S2_Seg = os.path.join(annotation_root, "TestNewClasses_S2")
annotation_dir_512DenoisingData_S1 =  os.path.join(annotation_root, "512DenoisingData_S1")
annotation_dir_512DenoisingData_S2 =  os.path.join(annotation_root, "512DenoisingData_S2")
annotation_dir_512DenoisingData_S3 =  os.path.join(annotation_root, "512DenoisingData_S3")
annotation_dir_512DenoisingData_S4 =  os.path.join(annotation_root, "512DenoisingData_S4")
annotation_dir_512DenoisingData_S5 =  os.path.join(annotation_root, "512DenoisingData_S5")

# T3/T5/T6 share one .lif file; the second argument of set_lif_file is the
# zero-based index of the series inside that file.
cropping_set_T3 = CroppingSet(path_to_full_T3_S1, annotation_prefix_T3_S1, annotation_dir_T3_S1)
cropping_set_T3.set_lif_file(raw_lif_file_T3_T5_T6, 1)
cropping_set_T5 = CroppingSet(path_to_full_T5_S1, annotation_prefix_T5_S1, annotation_dir_T5_S1)
cropping_set_T5.set_lif_file(raw_lif_file_T3_T5_T6, 2)
cropping_set_T6 = CroppingSet(path_to_full_T6_S1, annotation_prefix_T6_S1, annotation_dir_T6_S1)
cropping_set_T6.set_lif_file(raw_lif_file_T3_T5_T6, 0)

# The T8 sets have no .lif file, so they are read from the per-channel .tif files.
cropping_set_T8_1 = CroppingSet(path_to_full_T8_S1, annotation_prefix_T8_S1, annotation_dir_T8_S1)
cropping_set_T8_2 = CroppingSet(path_to_full_T8_S2, annotation_prefix_T8_S2, annotation_dir_T8_S2)
cropping_set_T8_3 = CroppingSet(path_to_full_T8_S3, annotation_prefix_T8_S3, annotation_dir_T8_S3)

# upper_case=False: these sets use the lower-case "input"/"annotations" folder names.
cropping_set_NewClass_S1_Seg = \
    CroppingSet(path_to_full_NewClass_S1, annotation_prefix_NewClass_S1, annotation_dir_NewClass_S1_Seg, upper_case=False)
cropping_set_NewClass_S1_Seg.set_lif_file(raw_lif_file_NewClass, 0)
cropping_set_NewClass_S2_Seg = \
    CroppingSet(path_to_full_NewClass_S2, annotation_prefix_NewClass_S2, annotation_dir_NewClass_S2_Seg, upper_case=False)
cropping_set_NewClass_S2_Seg.set_lif_file(raw_lif_file_NewClass, 1)

# The series index is zero-based, so e.g. the "S8" volume is series 7 of its .lif file.
cropping_set_512DenoisingData_S1 = CroppingSet(path_to_full_512DenoisingData_S1, annotation_prefix_512DenoisingData_S1, annotation_dir_512DenoisingData_S1, upper_case=False)
cropping_set_512DenoisingData_S1.set_lif_file(raw_lif_file_512DenoisingData_S1_S2, 7)
cropping_set_512DenoisingData_S2 = CroppingSet(path_to_full_512DenoisingData_S2, annotation_prefix_512DenoisingData_S2, annotation_dir_512DenoisingData_S2, upper_case=False)
cropping_set_512DenoisingData_S2.set_lif_file(raw_lif_file_512DenoisingData_S1_S2, 17)
cropping_set_512DenoisingData_S3 = CroppingSet(path_to_full_512DenoisingData_S3, annotation_prefix_512DenoisingData_S3, annotation_dir_512DenoisingData_S3, upper_case=False)
cropping_set_512DenoisingData_S3.set_lif_file(raw_lif_file_512DenoisingData_S3_S4_S5, 1)
cropping_set_512DenoisingData_S4 = CroppingSet(path_to_full_512DenoisingData_S4, annotation_prefix_512DenoisingData_S4, annotation_dir_512DenoisingData_S4, upper_case=False)
cropping_set_512DenoisingData_S4.set_lif_file(raw_lif_file_512DenoisingData_S3_S4_S5, 7)
cropping_set_512DenoisingData_S5 = CroppingSet(path_to_full_512DenoisingData_S5, annotation_prefix_512DenoisingData_S5, annotation_dir_512DenoisingData_S5, upper_case=False)
cropping_set_512DenoisingData_S5.set_lif_file(raw_lif_file_512DenoisingData_S3_S4_S5, 11)

# Every segmentation set defined above.
all_segmentation_sets = [cropping_set_T8_1, cropping_set_T8_2, cropping_set_T8_3,
                     cropping_set_NewClass_S1_Seg, cropping_set_NewClass_S2_Seg,
                     cropping_set_T3, cropping_set_T5, cropping_set_T6,
                     cropping_set_512DenoisingData_S1, cropping_set_512DenoisingData_S2, cropping_set_512DenoisingData_S3, cropping_set_512DenoisingData_S4, cropping_set_512DenoisingData_S5
                    ]

### Classification

In [None]:
# Output folders of the classification cropping sets.
annotation_dir_T3_S1_extend = os.path.join(annotation_root, "T3_S1_extended")
annotation_dir_T5_S1_extend = os.path.join(annotation_root, "T5_S1_extended")
annotation_dir_T6_S1_extend = os.path.join(annotation_root, "T6_S1_extended")
annotation_dir_T3_S1_count = os.path.join(annotation_root, "T3_S1_counters")
annotation_dir_T5_S1_count = os.path.join(annotation_root, "T5_S1_counters")
annotation_dir_T6_S1_count = os.path.join(annotation_root, "T6_S1_counters")
annotation_dir_NewClass_S1_Class_Test = os.path.join(annotation_root, "TestNewClasses_S1_test")
annotation_dir_NewClass_S2_Class_Test = os.path.join(annotation_root, "TestNewClasses_S2_test")
annotation_dir_NewClass_S1 = os.path.join(annotation_root, "TestNewClasses_S1_extended")
annotation_dir_NewClass_S2 = os.path.join(annotation_root, "TestNewClasses_S2_extended")
annotation_dir_NewClass_S3 = os.path.join(annotation_root, "TestNewClasses_S3_extended")

annotation_dir_Tonsil_CD3_S2 = os.path.join(annotation_root, "Tonsil_CD3_S2_extended")
annotation_dir_Tonsil_CD8_S3 = os.path.join(annotation_root, "Tonsil_CD8_S3_extended")

# duplicate_set reuses the source data of the segmentation set and only changes
# the output folder (and, for the counter sets, the file name prefix).
cropping_set_T3_count = cropping_set_T3.duplicate_set(annotation_dir_T3_S1_count, annotation_prefix_T3_S1_count)
cropping_set_T5_count = cropping_set_T5.duplicate_set(annotation_dir_T5_S1_count, annotation_prefix_T5_S1_count)
cropping_set_T6_count = cropping_set_T6.duplicate_set(annotation_dir_T6_S1_count, annotation_prefix_T6_S1_count)
cropping_set_T3_extend = cropping_set_T3.duplicate_set(annotation_dir_T3_S1_extend)
cropping_set_T5_extend = cropping_set_T5.duplicate_set(annotation_dir_T5_S1_extend)
cropping_set_T6_extend = cropping_set_T6.duplicate_set(annotation_dir_T6_S1_extend)

# Classification sets of the TestNewClasses data: same source data as the
# matching segmentation set, but stored in the "_extended" folders.
cropping_set_NewClass_S1 = cropping_set_NewClass_S1_Seg.duplicate_set(annotation_dir_NewClass_S1)
# S2 has to be derived from the S2 segmentation set. Duplicating the S1 set here
# (as was done before) gave S2 the S1 file prefix, .tif template and .lif series.
cropping_set_NewClass_S2 = cropping_set_NewClass_S2_Seg.duplicate_set(annotation_dir_NewClass_S2)
# S3 has no segmentation counterpart, so it is created from scratch.
cropping_set_NewClass_S3 = \
    CroppingSet(path_to_full_NewClass_S3, annotation_prefix_NewClass_S3, annotation_dir_NewClass_S3, upper_case=False)
cropping_set_NewClass_S3.set_lif_file(raw_lif_file_NewClass, 2)

# Test splits: same data and prefix as the sets above, stored in the "_test" folders.
cropping_set_NewClass_S1_Class_Test = cropping_set_NewClass_S1.duplicate_set(annotation_dir_NewClass_S1_Class_Test)
cropping_set_NewClass_S2_Class_Test = cropping_set_NewClass_S2.duplicate_set(annotation_dir_NewClass_S2_Class_Test)

# The Tonsil sets have no .tif template (None); their data comes from the .lif file only.
cropping_set_Tonsil_CD3_S2 = CroppingSet(None, annotation_prefix_Tonsil_CD3_S2, annotation_dir_Tonsil_CD3_S2, upper_case=False)
cropping_set_Tonsil_CD3_S2.set_lif_file(raw_lif_file_Tonsil_CD, 1)
cropping_set_Tonsil_CD8_S3 = CroppingSet(None, annotation_prefix_Tonsil_CD8_S3, annotation_dir_Tonsil_CD8_S3, upper_case=False)
cropping_set_Tonsil_CD8_S3.set_lif_file(raw_lif_file_Tonsil_CD, 2)


# Every classification set defined above.
all_classification_sets = [
                     cropping_set_NewClass_S1, cropping_set_NewClass_S2, cropping_set_NewClass_S3,
                     cropping_set_NewClass_S1_Class_Test, cropping_set_NewClass_S2_Class_Test,
                     cropping_set_T3_count, cropping_set_T5_count, cropping_set_T6_count,
                     cropping_set_T3_extend, cropping_set_T5_extend, cropping_set_T6_extend,
                     cropping_set_Tonsil_CD3_S2, cropping_set_Tonsil_CD8_S3
                    ]

# Segmentation and classification sets together.
all_cropping_sets = all_segmentation_sets + all_classification_sets

## Crop new data for annotation (dapi, membrane)
This section crops at a specific position and saves the DAPI and membrane channels. It is the first implementation of cropping and is deprecated - prefer the random cropping sections below.

In [None]:
import clb.cropping
# Re-import clb.cropping so that edits made to the module are picked up without
# restarting the kernel. NOTE(review): importlib is presumably provided by the
# star import from notebook_utils.
importlib.reload(clb.cropping)
# Safety flag of this section: the cells below only crop and save when it is True.
crop_new_data = False  # This section is deprecated - enable only if really needed.

#### Set from which dataset and coordinates of the crop

In [None]:
# Per-channel .tif files of the T6 set: index 0 is the "C1" file, index 2 the "C3" file.
path_to_full_dapi = cropping_set_T6.get_multi_channels()[0]
path_to_full_memb = cropping_set_T6.get_multi_channels()[2]

annotation_dir = cropping_set_T6.cropped_path_input
annotation_prefix = cropping_set_T6.annotation_prefix

# Number of the crop to create; it becomes part of the output file names.
crop_number = "52"
eval_only_crop_number = "E5"  # not used in the cells visible here
annotation_name = annotation_prefix + crop_number + " 0.5um_dapi.yaml"
annotation_path = os.path.join(annotation_dir, annotation_name)

if crop_new_data:
    # Position of the crop and its extent along z.
    x = 25
    y = 693
    start_z = 40
    thickness = 5
    # 200x200 crop infos, one for each slice in [start_z, start_z + thickness).
    crops_volume = clb.cropping.CropInfo.create_volume(y,x,200,200,range(start_z, start_z+thickness, 1))
    print('crops_len =', len(crops_volume))

#### Crop and see

In [None]:
if crop_new_data:
    # Preview: read the slice of the first crop info and crop it as a single 2D image.
    first_image = imageio.volread(path_to_full_dapi)[crops_volume[0].z].copy()
    first_crop = crops_volume[0].crop(first_image, from_volume=False)
    print(first_crop.shape)
# Uncomment to display the preview crop:
#show_all(1,1, first_crop, scale=5)

#### Save crop and crop info

In [None]:
if crop_new_data:
    print(annotation_name)
    # annotation_path ends with "_dapi.yaml" (10 characters); strip it to build the image paths.
    output_path_dapi = annotation_path[:-10] + "_dapi.tif"
    output_path_mem = annotation_path[:-10] + "_mem.tif"
    dapis = []
    membranes = []

    zs = [crop.z for crop in crops_volume]
    min_z, max_z = zs[0], zs[-1]+1

    # Only the slices up to the deepest crop are kept from each loaded volume.
    loaded_dapi = imageio.volread(path_to_full_dapi)[:max_z].copy()
    loaded_memb = imageio.volread(path_to_full_memb)[:max_z].copy()

    # Crop both channels slice by slice.
    for crop in tqdm(crops_volume):
        cropped_dapi = crop.crop(loaded_dapi)
        cropped_membrane = crop.crop(loaded_memb)
        dapis.append(cropped_dapi)
        membranes.append(cropped_membrane)
    print("saving", output_path_dapi)
    imageio.mimwrite(output_path_dapi, dapis)
    imageio.mimwrite(output_path_mem, membranes)
    # The crop infos are saved too, so that the crop can be located in the volume again.
    print("saving", annotation_path)
    clb.cropping.CropInfo.save(crops_volume, annotation_path)

    show_all(1,2, dapis[0], membranes[0], scale=20)

## Crop new random data for CLB
This section crops randomly from the entire volume: a random proposal is presented, and it can then be approved and saved.
After saving, the crop number has to be incremented manually.

In [None]:
import clb.cropping
importlib.reload(clb.cropping)

def show_crop(cropping_set, size_z, crops_volume, channels):
    """Crop the given infos from the set, print a short summary and pick a preview slice.

    @return: tuple (crops_volume, middle z-slice of the merged crop)
    """
    merged = cropping_set.crop_by_infos_merged(crops_volume, channels)

    print('data_shape = ', cropping_set.get_data_shape())
    print('crop_shape =', merged.shape)
    print('crop_info =', crops_volume[0])
    overlap_percent = 100 * cropping_set.get_overlap(crops_volume)
    print('overlap_with_previous = {0} %'.format(overlap_percent))

    middle_slice = merged[size_z // 2]
    return crops_volume, middle_slice

def get_and_show_random_crop(cropping_set, size_z, size_y, size_x, channels):
    """Propose a random crop of the given size inside the set's volume and show it.

    @return: same as show_crop: (crop infos, middle z-slice of the crop)
    """
    volume_shape = cropping_set.get_data_shape()
    random_infos = clb.cropping.CropInfo.create_random_volume(volume_shape, size_y, size_x, size_z)
    return show_crop(cropping_set, size_z, random_infos, channels)

def get_and_show_crop(cropping_set, left_top, crop_shape, channels):
    """Crop at an explicit position and show it.

    @param left_top: (z, y, x) of the first slice / top-left corner of the crop
    @param crop_shape: (size_z, size_y, size_x) of the crop
    @return: same as show_crop: (crop infos, middle z-slice of the crop)
    """
    # The depth comes from crop_shape. Previously an undefined local size_z was
    # used, which silently picked up whatever notebook global had that name.
    size_z = crop_shape[0]
    slices = range(left_top[0], left_top[0] + size_z, 1)
    crops_volume = clb.cropping.CropInfo.create_volume(left_top[1], left_top[2], crop_shape[1], crop_shape[2], slices)
    return show_crop(cropping_set, size_z, crops_volume, channels)

def save_croped_data(cropping_set, crops_volume, channels, number_desc, output_input, 
                     output_view=None, view_channels=None, view_deeper=0):
    """Save a crop (image + crop info yaml) and optionally a helper "view" image.

    @param cropping_set: set to crop from; its annotation_prefix starts the file names
    @param crops_volume: crop infos describing the crop
    @param channels: channel indices stored in the crop image
    @param number_desc: crop description appended to the prefix
    @param output_input: folder for the crop image and the yaml
    @param output_view: folder for the view image; no view is saved when None
    @param view_channels: channel indices of the view image; no view is saved when None
    @param view_deeper: when positive, the view crop infos are extended by this amount along z
    @raise Exception: when the crop image or the yaml already exists
    """
    save_view = output_view is not None and view_channels is not None

    os.makedirs(output_input, exist_ok=True)
    if output_view is not None:
        os.makedirs(output_view, exist_ok=True)

    base_name = cropping_set.annotation_prefix + number_desc
    yaml_name = base_name + ".yaml"
    yaml_path = os.path.join(output_input, yaml_name)
    image_path = os.path.join(output_input, base_name + ".tif")
    if save_view:
        view_path = os.path.join(output_view, base_name + "_view.tif")
    print(yaml_name)

    # Refuse to overwrite anything: annotations may already exist for that crop.
    for target in (image_path, yaml_path):
        if os.path.isfile(target):
            raise Exception("Requested save path exists!\n" + 
                            target + "\n\nBe carefull as annotations may already exist.")

    merged_crop = cropping_set.crop_by_infos_merged(crops_volume, channels)
    print("saving", image_path)
    imageio.mimwrite(image_path, merged_crop)
    print("saving", yaml_path)
    clb.cropping.CropInfo.save(crops_volume, yaml_path)

    if not save_view:
        return

    print("saving view", view_path)
    view_infos = crops_volume
    if view_deeper > 0:
        view_infos = clb.cropping.CropInfo.extend_infos(crops_volume, (view_deeper, 0, 0))
    view_crop = cropping_set.crop_by_infos_merged(view_infos, view_channels)
    view_crop = clb.dataprep.utils.ensure_3d_rgb(view_crop)

    imageio.mimwrite(view_path, view_crop)

### Crop new data for segmentation 1um (dapi) with view

In [None]:
import clb.cropping
# Pick up any edits made to clb.cropping without restarting the kernel.
importlib.reload(clb.cropping)
# Safety flag of this section: set to True manually to propose and save crops.
crop_new_segment_data = False
# Prefix of the crop description used for the segmentation crops of this section.
crop_new_segment_512_prefix = "seg"

In [None]:
# Set to crop from and the number of the new crop (increment it manually after saving).
cropping_set = cropping_set_512DenoisingData_S5
number_prefix = crop_new_segment_512_prefix
crop_number_for_random_crops = 76

crop_number_desc = number_prefix + '_' + str(crop_number_for_random_crops)

# Crop size in (z, y, x) order.
size_z, size_y, size_x = (5, 128, 128) 
sizes = size_z, size_y, size_x
channels = [0]  # only dapi
view_channels = [0,1]  # all to help, None if not needed
# The view image is extended by this amount along z (see save_croped_data).
view_deeper = 3
random_output_root = cropping_set.output_root_dir
random_output_input = os.path.join(random_output_root, "input")
random_output_view = os.path.join(random_output_root, "view")

fig = None
if crop_new_segment_data:
    print (crop_number_desc)
    # Alternative: crop at an explicit position instead of a random one.
    #crops_volume, first_crop = get_and_show_crop(cropping_set, (101, 220, 20), sizes, channels)
    crops_volume, first_crop = get_and_show_random_crop(cropping_set, size_z, size_y, size_x, channels)
    fig = show_all(1,1, first_crop, scale=7)
# Last expression of the cell: displays the proposal (nothing when the flag is off).
fig

In [None]:
if crop_new_segment_data:
    # Approve the proposal from the previous cell: create the folders and save
    # the crop together with its view image.
    os.makedirs(random_output_input, exist_ok=True)
    os.makedirs(os.path.join(random_output_root, "annotations"), exist_ok=True)
    save_croped_data(cropping_set, crops_volume, channels, crop_number_desc, 
                     random_output_input, random_output_view, view_channels, view_deeper)

### Crop new data for CLB counting validation (dapi, PAN, KI)

In [None]:
import clb.cropping
# Pick up any edits made to clb.cropping without restarting the kernel.
importlib.reload(clb.cropping)
# Safety flag of this section: set to True manually to propose and save crops.
crop_new_counting_data = False
# The two kinds of counting crops; the kind selects the crop size in the next cell.
instance_prefix = 'instance'
class_prefix = 'class'

In [None]:
# Set to crop from, kind of crop and its number (increment it manually after saving).
cropping_set = cropping_set_T6
number_prefix = instance_prefix
counter_crop_number = 13

counter_crop_number_desc = 'C_' + number_prefix + '_' + str(counter_crop_number)
# Crop size in (z, y, x) order depends on the kind of the crop.
if number_prefix == class_prefix:
    size_z, size_y, size_x = (30, 400, 400) # class size
else:
    size_z, size_y, size_x = (20, 200, 200) # instance size
channels = [0,1,2]

# Counting crops are stored next to the set, in "<output_root_dir>_counters".
counter_output_root = cropping_set.output_root_dir + "_counters"
counter_output_input = os.path.join(counter_output_root, "input")

fig = None
if crop_new_counting_data:
    print (counter_crop_number_desc)
    crops_volume, first_crop = get_and_show_random_crop(cropping_set, size_z, size_y, size_x, channels)
    fig = show_all(1,1, first_crop, scale=7)
# Last expression of the cell: displays the proposal (nothing when the flag is off).
fig

#### Save crop and crop info

In [None]:
if crop_new_counting_data:
    # Approve the proposal from the previous cell: create the folders and save the crop.
    os.makedirs(counter_output_input, exist_ok=True)
    os.makedirs(os.path.join(counter_output_root, "counters"), exist_ok=True)

    save_croped_data(cropping_set, crops_volume, channels, counter_crop_number_desc, counter_output_input)

### Crop new data for CLB classification (dapi, PAN, KI)

In [None]:
import clb.cropping
# Pick up any edits made to clb.cropping without restarting the kernel.
importlib.reload(clb.cropping)
# Safety flag of this section: set to True manually to propose and save crops.
crop_new_random_data = False
# Prefix of the crop description used in this section.
crop_rand_prefix = 'class_rand'
# Appended to the set's output folder to get the folder where these crops are stored.
root_dir_suffix = "_extended"

In [None]:
# Set to crop from and the number of the new crop (increment it manually after saving).
cropping_set = cropping_set_T8_2
number_prefix = crop_rand_prefix
random_crop_number = 5

crop_number_desc = number_prefix + '_' + str(random_crop_number)

# Crop size in (z, y, x) order.
size_z, size_y, size_x = (15, 400, 400) # class size
channels = [0,1,2]
random_output_root = cropping_set.output_root_dir + root_dir_suffix
random_output_input = os.path.join(random_output_root, "input")

fig = None
if crop_new_random_data:
    print (crop_number_desc)
    crops_volume, first_crop = get_and_show_random_crop(cropping_set, size_z, size_y, size_x, channels)
    fig = show_all(1,1, first_crop, scale=7)
# Last expression of the cell: displays the proposal (nothing when the flag is off).
fig

In [None]:
if crop_new_random_data:
    # Approve the proposal from the previous cell: create the folders and save the crop.
    os.makedirs(random_output_input, exist_ok=True)
    os.makedirs(os.path.join(random_output_root, "annotations"), exist_ok=True)

    save_croped_data(cropping_set, crops_volume, channels, crop_number_desc, random_output_input)

### Crop new data for CLB classification (dapi, PDL1-AF568, CD8-AF647)

In [None]:
import clb.cropping
# Pick up any edits made to clb.cropping without restarting the kernel.
importlib.reload(clb.cropping)
# Safety flag of this section: set to True manually to propose and save crops.
crop_new_pdl_cd_data = False
# Prefix of the crop description used in this section.
crop_pdlcd_prefix = 'class_pdlcd'

In [None]:
# Set to crop from and the number of the new crop (increment it manually after saving).
cropping_set = cropping_set_Tonsil_CD8_S3
number_prefix = crop_pdlcd_prefix
random_crop_number = 30

crop_number_desc = number_prefix + '_' + str(random_crop_number)

# Crop size in (z, y, x) order.
size_z, size_y, size_x = (15, 600, 600) # class size
sizes = size_z, size_y, size_x
# Channel indices stored in the crop; note that index 2 is skipped here.
channels = [0,1,3]
random_output_root = cropping_set.output_root_dir
random_output_input = os.path.join(random_output_root, "input")

fig = None
if crop_new_pdl_cd_data:
    print (crop_number_desc)
    # Alternative: crop at an explicit position instead of a random one.
    #crops_volume, first_crop = get_and_show_crop(cropping_set, (101, 220, 20), sizes, channels)
    crops_volume, first_crop = get_and_show_random_crop(cropping_set, size_z, size_y, size_x, channels)
    fig = show_all(1,1, first_crop, scale=7)
# Last expression of the cell: displays the proposal (nothing when the flag is off).
fig

In [None]:
if crop_new_pdl_cd_data:
    # Approve the proposal from the previous cell: create the folders and save the crop.
    os.makedirs(random_output_input, exist_ok=True)
    os.makedirs(os.path.join(random_output_root, "annotations"), exist_ok=True)

    save_croped_data(cropping_set, crops_volume, channels, crop_number_desc, random_output_input)

## Recrop using previous crops

In [None]:
importlib.reload(clb.cropping)

def extend_crop_info(infos, padding_3d):
    """Return the crop infos extended by padding_3d (see clb.cropping.CropInfo.extend_infos).

    The class is looked up through clb.cropping, as everywhere else in this
    notebook, so that the freshly reloaded module is used rather than a bare
    CropInfo name that may be undefined or refer to the class from before the reload.
    """
    return clb.cropping.CropInfo.extend_infos(infos, padding_3d)
    
def get_extended_crop_info(dataset, padding_3d):
    """Load the crop infos stored with the dataset and return them extended by padding_3d."""
    stored_infos = clb.cropping.CropInfo.load(dataset.crop_info)
    extended_infos = extend_crop_info(stored_infos, padding_3d)
    return extended_infos

### Recrop instance crops for classification crops (dapi, PAN, KI)

In [None]:
# Safety flag of this section: set to True manually to let the recropping cell write files.
recrop_new_data = False

In [None]:
def find_info_index_by_z(infos, z):
    """Return the position of the first info whose z equals the given one.

    Raises ValueError when no info has that z.
    """
    slice_numbers = [info.z for info in infos]
    return slice_numbers.index(z)

In [None]:
def get_extended_labels(dataset, padding_3d, original_data_shape):
    """Place the dataset's labels inside an empty volume that matches the extended crop.

    The extended crop infos are restricted to original_data_shape, an empty
    volume is created for them, and every labelled slice is copied to the
    position it occupies inside the extended slice with the same z.
    """
    source_infos = clb.cropping.CropInfo.load(dataset.crop_info)
    padded_infos = get_extended_crop_info(dataset, padding_3d)
    label_volume = imageio.volread(dataset.gt)

    # keep the extended crop inside the bounds of the original data
    padded_infos = clb.cropping.CropInfo.restrict_infos(padded_infos, original_data_shape)

    extended_labels = clb.cropping.CropInfo.empty_volume(padded_infos)
    # The values are not needed, but the unpacking still rejects a padding
    # that does not consist of exactly three values.
    _pad_z, _pad_y, _pad_x = padding_3d

    for label_index, source_info in enumerate(source_infos):
        target_index = find_info_index_by_z(padded_infos, source_info.z)
        target_info = padded_infos[target_index]

        # offset of the original crop inside the extended slice
        top = source_info.y - target_info.y
        bottom = top + source_info.shape[0]
        left = source_info.x - target_info.x
        right = left + source_info.shape[1]

        extended_labels[target_index][top:bottom, left:right] = label_volume[label_index]

    return extended_labels

In [None]:
def crop_all_channels_labels(cropping_set, dataset, padding_3d, data_shape):
    """Crop channels 0, 1 and 2 with the extended crop infos and build the matching labels.

    Returns:
        Tuple (channel 0 crop, channel 1 crop, channel 2 crop, extended labels);
        callers in this notebook name the channels dapi, kpi and pan.
    """
    infos = get_extended_crop_info(dataset, padding_3d)
    dapi_crop, kpi_crop, pan_crop = [cropping_set.crop_by_infos(infos, channel)
                                     for channel in (0, 1, 2)]
    extended_labels = get_extended_labels(dataset, padding_3d, data_shape)
    return dapi_crop, kpi_crop, pan_crop, extended_labels

In [None]:
cropping_set = cropping_set_T3
datasets = cropping_set.get_datasets()[5:6]
padding = (8, 100, 100)

extended_output_root = cropping_set.output_root_dir + "_extended"
extended_output_input = os.path.join(extended_output_root,"Input")

if recrop_new_data:
    # Output folders are created only behind the safety flag, so that running
    # this cell with the flag left at False changes nothing on disk.
    os.makedirs(extended_output_input, exist_ok=True)
    os.makedirs(os.path.join(extended_output_root,"Annotations"), exist_ok=True)

    print ("Extending by", padding)
    for dataset in datasets:
        print ("Recropping extended for...", dataset.input, end=' ')
        extended_crop_infos = get_extended_crop_info(dataset, padding)
        dapi, kpi, pan, labels = crop_all_channels_labels(cropping_set, dataset, padding, cropping_set.get_data_shape())
        print (" done.")

        print (dapi.shape, dapi.dtype)

        old_annotation_name = os.path.basename(dataset.input)
        # Name of the source crop without its 4-character extension (e.g. ".tif").
        base_name = old_annotation_name[:-4]

        # The three channels are stacked along axis 3; labels are saved separately below.
        output_path_stack = os.path.join(extended_output_input, base_name + "_stack.tif")
        output_stack = np.stack([dapi, kpi, pan], axis=3)

        print("saving", output_path_stack)
        imageio.mimwrite(output_path_stack, output_stack)

        output_path_labels = os.path.join(extended_output_input, base_name + "_labels.tif")
        print("saving", output_path_labels)
        imageio.mimwrite(output_path_labels, labels)

        # NOTE(review): the saved infos are the extended ones before
        # restrict_infos is applied, while the labels are built from the
        # restricted infos - confirm that this is intended.
        output_path_yaml = os.path.join(extended_output_input, base_name + "_stack.yaml")
        print("saving", output_path_yaml)
        clb.cropping.CropInfo.save(extended_crop_infos, output_path_yaml)
        

### Recrop classification crops for instance crops (dapi, PDL1, CD8)

In [None]:
def get_splitted_crop_info(dataset, desired_shape, tile_num):
    """Build the crop infos of one tile cut out of the crop of `dataset`.

    Tiles of size `desired_shape` are numbered row by row starting at the
    y/x origin of the first crop info; the z slices are taken from the middle
    of the available slices.

    Args:
        dataset: object exposing `crop_info`, the path of the crop infos.
        desired_shape: (number of z slices, height, width) of one tile.
        tile_num: zero-based number of the requested tile.

    Returns:
        The result of CropInfo.create_volume for the tile.

    Raises:
        ZeroDivisionError: if the tile is wider than the crop (no tile fits in a row).
    """
    infos = clb.cropping.CropInfo.load(dataset.crop_info)
    first_info = infos[0]
    dz_size, dheight, dwidth = desired_shape

    # Only the number of tiles per row is needed to turn a tile number into (row, column).
    tile_count_x = first_info.shape[-1] // dwidth
    tile_num_y, tile_num_x = divmod(tile_num, tile_count_x)

    y = first_info.y + tile_num_y * dheight
    x = first_info.x + tile_num_x * dwidth

    zs = [i.z for i in infos]
    # Centre the requested slices. The start is clamped at 0: when fewer slices
    # exist than requested, a negative start would keep only the tail of the
    # list instead of all available slices.
    z_start = max(0, (len(zs) - dz_size) // 2)
    zs = zs[z_start:z_start + dz_size]

    return clb.cropping.CropInfo.create_volume(y, x, dheight, dwidth, zs)

In [None]:
importlib.reload(clb.cropping)
# Safety flag: tiles are cropped and saved only when this is True.
recrop_dapi_data = False

cropping_set_classes = cropping_set_NewClass_S2
cropping_set_seg = cropping_set_NewClass_S2_Seg
datasets = cropping_set_classes.get_datasets()[0:3]

shape = (3, 200, 200)
number_of_tiles = 9
channels = [0, 1, 3]
view_extend = (4, 0, 0)

instance_output_root = cropping_set_seg.output_root_dir
instance_output_input = os.path.join(instance_output_root,"input")
print(instance_output_input)

if recrop_dapi_data:
    # Output folders are created only behind the safety flag, so that running
    # this cell with the flag left at False changes nothing on disk.
    os.makedirs(instance_output_input, exist_ok=True)
    os.makedirs(os.path.join(instance_output_root,"annotations"), exist_ok=True)

    print ("Splitting into", shape)
    for dataset in datasets:
        print ("Splitting for... ", os.path.basename(dataset.input))
        old_annotation_name = os.path.basename(dataset.input)
        # Name of the source crop without its 4-character extension (e.g. ".tif").
        base_name = old_annotation_name[:-4]

        for cube_num in range(number_of_tiles):
            new_annotation_name = base_name + "_tile_{0}.tif".format(cube_num)
            new_info_name = base_name + "_tile_{0}.yaml".format(cube_num)

            output_path_yaml = os.path.join(instance_output_input, new_info_name)
            output_path_dapi = os.path.join(instance_output_input, new_annotation_name)

            # Never overwrite a tile that has already been produced.
            if os.path.isfile(output_path_dapi) or os.path.isfile(output_path_yaml):
                print("Requested save path exists!", output_path_dapi)
                continue

            print("saving", output_path_dapi)
            one_cube_infos = get_splitted_crop_info(dataset, shape, cube_num)
            cube_dapi = cropping_set_seg.crop_by_infos(one_cube_infos, 0)
            imageio.mimwrite(output_path_dapi, cube_dapi)

            print("saving", output_path_yaml)
            clb.cropping.CropInfo.save(one_cube_infos, output_path_yaml)

            # get extended with all channels
            new_deeper_name = base_name + "_tile_{0}_view.tif".format(cube_num)
            output_path_deeper = os.path.join(instance_output_input, new_deeper_name)

            print("saving", output_path_deeper)
            deeper_cube_infos = extend_crop_info(one_cube_infos, view_extend)
            cube_channels_data = [cropping_set_seg.crop_by_infos(deeper_cube_infos, channel)
                                  for channel in channels]
            output_stack = np.stack(cube_channels_data, axis=3)

            imageio.mimwrite(output_path_deeper, output_stack)

## Establish voxel size

In [None]:
def get_crop_voxel_size(crop_info_path):
    """Return the single voxel size shared by all crop infos stored at `crop_info_path`.

    A crop info without a `voxel_size` attribute counts as having voxel size None.

    Raises:
        Exception: if the number of distinct voxel sizes among the crop infos is not exactly one.
    """
    distinct_sizes = list({getattr(info, 'voxel_size', None)
                           for info in clb.cropping.CropInfo.load(crop_info_path)})
    if len(distinct_sizes) == 1:
        return distinct_sizes[0]
    raise Exception("Different voxel sizes in slices of the same 3d crop.")

### Inspect crop voxel size 

In [None]:
# When True, this cell prints the voxel size of every cropping set and of each of its crops.
inspect_crop_voxel_sizes = True

def show_crops_voxel_size(cropping_set):
    """Print the voxel size of `cropping_set` and the voxel size stored for each of its crops."""
    crop_info_paths = [dataset.crop_info for dataset in cropping_set.get_datasets()]
    print("Dataset:", cropping_set.internal_name, "voxel size:", cropping_set.get_voxel_size())
    for crop_info_path in crop_info_paths:
        stored_voxel_size = get_crop_voxel_size(crop_info_path)
        print('\t',os.path.basename(crop_info_path), "has voxel size:", stored_voxel_size)

# Report the voxel sizes of all cropping sets; this only loads and prints.
if inspect_crop_voxel_sizes:   
    for cropping_set in all_cropping_sets:
        show_crops_voxel_size(cropping_set)

### Set voxel sizes for all crops

In [None]:
# Safety flag: crop info files are rewritten with the dataset voxel size only when this is True.
set_voxel_sizes_all_crops = False

def set_crops_voxel_size(cropping_set, override_voxel_size=None):
    """Store a voxel size in every crop info file of `cropping_set` that does not have it yet.

    Args:
        cropping_set: set whose datasets' crop info files are inspected.
        override_voxel_size: voxel size to store; when falsy, the voxel size
            reported by `cropping_set.get_voxel_size()` is used instead.

    Files whose voxel size already equals the target are left untouched;
    the others are rewritten in place.
    """
    dataset_voxel_size = cropping_set.get_voxel_size()
    print("Dataset:", cropping_set.internal_name, "voxel size:", dataset_voxel_size)

    target_voxel_size = override_voxel_size if override_voxel_size else dataset_voxel_size

    crop_info_paths = [dataset.crop_info for dataset in cropping_set.get_datasets()]
    for crop_info_path in crop_info_paths:
        current_voxel_size = get_crop_voxel_size(crop_info_path)
        print('\t',os.path.basename(crop_info_path), "has voxel size:", current_voxel_size)
        if current_voxel_size != target_voxel_size:
            loaded_infos = clb.cropping.CropInfo.load(crop_info_path)
            for crop_info in loaded_infos:
                crop_info.voxel_size = target_voxel_size
            clb.cropping.CropInfo.save(loaded_infos, crop_info_path)
            # Read the file back to show what has actually been stored.
            print('\t', "Changed to:", get_crop_voxel_size(crop_info_path))
            
if set_voxel_sizes_all_crops:
    # Only sets that have a raw lif path are processed.
    sets_with_raw_lif = [cs for cs in all_cropping_sets if cs.raw_lif_path is not None]
    for cropping_set in sets_with_raw_lif:
        set_crops_voxel_size(cropping_set)
        
#set_crops_voxel_size(cropping_set_T8_1, override_voxel_size=(0.5, 0.46, 0.46))
#set_crops_voxel_size(cropping_set_T8_2, override_voxel_size=(0.5, 0.46, 0.46))
#set_crops_voxel_size(cropping_set_T8_3, override_voxel_size=(0.5, 0.46, 0.46))

## Validate annotations

In [None]:
def validate_crop_info_in_bounds(d, y_size, x_size):
    """Print a warning when the first crop info of `d` does not fit inside the data bounds.

    The crop infos are read from the yaml file that sits next to `d.input`
    (same path with the last 4 characters replaced by '.yaml'). Only the first
    crop info is checked, against the bounds (1024, y_size, x_size).
    """
    yaml_path = d.input[:-4] + '.yaml'
    first_info = clb.cropping.CropInfo.load(yaml_path)[0]
    bounded_info = first_info.restrict((1024, y_size, x_size))
    if first_info != bounded_info:
        print("Cropping outside of bounds, recrop will be different." + str(bounded_info))

def validate_dtype(gt_volume):
    """Print a warning when `gt_volume` has a dtype other than uint8 or uint16."""
    if gt_volume.dtype not in (np.uint8, np.uint16):
        print("Annotation has unexpecteed type: " + str(gt_volume.dtype))

### Validate annotation for instance segmentation

In [None]:
# Safety flag: the segmentation annotation checks in this cell run only when this is True.
run_validate_annotations_segmentation = False

from clb.image_processing import find_corresponding_labels

# Only cropping sets whose internal_name contains one of these texts pass
# set_filter; None lets every cropping set through.
check_subset_internal_names = ['512']

def set_filter(cropping_set):
    """Tell whether `cropping_set` is selected by `check_subset_internal_names`.

    Every set is selected when `check_subset_internal_names` is None;
    otherwise the set's internal_name has to contain at least one of the
    listed texts.
    """
    if check_subset_internal_names is None:
        return True
    matches = [text in cropping_set.internal_name for text in check_subset_internal_names]
    return any(matches)
    

def all_datasets_from_training(annotation_root, subfolder):
    """Collect the standard datasets of the 'train', 'val' and 'test' folders of `subfolder`."""
    collected = []
    for split in ('train', 'val', 'test'):
        split_dir = os.path.join(subfolder, split)
        collected.extend(get_standard_datasets(annotation_root, split_dir, False))
    return collected

# (dataset index, slice index, label) of objects reported in the raw-relabel check.
crops_with_big_problems = []

if run_validate_annotations_segmentation:
    datasets_private = []
    groups_private = filter(set_filter, all_segmentation_sets)

    for cropping_set in groups_private:
        datasets_private += cropping_set.get_datasets()

    datasets_clb = []
    groups = ['T3','T5','T6','T8','NC']
    # Join the path components separately: a hard-coded backslash separator
    # only resolves on Windows.
    training_root = os.path.join(code_root, "data", "training")
    for group in groups:
        datasets_clb += all_datasets_from_training(training_root, group)

    # Choose here which collection is validated (datasets_private or datasets_clb).
    datasets_current = datasets_private

    for i, d in enumerate(datasets_current):
        # validate that it can be read using imageio
        print ("Validating: {0}\n\t{1}".format(i,d.gt))

        if "_E" in d.gt:
            print ("Evaluation crops are temporary skipped.")
            continue

        crop_volume = imageio.volread(d.gt)

        # validate that every slice is 200x200 or 128x128
        if crop_volume.shape[1:] not in [(200, 200),(128,128)]:
            print("Incorrect shape:", d.gt, crop_volume.shape[1:])

        validate_crop_info_in_bounds(d, 1024, 1024)
        validate_dtype(crop_volume)

        # validate that every object is connected
        crop_volume_no_blobs = crop_volume.copy()
        crop_volume_no_blobs[crop_volume_no_blobs == 1] = 0 # remove blobs
        relabel_volume = skimage.measure.label(crop_volume_no_blobs)
        for k, (raw, relabeled) in enumerate(zip(crop_volume_no_blobs, relabel_volume)):
            # Compare the annotated labels with the connected-component labels in
            # both directions and keep the pairs whose overlap is below 1.0.
            mapping_12 = find_corresponding_labels(raw, relabeled, return_overlap=True)
            overlaps_12 = [(a,b,overlap) for a, (b, overlap) in mapping_12.items() if overlap < 1.0]

            mapping_21 = find_corresponding_labels(relabeled, raw, return_overlap=True)
            overlaps_21 = [(a,b,overlap) for a, (b, overlap) in mapping_21.items() if overlap < 1.0]

            if overlaps_12:
                print("\tIncorrect objects in raw-relabel:", d.gt, "s={0}".format(k), overlaps_12)
                crops_with_big_problems.append((i, k, overlaps_12[0][0]))
            if overlaps_21:
                print("\tIncorrect objects in relabel-raw:", d.gt, "s={0}".format(k), overlaps_21)

In [None]:
def get_suspect(file, frame, label):
    """Return slice `frame` of annotation number `file` with everything except `label` zeroed.

    `file` indexes `datasets_current`; the selected path, slice number and
    label value are printed before the slice is read.
    """
    inspect_gt = datasets_current[file].gt
    print ("File: ", inspect_gt)
    print ("Slice number (first=0): ", frame)
    print ("Label value: ", label)
    print ()
    suspect_slice = imageio.volread(inspect_gt)[frame]
    other_pixels = suspect_slice != label
    suspect_slice[other_pixels] = 0
    return suspect_slice

def get_all_suspects(suspect_list):
    """Return the get_suspect image for every (file, frame, label) triple in `suspect_list`."""
    suspects = []
    for file, frame, label in suspect_list:
        suspects.append(get_suspect(file=file, frame=frame, label=label))
    return suspects

#show_all(1, 32, get_suspect(file=2, frame=0, label=38), get_suspect(file=6, frame=0, label=21), scale=20)
#show_all((len(crops_with_big_problems)+2)//3, 3, scale=5, *get_all_suspects(crops_with_big_problems))

### Validate annotation for classification

In [None]:
import clb.classify.utils
# Re-execute clb.classify.utils so that the module's current source is used.
importlib.reload(clb.classify.utils)

# Safety flags: each classification validation below runs only when its flag is True.
run_validate_annotations_classification_pan_ki = False
run_validate_annotations_classification_pdl_cd = False

def validate_annotation_class(dataset_names, class_name):
    """Check every annotation of `class_name` in the given dataset groups and print the problems found.

    For each annotation volume the checks are: both in-plane dimensions are
    at least 300, no value is greater than 2, the first crop info fits the
    bounds, the dtype is an expected one, and at most one slice contains
    any annotation.

    Args:
        dataset_names: names of the dataset groups under `annotation_root`.
        class_name: class whose annotations are collected with get_class_datasets.
    """
    print ("\nNow validating class:", class_name, "\n")
    class_datasets = []
    for group in dataset_names:
        class_datasets.extend(get_class_datasets(annotation_root, group, class_name))

    for number, d in enumerate(class_datasets):
        gt_path = os.path.relpath(d.gt, annotation_root)
        # Reading the volume also proves that imageio can open the file.
        print ("Validating: {0}\n\t{1}".format(number,gt_path))
        crop_volume = imageio.volread(d.gt)

        # Both in-plane dimensions have to be at least 300.
        plane_shape = crop_volume.shape[1:]
        if np.any(np.array(plane_shape) < 300):
            print("Incorrect shape:", gt_path, plane_shape)

        # Values above 2 are not allowed.
        if np.any(crop_volume > 2):
            print("Invalid values:", gt_path, np.unique(crop_volume))

        validate_crop_info_in_bounds(d, 1024, 1024)
        validate_dtype(crop_volume)

        # At most one slice may carry annotations.
        annotated_slices = [index for index, plane in enumerate(crop_volume) if np.any(plane)]
        if len(annotated_slices) > 1:
            print("More than one slice annotated:", gt_path, annotated_slices)    

In [None]:
dataset_names = ['T3_S1_counters','T5_S1_counters','T6_S1_counters', 
                 'T3_S1_extended','T5_S1_extended','T6_S1_extended','T8_S2_extended']

# Pan-cytokeratin / Ki67 annotations.
if run_validate_annotations_classification_pan_ki:
    for class_name in ("epith", "Ki67"):
        validate_annotation_class(dataset_names, class_name)

dataset_names_pdl_cd = ['TestNewClasses_S1_extended', 'TestNewClasses_S2_extended', 'TestNewClasses_S3_extended',
                       'TestNewClasses_S1_test', 'TestNewClasses_S2_test']

# PDL1 / CD8 annotations.
if run_validate_annotations_classification_pdl_cd:
    for class_name in ("pdl1", "cd8"):
        validate_annotation_class(dataset_names_pdl_cd, class_name)

## Compare annotation data

In [None]:
import clb.classify.utils
# Safety flag: the comparison loop of this cell runs only when this is True.
run_compare_ann = False
# Each tuple is (processed dataset name, raw dataset name[, raw annotations
# sub-directory]); the comparison loop falls back to "Annotations" when the
# third element is absent.
dataset_names_pairs = [('T3_S1_extended', '20180608_T3_S1_subsamples_classes'),
                       ('T5_S1_extended','20180608_T5_S1_subsamples_classes'),
                       ('T6_S1_extended','20180608_T6_S1_subsamples_classes'),
                       ('T8_S2_extended', '20180316_T8_S2_subsamples_classes'),
                       
                       ('T3_S1_counters', '20180608_T3_S1_subsamples_counting', 'annotations'),
                       ('T5_S1_counters', '20180608_T5_S1_subsamples_counting', 'annotations'),
                       ('T6_S1_counters', '20180608_T6_S1_subsamples_counting', 'annotations')]

# Same tuple layout, for the TestNewClass datasets.
dataset_names_pairs_new = [('TestNewClasses_S1_extended', '20181107_TestNewClass_S1_classes', 'annotations'),
                           ('TestNewClasses_S2_extended', '20181107_TestNewClass_S2_classes', 'annotations'),
                           ('TestNewClasses_S3_extended', '20181107_TestNewClass_S3_classes', 'annotations'),
                           ('TestNewClasses_S1_test', '20181107_TestNewClass_S1_classes_test', 'annotations'),
                           ('TestNewClasses_S2_test', '20181107_TestNewClass_S2_classes_test', 'annotations')]

def compare(path_a,path_b):
    """Compare two volume files and print every difference found.

    The file names (without directories) are compared first, then both
    volumes are read with imageio.volread and compared by shape and by value.

    Args:
        path_a: path of the first volume.
        path_b: path of the second volume.
    """
    name_a = os.path.basename(path_a)
    name_b = os.path.basename(path_b)

    print("Comparing:", name_a)
    if name_a != name_b:
        print ("\tDifferent names!\n\t\t", name_a, "\n\t\t", name_b)

    a = imageio.volread(path_a)
    b = imageio.volread(path_b)

    # The shape is checked first so that the element-wise comparison only runs
    # on volumes of equal shape.
    if a.shape != b.shape or np.any(a!=b):
        print ("\tDifferent values!")
 
if run_compare_ann:
    for data_tuple in dataset_names_pairs + dataset_names_pairs_new:
        # Two-element tuples use the default raw annotations folder name.
        if len(data_tuple) > 2:
            processed_name, raw_name, raw_dir_name = data_tuple
        else:
            processed_name, raw_name = data_tuple
            raw_dir_name = "Annotations"

        processed_path = os.path.join(annotation_root, processed_name, "annotations")
        raw_path = os.path.join(raw_annotation_root, raw_name, raw_dir_name)

        process_files = clb.classify.utils.find_all_tiffs(processed_path, "shapes")
        raw_files = clb.classify.utils.find_all_tiffs(raw_path, "shapes")

        print ("Process files:", len(process_files), " Raw files:", len(raw_files))
        # Files are paired by position; surplus files of the longer list are not compared.
        for file_pair in zip(process_files, raw_files):
            compare(*file_pair)

## Prepare summary of all annotations

In [None]:
# Safety flag: the summary files of this section are written only when this is True.
run_prepare_summary = False

import clb.evaluate.evaluator_segment
# Re-execute both modules so that their current source is used below.
importlib.reload(clb.evaluate.evaluator_segment)
importlib.reload(clb.cropping)

def prepare_summary(name, file_paths):
    """Build a text summary of the crops described by the given crop info files.

    The first line holds the dataset name, the total number of crop infos
    (annotated slices) and the number of files (volume crops). One entry
    follows per file with the x/y of its first crop info, all z values and
    the z step.

    Args:
        name: dataset name shown in the header line.
        file_paths: paths of crop info files readable by CropInfo.load.

    Returns:
        The summary as a single string.

    Raises:
        IndexError: if a crop info file contains no crop infos.
    """
    all_data = [(os.path.basename(f), clb.cropping.CropInfo.load(f)) for f in file_paths]
    header = "DATASET: " + name + ", number of annotated crops = " + str(sum(len(cs) for _, cs in all_data))
    header += ", number of volume crops = " + str(len(all_data))

    lines = [header]
    for n, cs in all_data:
        c_first = cs[0]
        zs = [c.z for c in cs]
        # A crop with a single slice has no step; report None instead of failing.
        z_step = zs[1] - zs[0] if len(zs) > 1 else None
        lines.append("\tCrop: {0}\n\t\tx={1}, y={2}, z={3}, z_step={4}".format(n, c_first.x, c_first.y, str(zs), z_step))
    return "\n".join(lines)

def validate_volume_set(volume_set):
    """Print a message when the input or the ground-truth file of `volume_set` is missing.

    Args:
        volume_set: object exposing the `input` and `gt` file paths.

    The message ends with True when the ground-truth file is the missing
    one and with False when only the input file is missing.
    """
    exist_input = os.path.exists(volume_set.input)
    exist_gt = os.path.exists(volume_set.gt)
    if not exist_input or not exist_gt:
        # The boolean has to be converted explicitly: str + bool raises TypeError.
        print (volume_set.input + " is missing gt? " + str(not exist_gt))

def make_summary_file(root_dir, datasets_names, output_name, 
                      input_name="Input", labels_name="Annotations", get_datasets_fun=None):
    """Write the crop summaries of several datasets into one text file.

    For every dataset name the input and labels folders are resolved under
    `root_dir`, the datasets are listed with `get_datasets_fun`, each of them
    is checked with validate_volume_set and the text from prepare_summary is
    written to the output file.

    Args:
        root_dir: folder holding the dataset folders; the output file is created here.
        datasets_names: names of the dataset folders to summarise.
        output_name: file name of the summary, relative to `root_dir`.
        input_name: name of the input sub-folder of each dataset.
        labels_name: name of the labels sub-folder of each dataset.
        get_datasets_fun: callable taking (input_dir, labels_dir) and
            returning the datasets; it has to be provided.

    Raises:
        TypeError: if `get_datasets_fun` is not callable.
    """
    # Fail before the output file is opened (and truncated) when the listing
    # function has not been provided.
    if not callable(get_datasets_fun):
        raise TypeError("get_datasets_fun has to be a callable taking (input_dir, labels_dir)")

    output = os.path.join(root_dir, output_name)
    with open(output, "w") as f:
        for dataset in tqdm(datasets_names):
            print ('Summary for', dataset + '...')
            annotation_base_dir = os.path.join(root_dir, dataset)
            input_dir = os.path.join(annotation_base_dir, input_name)
            labels_dir = os.path.join(annotation_base_dir, labels_name)
            datasets = get_datasets_fun(input_dir, labels_dir)
            for s in datasets:
                validate_volume_set(s)
            # The crop infos live next to each input file, with a '.yaml' extension.
            print(prepare_summary(dataset, [ d.input[:-4] + '.yaml' for d in datasets ]), file=f)
            
if run_prepare_summary:
    # Instance segmentation crops.
    def get_datasets(i,l):
        return clb.evaluate.evaluator_segment.get_all_datasets(i,l,i,i)
    make_summary_file(annotation_root, ['T3_S1','T5_S1','T6_S1','T8_S1','T8_S2','T8_S3',
                                       'TestNewClasses_S1', 'TestNewClasses_S2'], 
                      "summary_instances.txt", get_datasets_fun=get_datasets)

    # Classification crops, listed with the 'epith' class.
    def get_datasets(i,l):
        return clb.classify.utils.get_all_datasets(i,l,i,i,'epith')
    make_summary_file(annotation_root, ['T3_S1_extended','T5_S1_extended','T6_S1_extended','T8_S2_extended',
                                        'T3_S1_counters', 'T5_S1_counters', 'T6_S1_counters'], 
                      "summary_classes_panck_ki67.txt", input_name="input", labels_name="annotations",
                      get_datasets_fun=get_datasets)

    # Classification crops, listed with the 'cd' class.
    def get_datasets(i,l):
        return clb.classify.utils.get_all_datasets(i,l,i,i,'cd')
    make_summary_file(annotation_root, ['TestNewClasses_S1_extended', 'TestNewClasses_S2_extended', 'TestNewClasses_S3_extended',
                                       'TestNewClasses_S1_test', 'TestNewClasses_S2_test'], 
                      "summary_classes_pdl1_cd8.txt", input_name="input", labels_name="annotations",
                      get_datasets_fun=get_datasets)