In [1]:
from abc import ABC, abstractmethod
import os
from pathlib import Path
import numpy as np
from PIL import Image

from torch.cuda import empty_cache

In [2]:
from deepflash2 import learner
from deepflash2.models import get_diameters

In [3]:
from findmycells.main import Database, CZIZStack, ImageJROIs, CropStitchingArtefactsCroppingStrategy
from findmycells.utils import convert_12_to_8_bit_rgb_image

In [None]:
project_root_dir = '/mnt/c/Users/dsege/TEMP/test_project/'

if os.path.isdir(project_root_dir) == False:
    project_root_dir = 'path on your system'

In [None]:
project_name = Project(project_root_dir)
# project_root_dir: path of project root directory

# creates the database object and sets-up everything (main subdirs, file infos, ..)


project_name.preprocess(microscopy_files, cropping_strategy)
# microscopy_files: list of all microscopy image files that shall be preprocessed (this would allow step-by-step processing)
# cropping_strategy: object of type CroppingStrategy that specifies which exact cropping strategy shall be used

# infers:
    # microscopy-image-file-loader from filetype (e.g. .czi file loader)
    # whether it´s a z-stack
    # whether it´s an RGB image
# automatically updates the database accordingly


project_name.save_current_status()
# saves current progress status to disk (basically the database.file_infos as pickle file? - what about shapely polygons that might have been loaded?)

project_name.load_status()
# loads the corresponding informations from disk and allows the user to continue from here


project_name.run_segmentations(files, SegmentationStrategy)
# files: list of files that shall be segmented (in case of df2 make sure that all are used due to image statistics & cellpose diameter)
# SegmentationStrategy: e.g. deepflash2_binary, deepflash2_instances, intellesis

# launches the respective segmentations and saves all progress in the database object & all segmentations in the corresponding subdir


project_name.run_quantifications(files, QuantificationStrategy)
# files: list of files whose segmentations shall be used for quantification
# QuantificationStrategy: e.g. 2D, 3D_grid_estimation, 3D_entire_volume (enable selection of more specific options / exclusion criteria)

# Launches the respective quantifications and saves all progress in the database object & all quantification results in the results table


In [4]:
project_root_dir = '/mnt/c/Users/dsege/TEMP/test_project/'

if os.path.isdir(project_root_dir) == False:
    project_root_dir = 'path on your system'


db = Database(project_root_dir)

In [5]:
db.deepflash2_temp_dir

'/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/'

# Preprocessing

Change database:

- input:
    required - one of the following:
    a) raw microscopy images (e.g. RGB z-stack)
    b) pre-processed images (e.g. cropped, brightnes-contrast adjusted, or converted to single planes)
    
    optional:
    a) roi files (that mark the corresponding areas in each image that shall be quantified)
    b) masks (that mark the corresponding areas in each image that shall be quantified)
    

In [None]:
# Preprocessing

db.create_subdirectory_structure(db.preprocessed_images_dir)

for file_id in db.file_infos['file_id']:
    if file_id == '0010':
        file_infos = db.get_file_infos(file_id)
        group, subject = file_infos['group_id'], file_infos['subject_id']
        filename, image_filetype, roi_filetype = file_infos['original_file_id'], file_infos['microscopy_filetype'], file_infos['rois_filetype']
        image_filepath_in = f'{db.microscopy_image_dir}{group}/{subject}/{filename}{image_filetype}' 
        rois_filepath_in = f'{db.rois_to_analyze_dir}{group}/{subject}/{filename}{roi_filetype}'
        
        print('starting to load file_id: ' + file_id)
        microscopy_image = CZIZStack(image_filepath_in)
        rois = ImageJROIs(rois_filepath_in)
        # right now only one ROI that is used for all planes. Ultimately, however, it should also be possible to have plane-specific ROI(s)!
        cropping_strategy = CropStitchingArtefactsCroppingStrategy()
        
        for plane_idx in range(microscopy_image.total_planes):
            plane_id = str(plane_idx).zfill(3)
            image_plane = microscopy_image.as_array[plane_idx].copy() 
            cropped_image = cropping_strategy.crop_image(image_plane)
            cropped_image = convert_12_to_8_bit_rgb_image(cropped_image)
            cropped_image = Image.fromarray(cropped_image, 'RGB')
            image_filepath_out = f'{db.preprocessed_images_dir}{group}/{subject}/{file_id}-{plane_id}.png'
            cropped_image.save(image_filepath_out)
            del image_plane, cropped_image
            print(f'done with plane {plane_id}')
        
        cropping_strategy.adjust_rois(rois)
        rois.from_array_to_shapely_polygon()
        
        db.update_file_infos(file_id, 'cropping_row_indices', (cropping_strategy.lower_row_idx, cropping_strategy.upper_row_idx))
        db.update_file_infos(file_id, 'cropping_column_indices', (cropping_strategy.lower_col_idx, cropping_strategy.upper_col_idx))
        db.import_roi_polygons(rois)
        
        del microscopy_image
        print('done with processing of file_id: ' + file_id)

In [None]:
db.file_infos

## Next steps:


- Sofie:
    - Method in Database class that creates a pandas DataFrame from file_infos
    - Method in Database class that allows to save the DataFrame to disk



- Integration of df2 API -> Dennis
    - binary predictions
    - cellpose predictions

## Integration of df2:

Next steps:

- df2 creates 2 zarr directories in the tmp folder in Linux that quickly waste disk space and are not automatically deleted: find a fix!
    - fix for the one that is created during (at the end of) the predictions found (see code below)
    - the first one must be generated upon creation of the EnsembleLearner object?
    
- try to run cellpose independently of normal predictions

- move files from the auto-generated df2 directories into the corresponding project dir folders

- changes if all images are processed individually vs. all together? different image stats for predictions? different feature stats for cellpose?

- Run regular df2 predictions on a batch of images (distributed files across 2 or more directories) - not possible: df2 requires a single input dir
- save df_ens 
- delete EnsembleLearner object which was used for predictions
- delete temporary files from system temp dir
- clear GPU cache / memory?

- get diameter for cellpose across all masks
- iterate through all images individually to run cellpose predictions:
    - create a new EnsembleLearner object each time
    - pass a DataFrame with only that single corresponding row as df_ens
    - set cellpose_diameter to calculated diameter (identical for all!)
    - run cellpose preds
    - clear GPU cache / memory?
- delete all temporary files in custom temp dir (contains softmax preds which were required for cellpose)
- move all predictions to the correct project dirs

## Integration of deepflash2:

### I) Run binary predictions on a batch of images:

In [6]:
image_dir = '/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/'
export_dir = db.deepflash2_dir
zarr_dir = db.deepflash2_temp_dir
ensemble_dir = db.trained_models_dir
ensemble_path = Path(ensemble_dir)

ensemble_learner = learner.EnsembleLearner(image_dir = image_dir, ensemble_path = ensemble_path)
ensemble_learner.stats

Found 5 models in folder /mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/trained_models
['Unet_resnet34_2classes-fold1.pth', 'Unet_resnet34_2classes-fold2.pth', 'Unet_resnet34_2classes-fold3.pth', 'Unet_resnet34_2classes-fold4.pth', 'Unet_resnet34_2classes-fold5.pth']
Computing Stats...


(array([0.36283578, 0.38616829, 0.42082587]),
 array([0.11086489, 0.13612533, 0.17018981]))

In [7]:
ensemble_learner.get_ensemble_results(ensemble_learner.files, zarr_store = zarr_dir, export_dir = export_dir, use_tta = ensemble_learner.pred_tta)

df_temp_pred_results = ensemble_learner.df_ens.copy()
df_temp_pred_results

Predicting 0000-003.png


Predicting 0001-003.png


Predicting 0002-003.png


Predicting 0003-004.png


Predicting 0004-004.png


Predicting 0005-004.png


Predicting 0006-004.png


Predicting 0007-004.png


Predicting 0008-004.png


Predicting 0009-005.png


Predicting 0010-005.png


Unnamed: 0,file,ensemble,n_models,uncertainty_score,image_path,softmax_path,uncertainty_path,energy_path
0,0000-003.png,Unet_resnet34_2classes,5,0.103049,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0000-003.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0000-003.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0000-003.png,
1,0001-003.png,Unet_resnet34_2classes,5,0.068256,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0001-003.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0001-003.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0001-003.png,
2,0002-003.png,Unet_resnet34_2classes,5,0.107383,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0002-003.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0002-003.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0002-003.png,
3,0003-004.png,Unet_resnet34_2classes,5,0.081598,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0003-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0003-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0003-004.png,
4,0004-004.png,Unet_resnet34_2classes,5,0.108982,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0004-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0004-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0004-004.png,
5,0005-004.png,Unet_resnet34_2classes,5,0.10805,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0005-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0005-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0005-004.png,
6,0006-004.png,Unet_resnet34_2classes,5,0.108684,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0006-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0006-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0006-004.png,
7,0007-004.png,Unet_resnet34_2classes,5,0.085985,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0007-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0007-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0007-004.png,
8,0008-004.png,Unet_resnet34_2classes,5,0.085817,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0008-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0008-004.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0008-004.png,
9,0009-005.png,Unet_resnet34_2classes,5,0.087464,/mnt/c/Users/dsege/TEMP/test_project/pooled_for_batch_processing_test_all/0009-005.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/smx/0009-005.png,/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp/std/0009-005.png,


Delete EnsembleLearner object that was used for binary segmentations from memory:

In [8]:
del ensemble_learner

### II) Run cellpose preds on individual images to avoid overflow of GPU memory:

In [9]:
deepflash2_masks_dir = db.deepflash2_dir + 'masks/'

mask_paths = [deepflash2_masks_dir + elem for elem in os.listdir(deepflash2_masks_dir)]

masks_as_arrays = []

for mask_as_image in mask_paths:
    with Image.open(mask_as_image) as image:
        masks_as_arrays.append(np.array(image))

cellpose_diameter = get_diameters(masks_as_arrays)
cellpose_diameter

2021-12-27 19:01:11,551 [INFO] WRITING LOG OUTPUT TO /home/ds/.cellpose/run.log


55

In [10]:
empty_cache()

for row_id in range(df_temp_pred_results.shape[0]):
    
    ensemble_learner = learner.EnsembleLearner(image_dir = image_dir, ensemble_path = ensemble_path)
    df_single_row = df_temp_pred_results.iloc[row_id].to_frame().T.reset_index(drop=True).copy()
    ensemble_learner.df_ens = df_single_row
    ensemble_learner.cellpose_diameter = cellpose_diameter
    ensemble_learner.get_cellpose_results(export_dir = export_dir)
    
    del ensemble_learner
    empty_cache()

Found 5 models in folder /mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/trained_models
['Unet_resnet34_2classes-fold1.pth', 'Unet_resnet34_2classes-fold2.pth', 'Unet_resnet34_2classes-fold3.pth', 'Unet_resnet34_2classes-fold4.pth', 'Unet_resnet34_2classes-fold5.pth']
Computing Stats...
Using diameter of 55
2021-12-27 19:02:40,459 [INFO] ** TORCH CUDA version installed and working. **
2021-12-27 19:02:40,461 [INFO] >>>> using GPU
2021-12-27 19:02:40,647 [INFO] ~~~ FINDING MASKS ~~~
2021-12-27 19:03:24,793 [INFO] >>>> TOTAL TIME 44.14 sec
Found 5 models in folder /mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/trained_models
['Unet_resnet34_2classes-fold1.pth', 'Unet_resnet34_2classes-fold2.pth', 'Unet_resnet34_2classes-fold3.pth', 'Unet_resnet34_2classes-fold4.pth', 'Unet_resnet34_2classes-fold5.pth']
Computing Stats...
Using diameter of 55
2021-12-27 19:04:15,394 [INFO] ** TORCH CUDA version installed and working. **
2021-12-27 19:04:15,395 [INFO] >>>> using GPU
2021-12-27 19:04

## Remove all temp directories and files:

**Optional: delete the zarr dirs in the systems tmp dir prior to cellpose predictions**

In [14]:
import shutil

# Remove all zarr directories that were created in systems tmp directory:
temp_zarr_subdirs = [elem for elem in os.listdir('/tmp/') if 'zarr' in elem]
if len(temp_zarr_subdirs) > 0:
    for subdirectory in temp_zarr_subdirs:
        shutil.rmtree(f'/tmp/{subdirectory}/')
    
# Remove all zarr directories that were created in designated df2 temp directory:
shutil.rmtree(db.deepflash2_temp_dir[:db.deepflash2_temp_dir.rfind('/')])

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/c/Users/dsege/TEMP/test_project/03_deepflash2/temp'