# Germinal center: chromatin-based cell type detection
---
## Part1: Dataset generation


Data (from Claudio, IFOM): 
10 images depicting germinal centers (annotated by hand), stained for DAPI, CD3 and AICDA. 


### Setting up the environment 

Load the libraries and set up the paths.

In [1]:
# import libraries
import sys
sys.path.append("..")

from pathlib import Path
from glob import glob
import pandas as pd
import os
from tqdm.notebook import tqdm

from src.batch.nuclear_segmentation import segment_objects_stardist2d
from src.batch.extract_features import extract_nmco_feats_batch, measure_intensity_batch,extract_spatial_coordinates_batch
from src.batch.cell_segmentation import cell_seg_dilation_batch
from src.utils.preprocess_images import extract_channel_save_image
from src.utils.cell_type_detection import assign_cell_status

In [2]:
# Root directory of the dataset. The default is the original workstation path;
# set the GC_DATASET_ROOT environment variable to point the notebook at a copy
# of the data elsewhere without editing this cell.
DEFAULT_DATASET_ROOT = '/media/pathy_s/54fe7289-2a61-4f3a-ac84-eeea3c0d1d2f/ifom_germinal_centers/dataset_1/'


def resolve_dataset_root(env=os.environ, default=DEFAULT_DATASET_ROOT):
    """Return the dataset root directory as a string ending in '/'.

    Parameters
    ----------
    env : mapping
        Mapping searched for the key 'GC_DATASET_ROOT' (defaults to the
        process environment).
    default : str
        Root used when the key is missing or empty.

    Returns
    -------
    str
        The chosen root, with a trailing '/' appended if it was missing.
        Every path below is built by plain string concatenation, so the
        trailing separator is required.
    """
    root = env.get('GC_DATASET_ROOT') or default
    return root if root.endswith('/') else root + '/'


path_to_raw_images = resolve_dataset_root()

# Input folders, one per stain, plus the germinal center annotation folder.
path_to_DNA_image_raw = path_to_raw_images + 'DAPI/'
path_to_CD3_image_raw = path_to_raw_images + 'CD3/'
path_to_AICDA_image_raw = path_to_raw_images + 'AICDA/'
path_to_gernminal_center = path_to_raw_images + 'germinal_center_anno/'

# Output folders for the per-stain images produced by the preprocessing step.
path_to_DNA_image = path_to_raw_images + 'DAPI_c/'
path_to_CD3_image = path_to_raw_images + 'CD3_c/'
path_to_AICDA_image = path_to_raw_images + 'AICDA_c/'

# Segmentation and nuclear feature outputs.
path_to_output_segmented_nuclei = path_to_raw_images + "segmented_nucleus/"
path_to_output_ij_nuclei_rois = path_to_raw_images + "segmented_nuc_ij_roi/"
path_to_output_nuclear_features = path_to_raw_images + "nmco_feat/"
path_to_output_segmented_cells = path_to_raw_images + "segmented_cells/"

# Per-cell measurement outputs.
path_to_output_cellular_AICDA_levels = path_to_raw_images + "aicda_level/"
path_to_output_cellular_cd3_levels = path_to_raw_images + "cd3_level/"
path_to_output_germinal_center_loc = path_to_raw_images + "position_wrt_germinal_center/"

path_to_output_spatial_cordiates = path_to_raw_images + "spatial_cordiates/"

# Folder that receives the consolidated CSV tables.
path_to_output_consolidated_dataset = path_to_raw_images + "consolidated_data/"

### Compute features

We convert each channel to a single-channel TIFF, segment the nuclei using a pretrained StarDist model, extract nmco features, and segment the cells by expanding the nuclear boundaries. We then measure the cellular levels of AICDA and CD3, and each cell's location with respect to the germinal center. 

In [3]:
# Step 1: convert the raw images of every stain to single-channel TIFFs.
# Each job is (input folder, output folder, third positional argument);
# the third value presumably selects the channel to keep — TODO confirm.
print('Preprocessing image to make single channel tiff images......')
channel_jobs = [
    (path_to_DNA_image_raw, path_to_DNA_image, 1),
    (path_to_CD3_image_raw, path_to_CD3_image, 2),
    (path_to_AICDA_image_raw, path_to_AICDA_image, 3),
]
for raw_dir, single_channel_dir, channel_arg in channel_jobs:
    extract_channel_save_image(raw_dir, single_channel_dir, channel_arg)

# Step 2: segment nuclei on the DNA images with StarDist (pretrained weights);
# label images and ImageJ ROIs go to their respective output folders.
print('Nuclear segmentation........')
segment_objects_stardist2d(
    image_dir = path_to_DNA_image,
    output_dir_labels = path_to_output_segmented_nuclei,
    output_dir_ijroi = path_to_output_ij_nuclei_rois,
    use_pretrained = True,
)

# Step 3: extract nmco features for the segmented nuclei; the returned table
# is kept in `nuc_features` for the save step at the end of the notebook.
print('Nuclear feature extraction........')
nuc_features = extract_nmco_feats_batch(
    raw_image_path = path_to_DNA_image,
    labelled_image_path = path_to_output_segmented_nuclei,
    output_dir = path_to_output_nuclear_features,
)

# Step 4: derive cell masks from the nuclear masks by boundary expansion.
print('Nuclear boundary expansion to segment cells.......')
cell_seg_dilation_batch(path_to_output_segmented_nuclei, path_to_output_segmented_cells)


Preprocessing image to make single channel tiff images......
Nuclear segmentation........
Found model '2D_versatile_fluo' for 'StarDist2D'.
Loading network weights from 'weights_best.h5'.
Loading thresholds from 'thresholds.json'.
Using default values: prob_thresh=0.479071, nms_thresh=0.3.


100%|██████████| 42/42 [00:06<00:00,  6.14it/s]
100%|██████████| 30/30 [00:04<00:00,  7.09it/s]
100%|██████████| 16/16 [00:02<00:00,  6.85it/s]
100%|██████████| 20/20 [00:02<00:00,  7.08it/s]
100%|██████████| 36/36 [00:05<00:00,  6.57it/s]
100%|██████████| 25/25 [00:03<00:00,  6.54it/s]
100%|██████████| 15/15 [00:02<00:00,  6.61it/s]
100%|██████████| 20/20 [00:03<00:00,  6.54it/s]
100%|██████████| 42/42 [00:06<00:00,  6.64it/s]
100%|██████████| 30/30 [00:04<00:00,  6.34it/s]


Nuclear feature extraction........


100%|██████████| 9778/9778 [09:47<00:00, 16.63it/s]
100%|██████████| 5612/5612 [05:20<00:00, 17.51it/s]
100%|██████████| 3330/3330 [03:08<00:00, 17.68it/s]
100%|██████████| 3754/3754 [03:32<00:00, 17.65it/s]
100%|██████████| 7596/7596 [07:38<00:00, 16.57it/s]
100%|██████████| 4914/4914 [04:33<00:00, 17.94it/s]
100%|██████████| 3432/3432 [03:53<00:00, 14.72it/s]
100%|██████████| 3992/3992 [04:37<00:00, 14.38it/s]
100%|██████████| 8091/8091 [07:50<00:00, 17.18it/s]
100%|██████████| 6680/6680 [06:16<00:00, 17.73it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Nuclear boundary expansion to segment cells.......


100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


In [4]:
# Measure, for every segmented cell, the intensity in each stain image and in
# the germinal center annotation image.
print('computing cellular levels of proteins.....')


def _measure_in_cells(image_dir, result_dir):
    """Run measure_intensity_batch on the segmented-cell masks for one image folder."""
    return measure_intensity_batch(
        labelled_image_path = path_to_output_segmented_cells,
        protein_image_path = image_dir,
        output_dir = result_dir,
    )


cd3_levels = _measure_in_cells(path_to_CD3_image, path_to_output_cellular_cd3_levels)
aicda_levels = _measure_in_cells(path_to_AICDA_image, path_to_output_cellular_AICDA_levels)
# The annotation folder is passed as the "protein" image, so this table holds
# the annotation intensity per cell rather than a protein level.
gc_levels = _measure_in_cells(path_to_gernminal_center, path_to_output_germinal_center_loc)

  0%|          | 0/10 [00:00<?, ?it/s]

computing cellular levels of proteins.....


100%|██████████| 10/10 [02:02<00:00, 12.30s/it]
100%|██████████| 10/10 [02:02<00:00, 12.29s/it]
100%|██████████| 10/10 [02:03<00:00, 12.34s/it]


In [3]:
# Extract spatial coordinates from the segmented nuclei label images; the
# returned table is written out with the other tables in the save step.
spatial_coordiates = extract_spatial_coordinates_batch(
    labelled_image_path = path_to_output_segmented_nuclei,
    output_dir = path_to_output_spatial_cordiates,
)


### Save output

In [5]:
# Give each table a unique per-object key of the form "<image>_<label>".
for table in (nuc_features, aicda_levels, cd3_levels, gc_levels):
    table['nuc_id'] = table['image'].astype(str) + '_' + table['label'].astype(str)
# NOTE(review): spatial_coordiates does not receive a nuc_id here (unchanged
# from the original cell) — confirm whether downstream analysis needs one.

# Save the acquired data. Joining with Path avoids the doubled separator that
# string concatenation produced (the directory string already ends in '/').
consolidated_dir = Path(path_to_output_consolidated_dataset)
consolidated_dir.mkdir(parents=True, exist_ok=True)

tables_to_save = {
    "nuc_features.csv": nuc_features,
    "aicda_levels.csv": aicda_levels,
    "cd3_levels.csv": cd3_levels,
    "gc_levels.csv": gc_levels,
    "spatial_coordiates.csv": spatial_coordiates,
}
for file_name, table in tables_to_save.items():
    table.to_csv(consolidated_dir / file_name)
