In [None]:
import spacr

## Generate Cellpose masks

In [None]:
# This cell drives the whole mask-generation step through one spacr call:
#   1. Organize images into npy stacks.
#   2. Preprocess images for object detection.
#   3. Generate masks.

src = "path"  # placeholder path; replace before running

settings = {
    # Image metadata / experiment identity
    "metadata_type": "cellvoyager",
    "custom_regex": None,
    "experiment": "exp",
    "channels": [0, 1, 2, 3],
    # Nucleus objects
    "nucleus_channel": 0,
    "nucleus_background": 100,
    "nucleus_Signal_to_noise": 10,
    "nucleus_CP_prob": 0,
    # Cell objects
    "cell_channel": 3,
    "cell_background": 100,
    "cell_Signal_to_noise": 10,
    "cell_CP_prob": 0,
    # Pathogen objects
    "pathogen_channel": 1,
    "pathogen_background": 200,
    "pathogen_Signal_to_noise": 5,
    "pathogen_CP_prob": 0,
    # Run options
    "magnification": 20,
    "save": True,
    "plot": False,
    "preprocess": True,
    "masks": True,
    "batch_size": 50,
    "test_mode": False,
}

spacr.core.preprocess_generate_masks(src, settings=settings)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Plot the merged image/mask arrays found under `src` for visual inspection.
src = "path"  # placeholder path; replace before running

settings = {
    # Which objects to keep in the plots
    "include_noninfected": True,
    "include_multiinfected": True,
    "include_multinucleated": True,
    "remove_background": False,
    "filter_min_max": [[0, 1_000_000], [0, 1_000_000], [500, 1_000_000]],
    # Array layout: image channels first, then one mask dimension per object type
    "channel_dims": [0, 1, 2, 3],
    "backgrounds": [100, 100, 100, 100],
    "cell_mask_dim": 4,
    "nucleus_mask_dim": 5,
    "pathogen_mask_dim": 6,
    # Overlay / outline rendering
    "overlay_chans": [0, 2, 3],
    "outline_thickness": 3,
    "outline_color": "gbr",
    "overlay": True,
    # Intensity normalization
    "normalization_percentiles": [1, 99],
    "normalize": True,
    # Figure output
    "print_object_number": True,
    "nr": 10,
    "figuresize": 20,
    "cmap": "inferno",
    "verbose": True,
}

fig = spacr.plot.plot_merged(src, settings)

In [None]:
# This cell performs two jobs through spacr.measure.measure_crop:
#   1. Collect measurement data and save to database.
#   2. Generate single object images.

src = "path"  # placeholder path; replace before running

settings = {
    # Input: the 'merged' sub-folder of src
    "input_folder": f"{src}/merged",
    "channels": [0, 1, 2, 3],
    # Mask dimensions and minimum object sizes
    "cell_mask_dim": 4,
    "cell_min_size": 0,
    "nucleus_mask_dim": 5,
    "nucleus_min_size": 0,
    "pathogen_mask_dim": 6,
    "pathogen_min_size": 0,
    "cytoplasm_min_size": 0,
    # Single-object PNG export
    "save_png": True,
    "crop_mode": ["cell"],
    "use_bounding_box": True,
    "png_size": [[224, 224]],
    "normalize": [2, 99],
    "png_dims": [0, 1, 2],
    "normalize_by": "fov",  # 'fov' or 'png'
    # Measurement output and diagnostics
    "save_measurements": True,
    "plot": False,
    "plot_filtration": False,
    "include_uninfected": False,
    "test_mode": False,
}

# Guard kept from the original cell (presumably protects against re-execution
# in spawned worker processes — confirm against spacr's implementation).
if __name__ == "__main__":
    spacr.measure.measure_crop(settings)

## Capture measurements and crop images

In [None]:
# Calculate recruitment.
from spacr.core import analyze_recruitment
%matplotlib inline

src = "path"  # placeholder path; replace before running

# Plate layout and channel/mask layout handed to analyze_recruitment.
# Each inner list of *_plate_metadata lines up, by position, with the entry at
# the same index in the corresponding *_types / treatments list.
# NOTE(review): pathogen_chann_dim and channel_of_interest are both 2 here,
# while the mask-generation cell uses pathogen_channel 1 — confirm intended.
metadata_settings = {
    "target": "protein",
    "cell_types": ["HeLa"],
    "cell_plate_metadata": None,
    "pathogen_types": [
        "RHDku80WT",
        "RHDku80Dgra64",
        "RHDku80Dgra64D14",
        "RHDhxg",
        "RHDku80Dgra14",
    ],
    "pathogen_plate_metadata": [
        ["c1", "c2", "c3"],
        ["c4", "c5", "c6", "c7", "c8", "c9"],
        ["c10", "c11", "c12", "c13", "c14", "c15"],
        ["c16", "c17", "c18"],
        ["c19", "c20", "c21", "c22", "c23", "c24"],
    ],
    "treatments": ["BR1", "BR2", "BR3"],
    "treatment_plate_metadata": [
        ["c1", "c4", "c5", "c10", "c11", "c16", "c19", "c20"],
        ["c2", "c6", "c7", "c12", "c13", "c17", "c21", "c22"],
        ["c3", "c8", "c9", "c14", "c15", "c18", "c23", "c24"],
    ],
    "metadata_types": "col",
    "channel_dims": [0, 1, 2, 3],
    "cell_chann_dim": 3,
    "cell_mask_dim": 4,
    "nucleus_chann_dim": 0,
    "nucleus_mask_dim": 5,
    "pathogen_chann_dim": 2,
    "pathogen_mask_dim": 6,
    "channel_of_interest": 2,
}

# Plotting and object-filtering options handed to analyze_recruitment.
advanced_settings = {
    # Plotting
    "plot": True,
    "plot_nr": 1,
    "plot_control": True,
    "figuresize": 20,
    # Background handling
    "remove_background": True,
    "backgrounds": 100,
    # Object inclusion rules
    "include_noninfected": False,
    "include_multiinfected": 2,
    "include_multinucleated": False,
    "cells_per_well": 100,
    # [min, max] size ranges per object type
    "pathogen_size_range": [300, 10_000_000],
    "nucleus_size_range": [750, 10_000_000],
    "cell_size_range": [4000, 10_000_000],
    # [min, max] intensity ranges per object type
    "pathogen_intensity_range": [1000, 10_000_000],
    "nucleus_intensity_range": [0, 10_000_000],
    "cell_intensity_range": [0, 10_000_000],
    "target_intensity_min": 300,
}

dfs = analyze_recruitment(src, metadata_settings, advanced_settings)

## Curate masks

## Annotate Images

In [None]:
# Annotate single object images.
from spacr.annotate_app import annotate

src = "path"  # placeholder path; replace before running

# Open the annotation app against the measurements database under src.
annotate(
    db=src + "/measurements/measurements.db",
    image_type="cell_png",
    channels=["r", "g", "b"],
    annotation_column="test",
    geom="3200x2000",
    img_size=(200, 200),
    rows=10,
    columns=18,
)

## Train CNN/Transformer model

In [None]:
# Build a tar file containing all images in the database.
# This can be very slow for large datasets.

src = "path"  # placeholder path; replace before running

spacr.generate_dataset(
    db_path=src + "/measurements/measurements.db",
    dst="path",  # placeholder output location; replace before running
    file_type="cell_png",
    experiment="test",
    sample=20,
)

In [None]:
# Generate train and test folders with class subfolders for training DL models.

src = 'path'  # placeholder path; replace before running

spacr.generate_training_dataset(
    db_path=src + '/measurements/measurements.db',
    # The '/' separator was missing here (src+'train_data'), which fused the
    # folder name onto src instead of creating 'train_data' inside it; every
    # other src-derived path in this notebook uses an explicit '/'.
    dst=src + '/train_data',
    mode='annotation',  # annotation, recruitment or metadata
    annotation_column='test',  # for mode = annotation
    annotated_classes=[1, 2],  # for mode = annotation
    classes=['nc', 'pc'],  # for mode = annotation
    size=10,  # number of images per class
    test_split=0.1,
    metadata_type_by='col',
    class_metadata=[['c1'], ['c2']],  # for mode = metadata
    channel_of_interest=3,  # for mode = recruitment
)

In [None]:
# Train a torch model to classify images. If 'test' is True, a model will be
# tested on the test set (generated in previous steps).

src = "path"  # placeholder path; replace before running

settings = {
    # What to run
    "train": True,
    "test": False,
    "classes": ["nc", "pc"],
    # Model, optimizer and loss
    "model_type": "maxvit_t",
    "optimizer_type": "adamw",
    "schedule": "reduce_lr_on_plateau",  # reduce_lr_on_plateau, step_lr
    "loss_type": "focal_loss",  # binary_cross_entropy_with_logits, focal_loss
    # Input data
    "normalize": True,
    "image_size": 224,
    "batch_size": 12,
    # Training schedule and hyperparameters
    "epochs": 2,
    "val_split": 0.1,
    "train_mode": "erm",
    "learning_rate": 0.0001,
    "weight_decay": 0.00001,
    "dropout_rate": 0.1,
    "init_weights": True,
    "amsgrad": True,
    # Checkpointing and gradient accumulation
    "use_checkpoint": True,
    "gradient_accumulation": True,
    "gradient_accumulation_steps": 4,
    "intermedeate_save": True,  # key spelling kept exactly as in the original
    # Data loading
    "pin_memory": True,
    "num_workers": 30,
    "verbose": True,
}

spacr.train_test_model(src, settings)

In [None]:
# Apply a trained model to the images stored in a tar file and keep the
# returned result in result_df.
#
# About model_path: this model is trained on nc vs pc, cells filtered on cell
# size, parasite size and recruitment >2 for nc and <2 for pc.

result_df = spacr.apply_model_to_tar(
    tar_path="path",  # placeholder; replace before running
    model_path="path",  # placeholder; replace before running
    file_type="cell_png",
    image_size=224,
    batch_size=12,
    normalize=True,
    preload="images",
    num_workers=30,
    verbose=True,
)

In [None]:
# Merge model predictions with the measurements (spacr.merge_pred_mes), then
# pass the merged result to spacr.generate_dependent_variable.

src = "path"  # placeholder path; replace before running

res_df = spacr.merge_pred_mes(
    src,
    pred_loc="path",  # placeholder; replace before running
    target="protein",
    # Channel layout
    nucleus_dim=0,
    parasite_dim=2,
    cell_dim=3,
    channel_of_interest=1,
    # Minimum object sizes
    parasite_size_min=0,
    nucleus_size_min=0,
    cell_size_min=0,
    # Minimum values per object type / target
    parasite_min=0,
    nucleus_min=0,
    cell_min=0,
    target_min=0,
    mask_chans=[0, 1, 2],
    # Filtering switches
    filter_data=False,
    include_noninfected=False,
    include_multiinfected=True,  # bool or int
    include_multinucleated=True,
    cells_per_well=0,
    save_filtered_filelist=False,
    verbose=True,
)

dv_loc = src + "/measurements/"
spacr.generate_dependent_variable(
    df=res_df,
    dv_loc=dv_loc,
    pc_min=0.99,
    nc_max=0.95,
    agg_type="mean",
)

## Simulate pooled spatial phenotype screen

In [None]:
# Run a single simulation.
settings = {
    # Run identity and output
    "name": "test",
    "variable": "none",
    "src": "path",  # placeholder path; replace before running
    "plot": True,
    "random_seed": True,
    # Screen size
    "nr_plates": 4,
    "number_of_genes": 100,
    "number_of_active_genes": 10,
    "number_of_control_genes": 10,
    # Per-well distributions (average / standard deviation)
    "avg_genes_per_well": 5,
    "sd_genes_per_well": 2,
    "avg_cells_per_well": 100,
    "sd_cells_per_well": 50,
    # Score distributions for positive and negative classes
    "positive_mean": 0.8,
    "positive_variance": 0.15,
    "negative_mean": 0.2,
    "negative_variance": 0.15,
    # Sequencing
    "avg_reads_per_gene": 1000,
    "sd_reads_per_gene": 500,
    "sequencing_error": 0.1,
    # Inequality coefficients
    "well_ineq_coeff": 0.5,
    "gene_ineq_coeff": 0.1,
    "max_workers": 25,
}

i = 1
spacr.run_and_save(i, settings, time_ls=[1], total_sims=1)

In [None]:
# Run multiple simulations.

# Loop over most variables: each list holds the values to try for one setting.
avg_genes_per_well_ls = [10]
avg_cells_per_well_ls = [100]
positive_mean_ls = [0.8]
avg_reads_per_gene_ls = [1000]
sequencing_error_ls = [0.01]
well_ineq_coeff_ls = [0.3]  # lower values mean more inequality (range 0-1)
gene_ineq_coeff_ls = [0.8]
nr_plates_ls = [8]
number_of_genes_ls = [1384]
number_of_active_genes_ls = [8]

# NOTE(review): the run name suggests plates 2, 4 and 8, but nr_plates_ls
# currently holds only [8] — confirm the name is still accurate.
settings = {
    # Run identity and output
    "name": "plates_2_4_8",
    "variable": "all",
    "src": "path",  # placeholder path; replace before running
    "plot": True,
    "random_seed": False,
    "replicates": 2,
    # Swept settings (lists defined above) plus the fixed control-gene count
    "nr_plates": nr_plates_ls,
    "number_of_genes": number_of_genes_ls,
    "number_of_active_genes": number_of_active_genes_ls,
    "number_of_control_genes": 30,
    "avg_genes_per_well": avg_genes_per_well_ls,
    "avg_cells_per_well": avg_cells_per_well_ls,
    "positive_mean": positive_mean_ls,
    "avg_reads_per_gene": avg_reads_per_gene_ls,
    "sequencing_error": sequencing_error_ls,
    "well_ineq_coeff": well_ineq_coeff_ls,
    "gene_ineq_coeff": gene_ineq_coeff_ls,
    "max_workers": 25,
}

spacr.run_multiple_simulations(settings)