# Running SpaCr in Colab with a GPU

Mount your Google Drive to access all your image files. This also ensures that the segmentations are saved to your Google Drive.

## Installation

Install spacr.

In [1]:
!pip install cellpose

Collecting cellpose
  Downloading cellpose-3.0.6-py3-none-any.whl (204 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m204.4/204.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting fastremap (from cellpose)
  Downloading fastremap-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting imagecodecs (from cellpose)
  Downloading imagecodecs-2024.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (39.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.6/39.6 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting roifile (from cellpose)
  Downloading roifile-2024.1.10-py3-none-any.whl (16 kB)
Installing collected packages: roifile, imagecodecs, fastremap, cellpose
Successfully installed cellpose-3.0.6 fastremap-1.14.1 imagecodecs-2024.1.1 roifile-2024.1.10


Check the CUDA version, confirm that a GPU is visible to the runtime, and import the other libraries.

In [2]:
!nvcc --version
!nvidia-smi

import os, shutil
import numpy as np
from glob import glob

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
Mon Mar 11 19:06:50 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                      

Mount your Google Drive here if you want to load your own images:

In [3]:
# #@markdown ###Run this cell to connect your Google Drive to Colab

#@markdown * Click on the URL.

#@markdown * Sign in your Google Account.

#@markdown * Copy the authorization code.

#@markdown * Enter the authorization code.

#@markdown * Click on "Files" site on the right. Refresh the site. Your Google Drive folder should now be available here as "drive".

# Attach the user's Google Drive to the Colab filesystem at the mount point below.

from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


## Generate cellpose masks

In [6]:
# This cell performs three steps:
#   1. Organize images into npy stacks.
#   2. Preprocess images for object detection.
#   3. Generate masks.

# Placeholder value; set this to your own data location before running.
src = 'path'

# Per-object settings (channel, background, signal-to-noise, CP probability)
# for nuclei, cells and pathogens. Keys are passed to spacr verbatim.
settings = {
    'metadata_type': 'cellvoyager',
    'experiment': 'test',
    'channels': [0, 1, 2, 3],
    'nucleus_channel': 0,
    'nucleus_background': 200,
    'nucleus_Signal_to_noise': 10,
    'nucleus_CP_prob': 0,
    'cell_channel': 3,
    'cell_background': 200,
    'cell_Signal_to_noise': 10,
    'cell_CP_prob': -1,
    'pathogen_channel': 2,
    'pathogen_background': 200,
    'pathogen_Signal_to_noise': 20,
    'pathogen_CP_prob': -1,
    'magnefication': 20,
}

# Options controlling which stages run, plotting, batching and timelapse handling.
advanced_settings = {
    'preprocess': True,
    'masks': True,
    'custom_regex': None,
    'save': True,
    'plot': False,
    'examples_to_plot': 1,
    'batch_size': 50,
    'randomize': True,
    'timelapse': False,
    'timelapse_displacement': None,
    'timelapse_memory': 3,
    'timelapse_frame_limits': None,
    'timelapse_remove_transient': False,
    'timelapse_mode': 'trackpy',
    'timelapse_objects': ['cells'],
    'fps': 2,
    'remove_background': True,
    'lower_quantile': 0.01,
    'merge': False,
    'normalize_plots': True,
    'all_to_mip': False,
    'pick_slice': False,
    'skip_mode': None,
    'workers': 30,
    'verbose': True,
}

spacr.preprocess_generate_masks(
    src,
    settings=settings,
    advanced_settings=advanced_settings,
)

/bin/bash: line 1: gui_mask: command not found


## Capture measurements and crop images

In [None]:
# This cell performs two steps:
#   1. Collect measurement data and save it to the database.
#   2. Generate single-object images.

# Placeholder value; set this to your own data location before running.
src = 'path'

# Input folder, channels, mask dimensions and minimum object sizes.
settings = {
    'input_folder': f'{src}/merged',
    'channels': [0, 1, 2, 3],
    'cell_mask_dim': 4,
    'cell_min_size': 0,
    'nuclei_mask_dim': 5,
    'nucleus_min_size': 0,
    'pathogen_mask_dim': 6,
    'pathogen_min_size': 0,
    'cytoplasm_min_size': 0,
}

# Cropping / PNG export options.
advanced_settings = {
    'save_png': True,
    'crop_mode': ['cell'],
    'use_bounding_box': True,
    'png_size': [[224, 224]],
    'normalize': [2, 99],
    'png_dims': [0, 1, 2],
    'normalize_by': 'png',  # 'fov' or 'png'
    'save_measurements': True,
    'plot': False,
    'plot_filtration': False,
    'include_uninfected': False,
    'dialate_pngs': False,
    'dialate_png_ratios': [0.2],
    'timelapse': False,
    'timelapse_objects': 'cell',
    'max_workers': 30,
}

# Plate annotation: each entry in `pathogen_loc` / `treatment_loc` lists the
# columns belonging to the pathogen / treatment at the same position.
annotation_settings = {
    'experiment': 'test',
    'cells': 'HeLa',
    'cell_loc': None,
    'pathogens': ['dsag1', 'dgra14_1', 'dgra14_2', 'dgra14_3'],
    'pathogen_loc': [
        ['c1', 'c2', 'c3', 'c13', 'c14', 'c15'],
        ['c4', 'c5', 'c6', 'c16', 'c17', 'c18'],
        ['c7', 'c8', 'c9', 'c19', 'c20', 'c21'],
        ['c10', 'c11', 'c12', 'c22', 'c23', 'c24'],
    ],
    'treatments': ['cm', 'lovastatin_20uM'],
    'treatment_loc': [
        ['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11', 'c12'],
        ['c13', 'c14', 'c15', 'c16', 'c17', 'c18', 'c19', 'c20', 'c21', 'c22', 'c23', 'c24'],
    ],
    'channel_of_interest': 1,
    'compartments': ['pathogen', 'cytoplasm'],
    'measurement': 'mean_intensity',
    'nr_imgs': 32,
    'um_per_pixel': 0.1,
}

if __name__ == "__main__":
    spacr.measure_crop(settings, annotation_settings, advanced_settings)

## Curate masks

In [None]:
# Mask curation must be run locally.

## Annotate Images

In [None]:
# Image annotation must be run locally.

## Train CNN/Transformer model

In [None]:
# Build a tar file containing all images in the measurements database.
# Note: this can be very slow for large datasets.

# Placeholder value; set this to your own data location before running.
src = 'path'

spacr.generate_dataset(
    db_path=f'{src}/measurements/measurements.db',
    dst='path',
    file_type='cell_png',
    experiment='test',
    sample=20,
)

In [None]:
# Generate train and test folders with class subfolders for training DL models.

# Placeholder value; set this to your own data location before running.
src = 'path'

spacr.generate_training_dataset(
    db_path=src + '/measurements/measurements.db',
    # The separator is required: src + 'train_data' would create a sibling
    # folder named '<src>train_data' instead of a folder inside `src`.
    dst=src + '/train_data',
    mode='annotation',  # annotation, recruitment or metadata
    annotation_column='test',  # for mode = annotation
    annotated_classes=[1, 2],  # for mode = annotation
    classes=['nc', 'pc'],  # for mode = annotation
    size=10,  # number of images per class
    test_split=0.1,
    metadata_type_by='col',
    class_metadata=[['c1'], ['c2']],  # for mode = metadata
    channel_of_interest=3,  # for mode = recruitment
)

In [None]:
# Train a torch model to classify images. If 'test' is True, the model is also
# evaluated on the test set (generated in previous steps).

# Placeholder value; set this to your own data location before running.
src = 'path'

settings = {
    'train': True,
    'test': False,
    'classes': ['nc', 'pc'],
    'model_type': 'maxvit_t',
    'optimizer_type': 'adamw',
    'schedule': 'reduce_lr_on_plateau',  # reduce_lr_on_plateau, step_lr
    'loss_type': 'focal_loss',  # binary_cross_entropy_with_logits, focal_loss
    'normalize': True,
    'image_size': 224,
    'batch_size': 12,
    'epochs': 2,
    'val_split': 0.1,
    'train_mode': 'erm',
    'learning_rate': 0.0001,
    'weight_decay': 0.00001,
    'dropout_rate': 0.1,
    'init_weights': True,
    'amsgrad': True,
    'use_checkpoint': True,
    'gradient_accumulation': True,
    'gradient_accumulation_steps': 4,
    'intermedeate_save': True,
    'pin_memory': True,
    'num_workers': 30,
    'verbose': True,
}

spacr.train_test_model(src, settings)

In [None]:
# Apply a trained model to the images in a tar file; the returned value is kept
# in `result_df`. Both 'path' arguments are placeholders to replace before running.

result_df = spacr.apply_model_to_tar(
    tar_path='path',
    # this model is trained on nc vs pc, cells filtered on cell size,
    # parasite size and recruitment >2 for nc and <2 for pc
    model_path='path',
    file_type='cell_png',
    image_size=224,
    batch_size=12,
    normalize=True,
    preload='images',
    num_workers=30,
    verbose=True,
)

In [None]:
# Merge model predictions with the measurements, then generate the dependent
# variable from the merged table (presumably a DataFrame -- confirm against spacr).

# Placeholder value; set this to your own data location before running.
src = 'path'

res_df = spacr.merge_pred_mes(
    src,
    pred_loc='path',
    target='protein',
    nucleus_dim=0,
    parasite_dim=2,
    cell_dim=3,
    channel_of_interest=1,
    parasite_size_min=0,
    nucleus_size_min=0,
    cell_size_min=0,
    parasite_min=0,
    nucleus_min=0,
    cell_min=0,
    target_min=0,
    mask_chans=[0, 1, 2],
    filter_data=False,
    include_noninfected=False,
    include_multiinfected=True,  # bool or int
    include_multinucleated=True,
    cells_per_well=0,
    save_filtered_filelist=False,
    verbose=True,
)

dv_loc = f'{src}/measurements/'
spacr.generate_dependent_variable(
    df=res_df,
    dv_loc=dv_loc,
    pc_min=0.99,
    nc_max=0.95,
    agg_type='mean',
)

## Simulate pooled spatial phenotype screen

In [None]:
# Run a single simulation.
settings = {
    'name': 'test',
    'variable': 'none',
    'src': 'path',  # placeholder; replace before running
    'plot': True,
    'random_seed': True,
    'nr_plates': 4,
    'number_of_genes': 100,
    'number_of_active_genes': 10,
    'number_of_control_genes': 10,
    'avg_genes_per_well': 5,
    'sd_genes_per_well': 2,
    'avg_cells_per_well': 100,
    'sd_cells_per_well': 50,
    'positive_mean': 0.8,
    'positive_variance': 0.15,
    'negative_mean': 0.2,
    'negative_variance': 0.15,
    'avg_reads_per_gene': 1000,
    'sd_reads_per_gene': 500,
    'sequencing_error': 0.1,
    'well_ineq_coeff': 0.5,
    'gene_ineq_coeff': 0.1,
    'max_workers': 25,
}

i = 1
spacr.run_and_save(i, settings, time_ls=[1], total_sims=1)

In [None]:
# Run multiple simulations.

# Loop over most variables: each list holds the values to try for one parameter.
avg_genes_per_well_ls = [10]
avg_cells_per_well_ls = [100]
positive_mean_ls = [0.8]
avg_reads_per_gene_ls = [1000]
sequencing_error_ls = [0.01]
well_ineq_coeff_ls = [0.3]  # lower values mean more inequality (0-1)
gene_ineq_coeff_ls = [0.8]
nr_plates_ls = [8]
number_of_genes_ls = [1384]
number_of_active_genes_ls = [8]

settings = {
    'name': 'plates_2_4_8',
    'variable': 'all',
    'src': 'path',  # placeholder; replace before running
    'plot': True,
    'random_seed': False,
    'replicates': 2,
    'nr_plates': nr_plates_ls,
    'number_of_genes': number_of_genes_ls,
    'number_of_active_genes': number_of_active_genes_ls,
    'number_of_control_genes': 30,
    'avg_genes_per_well': avg_genes_per_well_ls,
    'avg_cells_per_well': avg_cells_per_well_ls,
    'positive_mean': positive_mean_ls,
    'avg_reads_per_gene': avg_reads_per_gene_ls,
    'sequencing_error': sequencing_error_ls,
    'well_ineq_coeff': well_ineq_coeff_ls,
    'gene_ineq_coeff': gene_ineq_coeff_ls,
    'max_workers': 25,
}

spacr.run_multiple_simulations(settings)