In [None]:
from spacr.core import preprocess_generate_masks
%matplotlib inline

# @title Preprocessing Settings { display-mode: "form" }
# Every form field below becomes one entry of the `settings` dictionary that is
# passed to preprocess_generate_masks() at the bottom of this cell.

# === Source and filename metadata ===
src = "path"  # @param {type:"string"}  # (path) path to source folder (where original images were stored)
metadata_type = "cellvoyager"  # @param ["cellvoyager", "cq1", "Nikon"] {type:"string"}  # (string) - type of filename metadata
custom_regex = None  # @param {type:"raw"}  # (regex) - Regular expression if filename metadata not in metadata_type
experiment = "screen"  # @param {type:"string"}  # (string) - Name of experiment
channels = [0, 1, 2, 3]  # @param {type:"raw"}  # (list) - list of integers representing available channels

# === Cell channel ===
cell_channel = 3  # @param {type:"integer"}  # (integer or NoneType) - Cell image dimension
cell_background = 100  # @param {type:"integer"}  # (integer) - Background value in cell images
cell_Signal_to_noise = 10  # @param {type:"integer"}  # (integer) - Signal to noise ratio for cell channel
cell_CP_prob = -1  # @param {type:"integer"}  # (integer) - Cellpose Cell probability
remove_background_cell = False  # @param {type:"boolean"}  # (bool) - Set background to 0 for cell channel

# === Nucleus channel ===
nucleus_channel = 0  # @param {type:"integer"}  # (Optional, integer or NoneType) - Nucleus image dimension
nucleus_background = 200  # @param {type:"integer"}  # (Optional, integer) - Background value in nucleus images
nucleus_Signal_to_noise = 5  # @param {type:"integer"}  # (Optional, integer) - Signal to noise ratio for nucleus channel
nucleus_CP_prob = 0  # @param {type:"integer"}  # (Optional, integer) - Cellpose Nucleus probability
remove_background_nucleus = False  # @param {type:"boolean"}  # (Optional, bool) - Set background to 0 for nucleus channel

# === Pathogen channel ===
pathogen_model = None  # @param {type:"raw"}  # (Optional, path or NoneType) - Custom cellpose model path for pathogen detection
pathogen_channel = 2  # @param {type:"integer"}  # (Optional, integer or NoneType) - Pathogen image dimension
pathogen_background = 150  # @param {type:"integer"}  # (Optional, integer) - Background value in pathogen images
pathogen_Signal_to_noise = 6  # @param {type:"integer"}  # (Optional, integer) - Signal to noise ratio for pathogen channel
pathogen_CP_prob = -2  # @param {type:"integer"}  # (Optional, integer) - Cellpose pathogen probability
remove_background_pathogen = True  # @param {type:"boolean"}  # (Optional, bool) - Set background to 0 for pathogen channel

# === Processing options ===
consolidate = False  # @param {type:"boolean"}  # (bool) - Consolidate overlapping objects
magnification = 20  # @param [20, 40, 60, 100] {type:"raw"}  # (integer) - Objective magnification used
save = True  # @param {type:"boolean"}  # (bool) - Save masks and object data to database
preprocess = True  # @param {type:"boolean"}  # (bool) - Preprocess images
masks = True  # @param {type:"boolean"}  # (bool) - Generate masks
batch_size = 100  # @param {type:"integer"}  # (integer) - Number of images to be normalized together
# Named `filter_objects` rather than `filter` so the builtin filter() is not
# rebound in the notebook's global namespace; the settings key is still 'filter'.
filter_objects = False  # @param {type:"boolean"}  # (bool) - Filter objects based on size
merge_pathogens = False  # @param {type:"boolean"}  # (bool) - Merge pathogens that share > 75% perimeter
plot = False  # @param {type:"boolean"}  # (bool) - Plot normalized intensity and object images
adjust_cells = True  # @param {type:"boolean"}  # (bool) - If cell, nucleus and pathogen: merge cells that share a pathogen

# === Test mode ===
test_mode = False  # @param {type:"boolean"}  # (bool) - Test settings in test mode before analyzing entire experiment
test_images = 100  # @param {type:"integer"}  # (integer) - Number of images to analyze in test mode
random_test = True  # @param {type:"boolean"}  # (bool) - Randomize images for test mode

# === Assemble settings dictionary and run ===
settings = {
    "src": src,
    "metadata_type": metadata_type,
    "custom_regex": custom_regex,
    "experiment": experiment,
    "channels": channels,
    "cell_channel": cell_channel,
    "cell_background": cell_background,
    "cell_Signal_to_noise": cell_Signal_to_noise,
    "cell_CP_prob": cell_CP_prob,
    "remove_background_cell": remove_background_cell,
    "nucleus_channel": nucleus_channel,
    "nucleus_background": nucleus_background,
    "nucleus_Signal_to_noise": nucleus_Signal_to_noise,
    "nucleus_CP_prob": nucleus_CP_prob,
    "remove_background_nucleus": remove_background_nucleus,
    "pathogen_model": pathogen_model,
    "pathogen_channel": pathogen_channel,
    "pathogen_background": pathogen_background,
    "pathogen_Signal_to_noise": pathogen_Signal_to_noise,
    "pathogen_CP_prob": pathogen_CP_prob,
    "remove_background_pathogen": remove_background_pathogen,
    "consolidate": consolidate,
    "magnification": magnification,
    "save": save,
    "preprocess": preprocess,
    "masks": masks,
    "batch_size": batch_size,
    "filter": filter_objects,
    "merge_pathogens": merge_pathogens,
    "plot": plot,
    "adjust_cells": adjust_cells,
    "test_mode": test_mode,
    "test_images": test_images,
    "random_test": random_test,
}

preprocess_generate_masks(settings)

In [None]:
from spacr.measure import measure_crop
%matplotlib inline

# @title Measurement & Cropping Settings { display-mode: "form" }
# The form values are gathered into `settings` and passed to measure_crop().

# --- Input ---
src = "path"  # @param {type:"string"}  # (path) Source folder; the path ends in /merged
channels = [0, 1, 2, 3]  # @param {type:"raw"}  # (list) Indices of the available channels

# --- Mask dimension per object type ---
cell_mask_dim = 4  # @param {type:"integer"}  # (int or None) Dimension of the cell mask
nucleus_mask_dim = 5  # @param {type:"integer"}  # (int or None) Dimension of the nucleus mask
pathogen_mask_dim = 6  # @param {type:"integer"}  # (int or None) Dimension of the pathogen mask

# --- Minimum object areas ---
cell_min_size = 2000  # @param {type:"integer"}  # (int) Smallest cell area kept (px²)
nucleus_min_size = 1000  # @param {type:"integer"}  # (int) Smallest nucleus area kept (px²)
pathogen_min_size = 400  # @param {type:"integer"}  # (int) Smallest pathogen area kept (px²)
cytoplasm_min_size = 0  # @param {type:"integer"}  # (int) Smallest cytoplasm area kept (px²)

# --- PNG cropping ---
crop_mode = ["cell"]  # @param {type:"raw"}  # (list) Object types cropped into PNGs ('cell', 'nuclei', 'pathogen')
use_bounding_box = False  # @param {type:"boolean"}  # (bool) Crop by bounding box rather than the exact object mask
png_size = [[224, 224]]  # @param {type:"raw"}  # (list of lists) Size of the PNG images
png_dims = [0, 1, 2]  # @param {type:"raw"}  # (list) Channels written into the PNGs
normalize = False  # @param {type:"raw"}  # (bool or list) Percentile normalization of PNGs
normalize_by = "png"  # @param ["png", "fov"] {type:"string"}  # (str) Normalization unit: 'png' or 'fov'

# --- Output and plotting ---
save_png = True  # @param {type:"boolean"}  # (bool) Write PNGs
save_measurements = True  # @param {type:"boolean"}  # (bool) Write object measurements
plot = False  # @param {type:"boolean"}  # (bool) Show images during analysis
plot_filtration = False  # @param {type:"boolean"}  # (bool) Show the filtration steps
uninfected = False  # @param {type:"boolean"}  # (bool) Keep uninfected objects

# --- Test mode ---
test_mode = False  # @param {type:"boolean"}  # (bool) Run in test mode
test_nr = 10  # @param {type:"integer"}  # (int) Images analyzed when test mode is on

# --- Build the settings dictionary (key order unchanged) and run ---
settings = {
    "src": src,
    "channels": channels,
    # mask dimensions and size filters
    "cell_mask_dim": cell_mask_dim,
    "cell_min_size": cell_min_size,
    "nucleus_mask_dim": nucleus_mask_dim,
    "nucleus_min_size": nucleus_min_size,
    "pathogen_mask_dim": pathogen_mask_dim,
    "pathogen_min_size": pathogen_min_size,
    "cytoplasm_min_size": cytoplasm_min_size,
    # cropping
    "save_png": save_png,
    "crop_mode": crop_mode,
    "use_bounding_box": use_bounding_box,
    "png_size": png_size,
    "normalize": normalize,
    "png_dims": png_dims,
    "normalize_by": normalize_by,
    # output, plotting and test mode
    "save_measurements": save_measurements,
    "plot": plot,
    "plot_filtration": plot_filtration,
    "uninfected": uninfected,
    "test_mode": test_mode,
    "test_nr": test_nr,
}

measure_crop(settings)


In [None]:
from spacr.ml import generate_ml_scores
%matplotlib inline

# @title ML Training Settings { display-mode: "form" }
# The form values are gathered into `settings`; the return value of
# generate_ml_scores() is kept in `results`.

# --- Data and model ---
src = "path"  # @param {type:"string"}  # (path) Source folder (where the original images were stored)
model_type_ml = "xgboost"  # @param ["random_forest", "xgboost", "gradient_boosting"] {type:"string"}  # (str) Model type
n_estimators = 100  # @param {type:"integer"}  # (int) Estimator count for the model
test_size = 0.2  # @param {type:"number"}  # (float) Share of the data held out as test set

# --- Controls ---
location_column = "column_name"  # @param {type:"string"}  # (str) Column holding the control metadata
positive_control = "c2"  # @param {type:"string"}  # (str) Positive-control label
negative_control = "c1"  # @param {type:"string"}  # (str) Negative-control label
exclude = None  # @param {type:"raw"}  # (str or None) Rows in location_column to leave out

# --- Data-quality filters ---
nuclei_limit = 1  # @param {type:"integer"}  # (int) Maximum nuclei per cell
pathogen_limit = 3  # @param {type:"integer"}  # (int) Maximum pathogens per cell
minimum_cell_count = 25  # @param {type:"integer"}  # (int) Minimum cells per well

# --- Heatmap ---
heatmap_feature = "predictions"  # @param {type:"string"}  # (str) Column shown in the heatmap
grouping = "mean"  # @param ["mean", "median"] {type:"string"}  # (str) Grouping method
min_max = "allq"  # @param ["all", "allq"] {type:"string"}  # (str) Quantile normalization
cmap = "viridis"  # @param {type:"string"}  # (str) Heatmap colormap

# --- Feature selection and importance ---
remove_low_variance_features = True  # @param {type:"boolean"}  # (bool) Drop low-variance features
remove_highly_correlated_features = True  # @param {type:"boolean"}  # (bool) Drop highly correlated features
n_repeats = 10  # @param {type:"integer"}  # (int) Permutation-importance repeats
top_features = 30  # @param {type:"integer"}  # (int) How many top features to display
channel_of_interest = 1  # @param {type:"integer"}  # (int) Channel for class-specific analysis

# --- Runtime ---
verbose = False  # @param {type:"boolean"}  # (bool) Verbose output
n_jobs = 10  # @param {type:"integer"}  # (int) Thread count

# --- Build the settings dictionary (key order unchanged) and run ---
settings = {
    "src": src,
    "model_type_ml": model_type_ml,
    # heatmap
    "heatmap_feature": heatmap_feature,
    "grouping": grouping,
    "min_max": min_max,
    "cmap": cmap,
    # model
    "n_estimators": n_estimators,
    "test_size": test_size,
    # controls
    "location_column": location_column,
    "positive_control": positive_control,
    "negative_control": negative_control,
    "exclude": exclude,
    # filters and feature handling
    "nuclei_limit": nuclei_limit,
    "pathogen_limit": pathogen_limit,
    "n_repeats": n_repeats,
    "top_features": top_features,
    "channel_of_interest": channel_of_interest,
    "minimum_cell_count": minimum_cell_count,
    "remove_low_variance_features": remove_low_variance_features,
    "remove_highly_correlated_features": remove_highly_correlated_features,
    # runtime
    "verbose": verbose,
    "n_jobs": n_jobs,
}

results = generate_ml_scores(settings)


In [None]:
from spacr.ml import perform_regression
import pandas as pd
%matplotlib inline

# @title Regression Settings { display-mode: "form" }
# The form values are gathered into `settings`; the return value of
# perform_regression() is kept in `coef_df`.

# === Input Data ===
count_data = "path"  # @param {type:"string"}  # (path) Path or list of paths to sequencing count data
score_data = "path"  # @param {type:"string"}  # (path) Path or list of paths to score data
score_column = "column"  # @param {type:"string"}  # (str) Column with cell scores
metadata_files = ["path.csv", "path.csv"]  # @param {type:"raw"}  # (list) Paths to gene metadata CSV files

# === Control Genes and Thresholds ===
positive_control = "gene"  # @param {type:"string"}  # (str) Gene to highlight in volcano plot
negative_control = "gene"  # @param {type:"string"}  # (str) Gene to highlight in volcano plot
min_n = 3  # @param {type:"integer"}  # (int) Minimum number of cells per gRNA
fraction_threshold = None  # @param {type:"raw"}  # (float or None) Minimum gene fraction threshold

# === gRNA/Well Filtering ===
target_unique_count = 5  # @param {type:"integer"}  # (int) Number of expected unique gRNAs per well
tolerance = 0.02  # @param {type:"number"}  # (float) Tolerance for cell-per-well limit
min_cell_count = None  # @param {type:"raw"}  # (int or None) Minimum number of cells per well
control_wells = ["c1", "c2", "c3"]  # @param {type:"raw"}  # (list) Metadata to exclude from regression model
filter_column = "column"  # @param {type:"string"}  # (str) Column containing control metadata to remove

# === Regression Parameters ===
dependent_variable = "column"  # @param {type:"string"}  # (str) Column to regress
regression_type = "ols"  # @param ["ols", "glm", "mixed", "ridge", "lasso"] {type:"string"}  # (str) Type of regression
random_row_column_effects = False  # @param {type:"boolean"}  # (bool) Include row/column/plate effects
cov_type = None  # @param {type:"raw"}  # (str or None) Covariance estimator for OLS
plate = None  # @param {type:"raw"}  # (str or None) Replace plate values

# === Output Configuration ===
threshold_method = "var"  # @param ["std", "var"] {type:"string"}  # (str) Thresholding method
threshold_multiplier = 4  # @param {type:"integer"}  # (int) Multiplier for effect size threshold
transform = "log"  # @param {type:"string"}  # (str) Transformation for dependent variable
agg_type = "mean"  # @param ["mean", "median"] {type:"string"}  # (str) Aggregation for dependent variable
volcano = "gene"  # @param ["gene", "grna", "all"] {type:"string"}  # (str) Volcano result display mode
alpha = 0.8  # @param {type:"number"}  # (float) Alpha for ridge/lasso regressions
log_x = False  # @param {type:"boolean"}  # (bool) Log scale for X axis
log_y = False  # @param {type:"boolean"}  # (bool) Log scale for Y axis
# x_lim used to be hard-coded to None inside the dictionary; it is a form field
# now, with the same default, so it can be changed like the other axis options.
x_lim = None  # @param {type:"raw"}  # (None or limits) X-axis limits - NOTE(review): confirm the accepted format in perform_regression
y_lims = [[0, 9], [12, 16]]  # @param {type:"raw"}  # (list of lists) Limits for broken Y axis

# === Execute Regression ===
settings = {
    "count_data": count_data,
    "score_data": score_data,
    "score_column": score_column,
    "metadata_files": metadata_files,
    "positive_control": positive_control,
    "negative_control": negative_control,
    "min_n": min_n,
    "fraction_threshold": fraction_threshold,
    "target_unique_count": target_unique_count,
    "tolerance": tolerance,
    "log_x": log_x,
    "log_y": log_y,
    "x_lim": x_lim,
    "control_wells": control_wells,
    "filter_column": filter_column,
    "dependent_variable": dependent_variable,
    "threshold_method": threshold_method,
    "threshold_multiplier": threshold_multiplier,
    "transform": transform,
    "agg_type": agg_type,
    "min_cell_count": min_cell_count,
    "regression_type": regression_type,
    "random_row_column_effects": random_row_column_effects,
    "y_lims": y_lims,
    "plate": plate,
    "cov_type": cov_type,
    "volcano": volcano,
    "alpha": alpha,
}

coef_df = perform_regression(settings)


In [None]:
from spacr.io import generate_training_dataset
%matplotlib inline

# @title Training Dataset Settings { display-mode: "form" }
# The form values are gathered into `settings` and passed to
# generate_training_dataset().

# === Source and mode ===
src = "path"  # @param {type:"string"}  # (path) Path to source folder (where original images are stored)
# NOTE(review): the default "metadata_annotation" was not among the dropdown
# options, so it has been appended to the list. Confirm against
# generate_training_dataset whether "metadata_annotation" or
# "annotation_metadata" is the accepted spelling and drop the other one.
dataset_mode = "metadata_annotation"  # @param ["annotation", "measurement", "metadata", "annotation_metadata", "metadata_annotation"] {type:"string"}  # (string) Mode for dataset generation
tables = ["cell"]  # @param {type:"raw"}  # (list of strings) Tables present in the database (excluding png_list)
test_split = 0.1  # @param {type:"number"}  # (float) Fraction of images used for test set

# === Annotation-based classes ===
annotation_column = "test"  # @param {type:"string"}  # (Optional, string) Column used for annotation labels
annotated_classes = [1]  # @param {type:"raw"}  # (Optional, list of integers) Class labels in annotation column

# === Metadata-based classes ===
metadata_type_by = "column_name"  # @param {type:"string"}  # (Optional, string) Column that defines metadata classes
class_metadata = ["c10", "c11", "c12", "c22", "c23", "c24"]  # @param {type:"raw"}  # (Optional, list) Metadata values defining classes

# === Image filters ===
png_type = "cell_png"  # @param {type:"string"}  # (Optional, string) Filter by image path substring
nuclei_limit = False  # @param {type:"boolean"}  # (Optional, bool) Filter for number of nuclei per cell
pathogen_limit = 0  # @param {type:"integer"}  # (Optional, int) Filter for number of pathogens per cell
uninfected = True  # @param {type:"boolean"}  # (Optional, bool) Include uninfected cells
size = None  # @param {type:"raw"}  # (Optional, int or None) Max number of images per class

# === Assemble settings dictionary and run ===
settings = {
    "src": src,
    "dataset_mode": dataset_mode,
    "tables": tables,
    "test_split": test_split,
    "annotation_column": annotation_column,
    "annotated_classes": annotated_classes,
    "metadata_type_by": metadata_type_by,
    "class_metadata": class_metadata,
    "png_type": png_type,
    "nuclei_limit": nuclei_limit,
    "pathogen_limit": pathogen_limit,
    "uninfected": uninfected,
    "size": size,
}

generate_training_dataset(settings)

# How to Cite
# -----------
# If you use spaCR in your research, please cite:
# Olafsson EB, et al. SpaCr: Spatial phenotype analysis of CRISPR-Cas9 screens. *Manuscript in preparation*.


In [None]:
from spacr.deep_spacr import train_test_model
%matplotlib inline

# @title Deep Learning Model Training Settings { display-mode: "form" }
# The form values are gathered into `settings` and passed to train_test_model().

# --- Data, run mode and model file ---
src = "path"  # @param {type:"string"}  # (path) Source folder; the path ends with datasets/training
train = False  # @param {type:"boolean"}  # (bool) Run training
test = True  # @param {type:"boolean"}  # (bool) Run testing
custom_model = "path"  # @param {type:"string"}  # (path) Location of a custom model

# --- Classes, architecture and optimization choices ---
classes = ["nc", "pc"]  # @param {type:"raw"}  # (list) Names of the class folders in datasets/training/train or test
model_type = "maxvit_t"  # @param {type:"string"}  # (string) Torch model architecture
optimizer_type = "adamw"  # @param {type:"string"}  # (string) Optimizer
schedule = "reduce_lr_on_plateau"  # @param ["reduce_lr_on_plateau", "step_lr"] {type:"string"}  # (string) Scheduler
loss_type = "focal_loss"  # @param ["focal_loss", "binary_cross_entropy_with_logits"] {type:"string"}  # (string) Loss function

# --- Input handling and training length ---
normalize = True  # @param {type:"boolean"}  # (bool) Use ImageNet normalization
image_size = 224  # @param {type:"integer"}  # (int) Input image size
batch_size = 64  # @param {type:"integer"}  # (int) Images per batch
epochs = 100  # @param {type:"integer"}  # (int) Training epochs
val_split = 0.1  # @param {type:"number"}  # (float) Share of images used for validation

# --- Hyperparameters ---
learning_rate = 0.0001  # @param {type:"number"}  # (float) Learning rate
weight_decay = 0.00001  # @param {type:"number"}  # (float) Regularization weight decay
dropout_rate = 0.1  # @param {type:"number"}  # (float) Dropout rate
init_weights = True  # @param {type:"boolean"}  # (bool) Start from pretrained ImageNet weights

# --- Optimizer variant and gradient handling ---
amsgrad = True  # @param {type:"boolean"}  # (bool) Use the AMSGrad optimizer variant
use_checkpoint = True  # @param {type:"boolean"}  # (bool) Gradient checkpointing to save VRAM
gradient_accumulation = True  # @param {type:"boolean"}  # (bool) Accumulate gradients across steps
gradient_accumulation_steps = 4  # @param {type:"integer"}  # (int) Steps over which gradients are accumulated

# --- Saving, data loading and logging ---
intermedeate_save = True  # @param {type:"boolean"}  # (bool) Save intermediate model states
pin_memory = True  # @param {type:"boolean"}  # (bool) Pin memory in the DataLoader
n_jobs = 30  # @param {type:"integer"}  # (int) Parallel jobs (threads)
train_channels = ["r", "g", "b"]  # @param {type:"raw"}  # (list of 'r', 'g', 'b') Channels used for training
augment = False  # @param {type:"boolean"}  # (bool) Turn on dataset augmentation
verbose = True  # @param {type:"boolean"}  # (bool) Verbose output

# --- Build the settings dictionary (key order unchanged) and run ---
settings = {
    "src": src,
    "train": train,
    "test": test,
    "custom_model": custom_model,
    # model definition
    "classes": classes,
    "model_type": model_type,
    "optimizer_type": optimizer_type,
    "schedule": schedule,
    "loss_type": loss_type,
    # inputs and training length
    "normalize": normalize,
    "image_size": image_size,
    "batch_size": batch_size,
    "epochs": epochs,
    "val_split": val_split,
    # hyperparameters
    "learning_rate": learning_rate,
    "weight_decay": weight_decay,
    "dropout_rate": dropout_rate,
    "init_weights": init_weights,
    # optimizer variant and gradients
    "amsgrad": amsgrad,
    "use_checkpoint": use_checkpoint,
    "gradient_accumulation": gradient_accumulation,
    "gradient_accumulation_steps": gradient_accumulation_steps,
    # saving, loading, logging
    "intermedeate_save": intermedeate_save,
    "pin_memory": pin_memory,
    "n_jobs": n_jobs,
    "train_channels": train_channels,
    "augment": augment,
    "verbose": verbose,
}

train_test_model(settings)


In [None]:
from spacr.io import generate_dataset
%matplotlib inline

# @title Generate TAR Dataset Settings { display-mode: "form" }
# The four form values are gathered into `settings` and passed to generate_dataset().

src = "path"  # @param {type:"string"}  # (path) Source folder (where the original images were stored)
file_metadata = "cell_png"  # @param {type:"string"}  # (Optional, string) File-path substring used to filter images
experiment = "test"  # @param {type:"string"}  # (string) Dataset name
sample = 10000  # @param {type:"integer"}  # (Optional, integer) Upper bound on the number of images included

# --- Build the settings dictionary (key order unchanged) and run ---
settings = {
    "src": src,
    "file_metadata": file_metadata,
    "experiment": experiment,
    "sample": sample,
}

generate_dataset(settings)


In [None]:
from spacr.deep_spacr import apply_model_to_tar
%matplotlib inline

# @title Apply Model to TAR Dataset Settings { display-mode: "form" }
# The form values are gathered into `settings`; the return value of
# apply_model_to_tar() is kept in `result_df`.

# --- Inputs ---
dataset = "path.tar"  # @param {type:"string"}  # (path) TAR dataset; the path ends with .tar
model_path = "path.pth"  # @param {type:"string"}  # (path) Model file; the path ends with .pth
file_type = "cell_png"  # @param {type:"string"}  # (Optional, string) File-path substring used to filter images

# --- Inference options ---
image_size = 224  # @param {type:"integer"}  # (int) Input image size (height and width)
batch_size = 64  # @param {type:"integer"}  # (int) Images per batch
normalize = True  # @param {type:"boolean"}  # (bool) Use ImageNet normalization
score_threshold = 0.5  # @param {type:"number"}  # (float) Classification score threshold

# --- Runtime ---
n_jobs = 30  # @param {type:"integer"}  # (int) Thread count
verbose = True  # @param {type:"boolean"}  # (bool) Verbose output

# --- Build the settings dictionary (key order unchanged) and run ---
settings = {
    "dataset": dataset,
    "model_path": model_path,
    "file_type": file_type,
    "image_size": image_size,
    "batch_size": batch_size,
    "normalize": normalize,
    "score_threshold": score_threshold,
    "n_jobs": n_jobs,
    "verbose": verbose,
}

result_df = apply_model_to_tar(settings)

In [None]:
from spacr.sequencing import generate_barecode_mapping
%matplotlib inline

# @title Generate Barcode Mapping Settings { display-mode: "form" }
# The form values are gathered into `settings` and passed to
# generate_barecode_mapping().

# --- Reads and barcode extraction ---
src = "path"  # @param {type:"string"}  # (path) Source folder
regex = "^(?P<column>.{8})TGCTG.*TAAAC(?P<grna>.{20,21})AACTT.*AGAAG(?P<row_name>.{8}).*"  # @param {type:"string"}  # (regex) Barcode extraction pattern
target_sequence = "TGCTGTTTCCAGCATAGCTCTTAAAC"  # @param {type:"string"}  # (string) Target sequence used to locate the grna
offset_start = -8  # @param {type:"integer"}  # (int) grna matching offset
expected_end = 89  # @param {type:"integer"}  # (int) Expected final position

# --- Barcode reference tables ---
column_csv = "path to column_barecodes.csv"  # @param {type:"string"}  # (path) CSV with the column barcodes
grna_csv = "path to grna_barcodes_RC.csv"  # @param {type:"string"}  # (path) CSV with the grna barcodes
row_csv = "path to row_barecodes_RC.csv"  # @param {type:"string"}  # (path) CSV with the row barcodes

# --- Output and compression ---
save_h5 = True  # @param {type:"boolean"}  # (bool) Write results to HDF5
comp_type = "zlib"  # @param ["zlib", "gzip", "lzf", null] {type:"string"}  # (string) Compression type
comp_level = 5  # @param {type:"integer"}  # (int) Compression level

# --- Processing ---
chunk_size = 10000  # @param {type:"integer"}  # (int) Reads per chunk
n_jobs = None  # @param {type:"raw"}  # (int or None) Parallel job count
mode = "paired"  # @param ["paired", "single"] {type:"string"}  # (string) Read mode
single_direction = "R1"  # @param ["R1", "R2"] {type:"string"}  # (string) Read direction when mode is single
test = False  # @param {type:"boolean"}  # (bool) Test-mode run
fill_na = True  # @param {type:"boolean"}  # (bool) Fill missing data with NA

# --- Build the settings dictionary (key order unchanged) and run ---
settings = {
    "src": src,
    # extraction
    "regex": regex,
    "target_sequence": target_sequence,
    "offset_start": offset_start,
    "expected_end": expected_end,
    # reference tables
    "column_csv": column_csv,
    "grna_csv": grna_csv,
    "row_csv": row_csv,
    # output
    "save_h5": save_h5,
    "comp_type": comp_type,
    "comp_level": comp_level,
    # processing
    "chunk_size": chunk_size,
    "n_jobs": n_jobs,
    "mode": mode,
    "single_direction": single_direction,
    "test": test,
    "fill_na": fill_na,
}

generate_barecode_mapping(settings)


In [None]:
# Description: Prepare a dataset for Cellpose training
from spacr.io import prepare_cellpose_dataset
%matplotlib inline

# @title Prepare Cellpose Dataset Settings { display-mode: "form" }
# The four form values are forwarded by keyword to prepare_cellpose_dataset().

input_root = "path"  # @param {type:"string"}  # (path) Root folder of the dataset
augment_data = True  # @param {type:"boolean"}  # (bool) Flip and rotate images to augment the dataset
train_fraction = 0.8  # @param {type:"number"}  # (float) Share of the data used for training
n_jobs = None  # @param {type:"raw"}  # (int or None) Parallel job count (None = auto)

prepare_cellpose_dataset(
    input_root=input_root, augment_data=augment_data,
    train_fraction=train_fraction, n_jobs=n_jobs,
)


In [None]:
# Description: Train a Cellpose model
from spacr.submodules import train_cellpose
%matplotlib inline

# @title Train Cellpose Settings { display-mode: "form" }
# The form values are gathered into `settings` and passed to train_cellpose().

# --- Data and run mode ---
src = "path"  # @param {type:"string"}  # (path) Training dataset location
test = False  # @param {type:"boolean"}  # (bool) Test-mode run

# --- Image preparation ---
normalize = False  # @param {type:"boolean"}  # (bool) Normalize the images
percentiles = None  # @param {type:"raw"}  # (tuple or None) Normalization percentiles
invert = False  # @param {type:"boolean"}  # (bool) Invert the image intensities
grayscale = True  # @param {type:"boolean"}  # (bool) Convert images to grayscale
rescale = False  # @param {type:"boolean"}  # (bool) Rescale the intensities
circular = False  # @param {type:"boolean"}  # (bool) Assume objects are circular
channels = [0, 0]  # @param {type:"raw"}  # (list) Segmentation channels

# --- Model ---
model_name = "test"  # @param {type:"string"}  # (string) Name under which the model is saved
model_type = "cyto"  # @param ["cyto", "nuclei", "cyto2"] {type:"string"}  # (string) Cellpose model type

# --- Signal and background ---
Signal_to_noise = 10  # @param {type:"integer"}  # (int) Signal to noise ratio
background = 200  # @param {type:"integer"}  # (int) Background intensity
remove_background = False  # @param {type:"boolean"}  # (bool) Remove the background

# --- Training hyperparameters ---
learning_rate = 0.2  # @param {type:"number"}  # (float) Learning rate
weight_decay = 1e-05  # @param {type:"number"}  # (float) Weight decay
batch_size = 8  # @param {type:"integer"}  # (int) Batch size
n_epochs = 25000  # @param {type:"integer"}  # (int) Training epochs
from_scratch = False  # @param {type:"boolean"}  # (bool) Train from scratch

# --- Object size and resizing ---
diameter = 30  # @param {type:"integer"}  # (int) Expected object diameter
resize = False  # @param {type:"boolean"}  # (bool) Resize images before training
target_dimensions = 1000  # @param {type:"integer"}  # (int) Image dimension targeted by resizing
verbose = True  # @param {type:"boolean"}  # (bool) Verbose output

# --- Build the settings dictionary (key order unchanged) and run ---
settings = {
    "src": src,
    "test": test,
    # image preparation
    "normalize": normalize,
    "percentiles": percentiles,
    "invert": invert,
    "grayscale": grayscale,
    "rescale": rescale,
    "circular": circular,
    "channels": channels,
    # model
    "model_name": model_name,
    "model_type": model_type,
    # signal and background
    "Signal_to_noise": Signal_to_noise,
    "background": background,
    "remove_background": remove_background,
    # training hyperparameters
    "learning_rate": learning_rate,
    "weight_decay": weight_decay,
    "batch_size": batch_size,
    "n_epochs": n_epochs,
    "from_scratch": from_scratch,
    # object size, resizing, logging
    "diameter": diameter,
    "resize": resize,
    "target_dimensions": target_dimensions,
    "verbose": verbose,
}

train_cellpose(settings)
