# This example shows how to run Redesics on imaging-based ST data

## Load required packages for Redesics

In [1]:
import scanpy as sc
import numpy as np
import pandas as pd
import Redesics

In [2]:
# Set device to 'cuda' if GPU available, otherwise 'cpu'
device='cuda'

## Load scRNA-seq reference and ST data

In [3]:
# The ST data refer to a Mouse Ileum MERFISH dataset
# include three columns (x, y, gene)
st_data = pd.read_csv("example_data/st_data.csv")
st_data


Unnamed: 0,x,y,gene
0,1705,1271,Maoa
1,1725,1922,Maoa
2,1753,1863,Maoa
3,1760,1865,Maoa
4,1904,794,Maoa
...,...,...,...
819660,5704,38,Hrh1
819661,5685,43,Htr4
819662,5631,61,Taar6
819663,5720,62,Taar7a


In [4]:
# The scRNA-seq data refer to SCP1038 from https://singlecell.broadinstitute.org/single_cell
# We randomly selected 10000 cells for reducing file size of sc_data.h5ad in example_data folder
sc_data = sc.read("example_data/sc_data.h5ad")
sc_data.var_names_make_unique()
sc_data.obs['annotation'].value_counts()

TA                 3104
Stem cell          2073
Paneth cell        1012
B cell              946
Goblet cell         723
Enterocyte cell     721
T cell              337
Myocyte             172
Neuron              151
Fibroblast          146
Macrophage          133
Endocrine cell      131
Lymphatic           104
Vascular             88
Tuft cell            62
Glia                 51
Mfge8                34
ICC                   8
Microfold cell        4
Name: annotation, dtype: int64

In [5]:
# Get coordinate of transcriptes and genes
coordinate = np.asarray([st_data['x'], st_data['y']]).T
gene = list(st_data['gene'])

## Estimate Candidate Cell Locations

In [6]:
# Estimate Candidate Cell Locations
# use if __name__ == "__main__": to avoid implementing this function when multiprocessing
if __name__ == "__main__":
    
    model = Redesics.center_selection(coordinate, 
                                      prior_count=100, 
                                      subsample=1, 
                                      quantile_cutoff = 0.99,
                                      worker_number=4, 
                                      device=device).to(device)

    estimated_cell_locations = model.train(epochs=1000)
    
    


Estimating Candidate Cell Locations ...

  Initialing data ...
  Epoch: 0 Loss: 0.048934752121567726
  Epoch: 20 Loss: 0.03332716559711844
  Epoch: 40 Loss: 0.023686088430229575
  Epoch: 60 Loss: 0.019870332268998026
  Epoch: 80 Loss: 0.017215412722434847
  Epoch: 100 Loss: 0.014902363598812371
  Epoch: 120 Loss: 0.012777069810777903
  Epoch: 140 Loss: 0.011083917499985546
  Epoch: 160 Loss: 0.009532554323785008
  Epoch: 180 Loss: 0.008266610512277112
  Epoch: 200 Loss: 0.007225707929581404
  Epoch: 220 Loss: 0.006382292398135178
  Epoch: 240 Loss: 0.0056716911832336335
  Epoch: 260 Loss: 0.005082011982565746
  Epoch: 280 Loss: 0.004569826135179028
  Epoch: 300 Loss: 0.004174549013958312
  Epoch: 320 Loss: 0.0038213983796304093
  Epoch: 340 Loss: 0.0035324870003969407
  Epoch: 360 Loss: 0.0032893650371115654
  Epoch: 380 Loss: 0.003055556320992764
  Epoch: 400 Loss: 0.002851004906813614
  Epoch: 420 Loss: 0.0027051096815266648
  Epoch: 440 Loss: 0.002567238706978969
  Epoch: 460 Loss:

## Run Redesics for cell segmentation and get output files

In [7]:
model = Redesics.segmentation(coordinate, 
                              gene,
                              estimated_cell_locations,
                              sc_data,
                              prior_count = 100, 
                              worker_number = 4, 
                              quantile_cutoff = 0.9999,
                              output_dir = "test_output",
                              device = device).to(device)

model.train(epochs=500)



Running Redesics ...

Initialing data ...

Cell type distribution of sc/snRNA:
TA                 1972
Stem cell          1463
Paneth cell         787
Enterocyte cell     674
B cell              638
Goblet cell         560
T cell              243
Endocrine cell      126
Myocyte             123
Neuron              106
Macrophage           99
Fibroblast           90
Vascular             65
Lymphatic            53
Tuft cell            51
Glia                 34
Mfge8                20
ICC                   8
Microfold cell        3
Name: annotation, dtype: int64
Intersection Gene: 214

Auto select batch size: 16394

  Begin training ...
  Epoch: 1/500 	Segmentation Loss: 0.22752407 	Deconvolution Loss: 0.5090062
  Epoch: 2/500 	Segmentation Loss: 0.020084172 	Deconvolution Loss: 0.50118244
  Epoch: 3/500 	Segmentation Loss: 0.01258281 	Deconvolution Loss: 0.49448603
  Epoch: 4/500 	Segmentation Loss: 0.022121958 	Deconvolution Loss: 0.49108115
  Epoch: 5/500 	Segmentation Loss: 0.036466

  Epoch: 97/500 	Segmentation Loss: 0.03392844 	Deconvolution Loss: 0.16421348
  Epoch: 98/500 	Segmentation Loss: 0.032842916 	Deconvolution Loss: 0.1637594
  Epoch: 99/500 	Segmentation Loss: 0.033131234 	Deconvolution Loss: 0.16289698
  Epoch: 100/500 	Segmentation Loss: 0.032588694 	Deconvolution Loss: 0.16348366
  Epoch: 101/500 	Segmentation Loss: 0.032319654 	Deconvolution Loss: 0.16253825
  Epoch: 102/500 	Segmentation Loss: 0.03239492 	Deconvolution Loss: 0.16122968
  Epoch: 103/500 	Segmentation Loss: 0.032277524 	Deconvolution Loss: 0.16080463
  Epoch: 104/500 	Segmentation Loss: 0.032088533 	Deconvolution Loss: 0.161157
  Epoch: 105/500 	Segmentation Loss: 0.03157562 	Deconvolution Loss: 0.16019566
  Epoch: 106/500 	Segmentation Loss: 0.03224295 	Deconvolution Loss: 0.15901133
  Epoch: 107/500 	Segmentation Loss: 0.03147854 	Deconvolution Loss: 0.15898477
  Epoch: 108/500 	Segmentation Loss: 0.031110102 	Deconvolution Loss: 0.15872253
  Epoch: 109/500 	Segmentation Loss: 0.

  Epoch: 199/500 	Segmentation Loss: 0.0259659 	Deconvolution Loss: 0.13356192
  Epoch: 200/500 	Segmentation Loss: 0.025387652 	Deconvolution Loss: 0.13432305
  Epoch: 201/500 	Segmentation Loss: 0.025249764 	Deconvolution Loss: 0.13355476
  Epoch: 202/500 	Segmentation Loss: 0.025062088 	Deconvolution Loss: 0.13257301
  Epoch: 203/500 	Segmentation Loss: 0.025133621 	Deconvolution Loss: 0.13347426
  Epoch: 204/500 	Segmentation Loss: 0.025336012 	Deconvolution Loss: 0.13256472
  Epoch: 205/500 	Segmentation Loss: 0.024847543 	Deconvolution Loss: 0.1327629
  Epoch: 206/500 	Segmentation Loss: 0.024927517 	Deconvolution Loss: 0.13186362
  Epoch: 207/500 	Segmentation Loss: 0.025051508 	Deconvolution Loss: 0.1326993
  Epoch: 208/500 	Segmentation Loss: 0.025320107 	Deconvolution Loss: 0.13181715
  Epoch: 209/500 	Segmentation Loss: 0.025405215 	Deconvolution Loss: 0.1329537
  Epoch: 210/500 	Segmentation Loss: 0.025084743 	Deconvolution Loss: 0.13263533
  Epoch: 211/500 	Segmentation Lo

  Epoch: 301/500 	Segmentation Loss: 0.02262657 	Deconvolution Loss: 0.12445705
  Epoch: 302/500 	Segmentation Loss: 0.023297817 	Deconvolution Loss: 0.12411224
  Epoch: 303/500 	Segmentation Loss: 0.022558514 	Deconvolution Loss: 0.12366513
  Epoch: 304/500 	Segmentation Loss: 0.022053996 	Deconvolution Loss: 0.12383754
  Epoch: 305/500 	Segmentation Loss: 0.022158641 	Deconvolution Loss: 0.124861374
  Epoch: 306/500 	Segmentation Loss: 0.02261035 	Deconvolution Loss: 0.123625904
  Epoch: 307/500 	Segmentation Loss: 0.021978373 	Deconvolution Loss: 0.12313556
  Epoch: 308/500 	Segmentation Loss: 0.022321157 	Deconvolution Loss: 0.123741746
  Epoch: 309/500 	Segmentation Loss: 0.022548942 	Deconvolution Loss: 0.12325858
  Epoch: 310/500 	Segmentation Loss: 0.022326335 	Deconvolution Loss: 0.123565696
  Epoch: 311/500 	Segmentation Loss: 0.02260096 	Deconvolution Loss: 0.12394602
  Epoch: 312/500 	Segmentation Loss: 0.022401929 	Deconvolution Loss: 0.123571366
  Epoch: 313/500 	Segmenta

  Epoch: 403/500 	Segmentation Loss: 0.021383405 	Deconvolution Loss: 0.1205675
  Epoch: 404/500 	Segmentation Loss: 0.021060323 	Deconvolution Loss: 0.1201844
  Epoch: 405/500 	Segmentation Loss: 0.0208814 	Deconvolution Loss: 0.12021107
  Epoch: 406/500 	Segmentation Loss: 0.020989208 	Deconvolution Loss: 0.120710544
  Epoch: 407/500 	Segmentation Loss: 0.020953316 	Deconvolution Loss: 0.121224925
  Epoch: 408/500 	Segmentation Loss: 0.021270322 	Deconvolution Loss: 0.12081684
  Epoch: 409/500 	Segmentation Loss: 0.021368712 	Deconvolution Loss: 0.12077163
  Epoch: 410/500 	Segmentation Loss: 0.021324366 	Deconvolution Loss: 0.12113953
  Epoch: 411/500 	Segmentation Loss: 0.021111019 	Deconvolution Loss: 0.11987453
  Epoch: 412/500 	Segmentation Loss: 0.020957692 	Deconvolution Loss: 0.12032653
  Epoch: 413/500 	Segmentation Loss: 0.02175167 	Deconvolution Loss: 0.1202722
  Epoch: 414/500 	Segmentation Loss: 0.02099424 	Deconvolution Loss: 0.120723695
  Epoch: 415/500 	Segmentation L

#### Parameter for Redesics segmentation ####

coordinate : Numpy Array, required
    The coordinate of transcripts. dimention: [Transcrips number, 2]
            
gene : list, required
    Gene name of transcrips

candidate_cell_center : Numpy Array, required
    Coordinates of candidate cell centers. This can be obtained by center_selection
    program.

sc_data : Scanpy AnnData, required
    Scanpy AnnData of single cell reference. Must include annotation information.
    Thus: sc_data.obs.annotation

prior_count : int, default=100 (important)
    Prior estimate of number of transcripts in each cells. No need to be precise

number_min_genes : int, default=10
    Minimun number of genes for single cells.
    sc.pp.filter_cells(sc_data, min_genes=number_min_genes)

number_cell_state : int, default=10
    Number of cell state for each cell type reference.

cell_count_cutoff : int, AutoSelect
    Minimun number of transcripts in each segmented cells.
    default to be prior_count/5.

k_number : int, default=100
    k_number for KNN-graph.

k_number_for_distance: int, default=8
    k_number for calculating distance score

quantile_cutoff : float, default=0.999
    Cutoff of the quantile function for distance distribution. Used for calculating 
    distance score

alphashape_value : int, default=5
    Alpha value for running alphashape program.

top_N_single_cell: int, default=20
    Number of single cell used for cell type inference and expression prediction.

worker_number : int, default=1
    Number of CPU cores.

output_dir : str, default="./"
    output directory 

device : str, default='cpu'
    Set device for running segmentation.