# Whole notebook in a nutshell

bash /scratch/iss_decoding/nf/workflow-assign-peaks-to-cells/local_run.sh \
    /scratch/tl10/assign/assign.yaml

In [3]:
!cat /scratch/tl10/assign/assign.yaml

tsv : /scratch/iss_decoding/data/assign/in/assign.csv
target_col : Name
separator : '\\t'
n_gene_min : 4
out_dir : ./out/

sif_assignment : /scratch/iss_decoding/sifs/assignment.sif


# Or step by step ...

In [1]:
import napari

In [2]:
viewer = napari.Viewer()



In [12]:
from bin.label_to_shapely import get_shapely
from bin.str_indexing import get_STRtree_per_channel
from bin.assign import assign
import tifffile as tf
import pandas as pd

In [2]:
# Read the segmentation label image and the tab-separated decoded spot table from ./out/.
lab_img = tf.imread("./out/cellpose_segmentation.tif")
decoded = pd.read_csv("./out/EMBL_training_mouse_brain_decoded_df.tsv", sep="\t")

In [8]:
viewer.add_labels(lab_img)

<Labels layer 'lab_img [1]' at 0x7f47383b37f0>

In [3]:
decoded

Unnamed: 0,Name,Code,Probability,y_int,x_int,index_code,axis,R0_C0,R1_C0,R2_C0,...,R2_C2,R3_C2,R4_C2,R5_C2,R0_C3,R1_C3,R2_C3,R3_C3,R4_C3,R5_C3
0,background,0000,0.964294,1,153,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Fos,CTGCAC,0.158820,2,829,132141.0,1,0,0,5,...,2,3,0,0,0,0,0,0,0,0
2,Bdnf,TTCGTC,0.269279,1,969,331231.0,1,0,0,0,...,2,0,0,9,898,5,0,0,0,0
3,Trh,CCATAA,0.524485,1,1044,114344.0,1,0,0,45,...,4,0,0,0,0,0,0,0,0,0
4,Pdyn,CGGACA,0.797838,2,2087,122414.0,1,0,0,4,...,0,0,4,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32238,background,0000,0.964294,4110,1043,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32239,background,0000,0.964294,4110,1162,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32240,background,0000,0.964294,4110,1583,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32241,background,0000,0.964294,4110,1788,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
viewer.add_points(decoded[['y_int', 'x_int']])

<Points layer 'Points' at 0x7f48cc79b7c0>

In [4]:
lab_shapely, _ = get_shapely(lab_img)

In [5]:
# Keep only the spots whose decoding probability exceeds 0.9.
# Rows named 'background' with a high probability also pass this filter
# (see the table displayed right after this cell).
decoded = decoded[decoded.Probability > 0.9]

In [6]:
decoded

Unnamed: 0,Name,Code,Probability,y_int,x_int,index_code,axis,R0_C0,R1_C0,R2_C0,...,R2_C2,R3_C2,R4_C2,R5_C2,R0_C3,R1_C3,R2_C3,R3_C3,R4_C3,R5_C3
0,background,0000,0.964294,1,153,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Itgam,CTCGCG,0.905801,2,2553,131212.0,1,0,0,0,...,6,0,2,0,0,12,0,0,0,0
11,Slc6a3,TGAACC,0.926105,1,3598,324411.0,1,0,0,1,...,0,2,23,6,532,6,0,0,0,0
15,background,0000,0.964294,2,473,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
17,Neurod1,ATCCGA,0.945812,2,980,431124.0,1,23,0,0,...,31,39,0,0,0,4,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32238,background,0000,0.964294,4110,1043,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32239,background,0000,0.964294,4110,1162,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32240,background,0000,0.964294,4110,1583,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32241,background,0000,0.964294,4110,1788,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
viewer.add_points(decoded[['y_int', 'x_int']], name= 'high_prob_RNAs', face_color="red")

<Points layer 'high_prob_RNAs' at 0x7f48cc0ceb50>

In [7]:
decoded.columns

Index(['Name', 'Code', 'Probability', 'y_int', 'x_int', 'index_code', 'axis',
       'R0_C0', 'R1_C0', 'R2_C0', 'R3_C0', 'R4_C0', 'R5_C0', 'R0_C1', 'R1_C1',
       'R2_C1', 'R3_C1', 'R4_C1', 'R5_C1', 'R0_C2', 'R1_C2', 'R2_C2', 'R3_C2',
       'R4_C2', 'R5_C2', 'R0_C3', 'R1_C3', 'R2_C3', 'R3_C3', 'R4_C3', 'R5_C3'],
      dtype='object')

In [8]:
decoded

Unnamed: 0,Name,Code,Probability,y_int,x_int,index_code,axis,R0_C0,R1_C0,R2_C0,...,R2_C2,R3_C2,R4_C2,R5_C2,R0_C3,R1_C3,R2_C3,R3_C3,R4_C3,R5_C3
0,background,0000,0.964294,1,153,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Itgam,CTCGCG,0.905801,2,2553,131212.0,1,0,0,0,...,6,0,2,0,0,12,0,0,0,0
11,Slc6a3,TGAACC,0.926105,1,3598,324411.0,1,0,0,1,...,0,2,23,6,532,6,0,0,0,0
15,background,0000,0.964294,2,473,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
17,Neurod1,ATCCGA,0.945812,2,980,431124.0,1,23,0,0,...,31,39,0,0,0,4,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32238,background,0000,0.964294,4110,1043,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32239,background,0000,0.964294,4110,1162,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32240,background,0000,0.964294,4110,1583,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32241,background,0000,0.964294,4110,1788,,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Build the per-channel lookup from the filtered spots; the result is indexed by
# gene name below (e.g. str_trees['Ache']) and exposes a `.geometries` array of points.
# NOTE(review): the decoded table's column is spelled 'Name' (capital N), while
# ch_col_name="name" is passed here — confirm which spelling get_STRtree_per_channel expects.
str_trees = get_STRtree_per_channel(decoded, ch_col_name="name",x_col="x_int", y_col="y_int")

In [10]:
str_trees['Ache'].geometries

array([<POINT (903 46)>, <POINT (1758 57)>, <POINT (2568 82)>,
       <POINT (1846 149)>, <POINT (1624 157)>, <POINT (2564 331)>,
       <POINT (2240 352)>, <POINT (2876 387)>, <POINT (1053 487)>,
       <POINT (3956 580)>, <POINT (1646 587)>, <POINT (918 598)>,
       <POINT (3254 597)>, <POINT (1933 611)>, <POINT (2557 613)>,
       <POINT (1809 666)>, <POINT (1418 674)>, <POINT (3035 674)>,
       <POINT (3965 697)>, <POINT (2835 713)>, <POINT (875 723)>,
       <POINT (1334 734)>, <POINT (3807 750)>, <POINT (2389 807)>,
       <POINT (3718 808)>, <POINT (2419 819)>, <POINT (2086 860)>,
       <POINT (2650 864)>, <POINT (1899 870)>, <POINT (878 895)>,
       <POINT (2765 934)>, <POINT (2342 1011)>, <POINT (1567 1012)>,
       <POINT (1519 1036)>, <POINT (872 1042)>, <POINT (917 1059)>,
       <POINT (857 1078)>, <POINT (880 1089)>, <POINT (863 1106)>,
       <POINT (909 1106)>, <POINT (3218 1120)>, <POINT (1478 1146)>,
       <POINT (1196 1151)>, <POINT (1160 1225)>, <POINT (2298 12

In [11]:
lab_shapely

{1: <MULTIPOLYGON (((58 0, 58 2, 57 3, 57 24, 56 25, 56 26, 57 27, 57 37, 58 38,...>,
 2: <MULTIPOLYGON (((718 0, 718 1, 717 2, 717 5, 718 6, 718 9, 719 10, 719 11, 7...>,
 3: <MULTIPOLYGON (((859 0, 859 2, 858 3, 858 6, 859 7, 859 10, 860 11, 860 12, ...>,
 4: <MULTIPOLYGON (((927 0, 927 1, 926 2, 926 3, 925 4, 925 7, 924 8, 924 11, 92...>,
 5: <MULTIPOLYGON (((2158 0, 2158 2, 2157 3, 2157 5, 2158 6, 2158 9, 2159 10, 21...>,
 6: <MULTIPOLYGON (((2483 0, 2483 3, 2482 4, 2482 8, 2481 9, 2481 11, 2482 12, 2...>,
 7: <MULTIPOLYGON (((2979 0, 2979 4, 2980 5, 2980 8, 2981 9, 2981 10, 2982 11, 2...>,
 8: <MULTIPOLYGON (((3031 0, 3031 1, 3032 2, 3032 7, 3033 8, 3033 12, 3034 13, 3...>,
 9: <MULTIPOLYGON (((3438 0, 3438 1, 3437 2, 3437 3, 3436 4, 3436 5, 3435 6, 343...>,
 10: <MULTIPOLYGON (((3488 0, 3488 1, 3489 2, 3489 4, 3490 5, 3490 6, 3492 8, 349...>,
 11: <MULTIPOLYGON (((3647 0, 3647 1, 3646 2, 3646 4, 3645 5, 3645 8, 3644 9, 364...>,
 12: <MULTIPOLYGON (((3709 0, 3707 2, 3706 2, 3706 5

In [41]:
def assign(trees, cells):
    """Count, for every cell, how many spots of each channel lie inside it.

    This notebook-local definition shadows the ``assign`` imported from
    ``bin.assign`` earlier in the notebook.

    Parameters
    ----------
    trees : dict
        Maps a channel name to a spatial index that provides
        ``query(geometry)`` (returning candidate indices) and a
        ``geometries`` sequence of points.
    cells : dict
        Maps a cell label to a geometry that provides ``centroid``,
        ``is_valid`` and ``contains(point)``.

    Returns
    -------
    count_df : pandas.DataFrame
        One row per cell and one column per channel; values are spot counts.
    centroid_df : pandas.DataFrame
        One row per cell with the centroid coordinates in columns ``y`` and ``x``.

    Notes
    -----
    Cells whose centroid is empty are skipped and appear in neither table.
    Cells that are not valid geometries are kept but get a count of zero
    for every channel.
    """
    spot_counts = {}
    cell_centroids = {}
    for cell_index, cell in cells.items():
        centroid = cell.centroid
        if centroid.is_empty:  # ignore the shapes that don't have a centroid
            continue
        cell_centroids[cell_index] = {"y": centroid.y, "x": centroid.x}

        # Validity does not depend on the channel, so evaluate it once per cell.
        cell_is_valid = cell.is_valid
        current_counts = {}
        for ch, tree in trees.items():
            if not cell_is_valid:
                current_counts[ch] = 0
                continue
            geometries = tree.geometries
            # The index query only yields candidates; `contains` does the exact test.
            current_counts[ch] = sum(
                1 for i in tree.query(cell) if cell.contains(geometries[i])
            )
        spot_counts[cell_index] = current_counts

    count_df = pd.DataFrame.from_dict(spot_counts, orient="index")
    # Explicit columns keep 'y'/'x' available even when no cell was processed.
    centroid_df = pd.DataFrame.from_dict(
        cell_centroids, orient="index", columns=["y", "x"]
    )
    return count_df, centroid_df

In [13]:
count_df, centroid_df = assign(str_trees, lab_shapely)

In [14]:
import anndata as ad

In [31]:
# Build the AnnData object from the count table, leaving out the
# 'background' and 'infeasible' columns.
adata = ad.AnnData(count_df.loc[:, ~count_df.columns.isin(['background', 'infeasible'])])



In [32]:
adata

AnnData object with n_obs × n_vars = 1493 × 43

In [33]:
# Store the centroid coordinates (x first, then y) as the spatial embedding
# and label every observation with the sample name.
adata.obsm['spatial'] = centroid_df[['x', 'y']].values
adata.obs['sample'] = "mouse_brain"

In [34]:
# Per-cell QC metrics: total count across all genes and the number of genes
# with a non-zero count. These are used further down to remove cells with no mRNA.
adata.obs['total_counts'] = adata.X.sum(1)
adata.obs['n_genes_by_counts'] = (adata.X > 0).sum(1)

In [35]:
adata.obs

Unnamed: 0,sample,total_counts,n_genes_by_counts
1,mouse_brain,0,0
2,mouse_brain,3,2
3,mouse_brain,0,0
4,mouse_brain,6,5
5,mouse_brain,0,0
...,...,...,...
1489,mouse_brain,0,0
1490,mouse_brain,0,0
1491,mouse_brain,0,0
1492,mouse_brain,0,0


In [36]:
# Keep only the cells in which at least one gene was detected.
# The slice is a view of the original AnnData rather than a copy
# (the repr below reads "View of AnnData object").
adata = adata[adata.obs.n_genes_by_counts > 0, :]

In [37]:
adata

View of AnnData object with n_obs × n_vars = 638 × 43
    obs: 'sample', 'total_counts', 'n_genes_by_counts'
    obsm: 'spatial'

In [38]:
adata.obs

Unnamed: 0,sample,total_counts,n_genes_by_counts
2,mouse_brain,3,2
4,mouse_brain,6,5
6,mouse_brain,3,3
11,mouse_brain,1,1
12,mouse_brain,1,1
...,...,...,...
1259,mouse_brain,3,1
1261,mouse_brain,2,1
1262,mouse_brain,1,1
1263,mouse_brain,1,1
