In [1]:
%matplotlib inline

# Understand the Bering object
This tutorial walks through the data structure of the Bering object. We use the Nanostring CosMx NSCLC dataset (He et al., 2022) as an example.

### Import packages & data

In [2]:
import numpy as np
import pandas as pd
import tifffile as tiff
import matplotlib.pyplot as plt 

import Bering as br

In [3]:
# Load the example inputs.
# Both spot tables are tab-separated text files with a header row; the first
# column is used as the index. `pd.read_table` uses a tab separator by default.
df_spots_seg = pd.read_table('spots_seg.txt', header=0, index_col=0)
df_spots_unseg = pd.read_table('spots_unseg.txt', header=0, index_col=0)

# Image that accompanies the spots, and the names of its channels.
# NOTE(review): presumably `channels` is listed in the same order as the
# channels stored in image.tif - confirm against the image file.
img = tiff.imread('image.tif')
channels = ['Nuclei', 'PanCK', 'Membrane']

### Visualize spots
Spots data include spots that are segmented (annotated) and unsegmented (without cell annotations).

In [4]:
# Preview the first rows of the segmented (annotated) spots table.
df_spots_seg.head()

Unnamed: 0,x,y,z,features,segmented,components,fov,labels
13342297,347.82858,48.57143,4,S100P,1,Cytoplasm,10,tumor
13342298,325.1,42.125,4,RAC1,1,Membrane,10,tumor
13342299,344.16248,43.912502,4,ITGB1,1,Cytoplasm,10,tumor
13342300,327.53,48.96,4,GSTP1,1,Cytoplasm,10,tumor
13342301,353.9625,19.949999,4,CLDN4,1,Membrane,10,tumor


In [5]:
# x, y, z are the 3D coordinates of the transcripts on the image.
# Select the three coordinate columns once, then report their per-column extremes.
coords = df_spots_seg[["x", "y", "z"]]
print(f'min x, y, z: {coords.min().values}')
print(f'max x, y, z: {coords.max().values}')

min x, y, z: [ 9.51666641 10.775      -1.        ]
max x, y, z: [5461.38769531 3637.5612793     8.        ]


In [6]:
# Column "features" contains the gene name of each individual transcript;
# value_counts() tallies how many transcripts were detected per gene.
df_spots_seg['features'].value_counts()

MZT2A       58704
DUSP5       55854
MALAT1      29428
HSPA1A      18811
OLFM4       17501
            ...  
CHGA           95
ARG1           91
ADGRE1         88
NegPrb19       87
NegPrb15       87
Name: features, Length: 980, dtype: int64

In [7]:
# Columns "segmented" and "labels" hold the segmented cell ids and the cell type annotations.
# They are the ground truth for the cell segmentation and cell type classification tasks.
# The two counts below are transcripts per cell id and transcripts per cell type.
print(df_spots_seg['segmented'].value_counts())
print(df_spots_seg['labels'].value_counts())

2669    2002
975     1917
1330    1889
934     1615
559     1490
        ... 
2445      20
2779      20
3133      20
2647      20
630       20
Name: segmented, Length: 3129, dtype: int64
tumor          480533
myeloid        246703
fibroblast     102280
endothelial     92674
epithelial      75477
B               62654
T               29920
pDC             17912
NK              12644
Name: labels, dtype: int64


In [8]:
# Preview the first rows of the unsegmented spots table
# (it carries no "segmented" cell id or "labels" cell type columns).
df_spots_unseg.head()

Unnamed: 0,components,features,fov,x,y,z
13131760,Membrane,S100A10,10,4667.9624,225.46251,4
13131761,0,FGR,10,5063.0,974.55005,4
13131762,0,IL4R,10,3770.371,1280.6572,4
13131763,Membrane,S100A10,10,4671.217,1350.0834,4
13131764,0,IGHA1,10,2105.9167,2135.4167,4


### Create Bering object

In [9]:
# Build the Bering object from the segmented spots, the unsegmented spots,
# the image and its channel names.
bg = br.BrGraph(df_spots_seg, df_spots_unseg, img, channels)
# Bare expression on the last line so the notebook displays the object's repr.
bg

<Bering.objects.bering.Bering_Graph at 0x2ba02b692460>

### Cell metadata

In [10]:
# Preview the metadata table of the segmented cells (one row per cell id).
# NOTE(review): cx/cy look like cell centre coordinates and dx/dy/d like cell
# extents, with `labels` the cell type - confirm against the Bering docs.
bg.segmented.head()

Unnamed: 0_level_0,cx,cy,dx,dy,d,labels
segmented,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,330.050018,36.464283,78.366302,61.139999,78.366302,tumor
1,630.05777,30.633572,113.07495,53.514284,113.07495,endothelial
2,773.12738,35.350002,61.014648,48.891116,61.014648,epithelial
3,1950.387451,21.155557,59.865234,25.408331,59.865234,myeloid
4,2082.986694,23.441666,60.033447,27.858335,60.033447,myeloid


### Feature (gene) metadata

In [11]:
# Preview the metadata table of the features (genes): one row per gene
# with a single `counts` column.
bg.features.head()

Unnamed: 0_level_0,counts
features,Unnamed: 1_level_1
AATK,340
ABL1,327
ABL2,319
ACE,301
ACE2,233


### Other information

In [12]:
# Size summaries stored on the Bering object.
print(f'Number of transcripts: {bg.n_transcripts}')
print(f'Number of features: {bg.n_features}')
print(f'Number of cells: {bg.n_segmented}')
# NOTE(review): this prints 10, while the "labels" column of the segmented
# spots lists 9 cell types - presumably one extra class (e.g. background for
# unsegmented spots) is counted; confirm.
print(f'Number of labels: {bg.n_labels}')

# Spatial bounding box of the spots.
# NOTE(review): the printed XMAX is slightly larger than the maximum x of the
# segmented spots, so the bounds presumably also cover the unsegmented spots - confirm.
print(f'minimal x, y coordinates: {bg.XMIN}, {bg.YMIN}')
print(f'maximal x, y coordinates: {bg.XMAX}, {bg.YMAX}')

# Device reported by the object ("cuda" in the recorded run).
print(f'Device of Bering object: {bg.device}')

Number of transcripts: 1331334
Number of features: 980
Number of cells: 3129
Number of labels: 10
minimal x, y coordinates: 9.51666641235352, 10.775000000000093
maximal x, y coordinates: 5461.54296875, 3637.561279296875
Device of Bering object: cuda
