# Install Necessary Packages and Load Data

In [None]:
!pip install iMAP scanpy loompy

Collecting iMAP
  Downloading imap-1.0.0.tar.gz (77 kB)
[K     |████████████████████████████████| 77 kB 3.2 MB/s 
[?25hCollecting scanpy
  Downloading scanpy-1.9.1-py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 33.9 MB/s 
[?25hCollecting loompy
  Downloading loompy-3.0.7.tar.gz (4.8 MB)
[K     |████████████████████████████████| 4.8 MB 41.0 MB/s 
Collecting annoy
  Downloading annoy-1.17.0.tar.gz (646 kB)
[K     |████████████████████████████████| 646 kB 56.6 MB/s 
Collecting umap-learn>=0.3.10
  Downloading umap-learn-0.5.3.tar.gz (88 kB)
[K     |████████████████████████████████| 88 kB 6.1 MB/s 
[?25hCollecting session-info
  Downloading session_info-1.0.0.tar.gz (24 kB)
Collecting matplotlib>=3.4
  Downloading matplotlib-3.5.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)
[K     |████████████████████████████████| 11.2 MB 47.7 MB/s 
Collecting anndata>=0.7.4
  Downloading anndata-0.8.0-py3-none-any.whl (96 kB)
[K     |█████████████

In [None]:
!pip install git+https://github.com/theislab/squidpy # install squidpy for cell-cell communication analysis

Collecting git+https://github.com/theislab/squidpy
  Cloning https://github.com/theislab/squidpy to /tmp/pip-req-build-fykcla50
  Running command git clone -q https://github.com/theislab/squidpy /tmp/pip-req-build-fykcla50
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting scikit-image>=0.19
  Downloading scikit_image-0.19.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (13.5 MB)
[K     |████████████████████████████████| 13.5 MB 5.2 MB/s 
[?25hCollecting numba>=0.52.0
  Downloading numba-0.55.1-1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 38.6 MB/s 
[?25hCollecting docrep>=0.3.1
  Downloading docrep-0.3.2.tar.gz (33 kB)
Collecting validators>=0.18.2
  Downloading validators-0.19.0.tar.gz (30 kB)
Collecting matplotlib-scaleba

In [None]:
import scanpy as sc
import numpy as np
import imap
import squidpy as sq

  from .collection import imread_collection_wrapper


In [None]:
adata = sc.read_h5ad('/content/drive/MyDrive/data/crc_concat.h5ad')

In [None]:
adata

AnnData object with n_obs × n_vars = 53018 × 3892
    obs: 'ClusterID', 'ClusterName', 'Global_Cluster', 'Platform', 'Sample', 'Sub_Cluster', 'Sub_ClusterID', 'Tissue', 'n_genes', 'n_counts', 'batch', 'P_batch'
    var: 'Selected-0', 'vst_mean-0', 'vst_variable-0', 'vst_variance-0', 'vst_variance_expected-0', 'vst_variance_standardized-0', 'n_cells-0', 'highly_variable-0', 'means-0', 'dispersions-0', 'dispersions_norm-0', 'Selected-1', 'vst_mean-1', 'vst_variable-1', 'vst_variance-1', 'vst_variance_expected-1', 'vst_variance_standardized-1', 'n_cells-1', 'highly_variable-1', 'means-1', 'dispersions-1', 'dispersions_norm-1', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'hvg'
    obsm: 'pca_cell_embeddings', 'umap_cell_embeddings'
    layers: 'norm_data', 'scale_data'

In [None]:
adata_10x = adata[adata.obs['Platform'] == '10X']
adata_smartseq2 = adata[adata.obs['Platform'] != '10X']

In [None]:
adata = sc.read_h5ad('/content/drive/MyDrive/CRC/crc_imap.h5ad')

# Calculate Cell-cell Communication Score

In [None]:
res = sq.gr.ligrec(
    adata_10x,
    n_perms=1000,
    cluster_key="celltype",
    copy=True,
    use_raw=False,
    transmitter_params={"categories": "ligand"},
    receiver_params={"categories": "receptor"}
)

In [None]:
res = sq.gr.ligrec(
    adata_smartseq2,
    n_perms=1000,
    cluster_key="celltype",
    copy=True,
    use_raw=False,
    transmitter_params={"categories": "ligand"},
    receiver_params={"categories": "receptor"}
)

In [None]:
res = sq.gr.ligrec(
    adata,
    n_perms=1000,
    cluster_key="celltype",
    copy=True,
    use_raw=False,
    transmitter_params={"categories": "ligand"},
    receiver_params={"categories": "receptor"}
)

array([[0.        , 0.3036845 , 0.        , ..., 0.        , 0.        ,
        3.55408144],
       [0.85271353, 2.18486834, 0.        , ..., 0.08563549, 0.        ,
        3.69033384],
       [1.0211699 , 0.67287207, 0.        , ..., 0.        , 0.        ,
        4.69783545],
       ...,
       [2.54793048, 1.6780293 , 0.        , ..., 0.        , 0.        ,
        4.14295197],
       [0.05602583, 1.5106163 , 0.        , ..., 0.05602579, 0.        ,
        2.6630652 ],
       [2.34689569, 2.03533983, 0.        , ..., 0.        , 0.        ,
        2.36067057]])

In [None]:
adata = sc.read_loom('/content/drive/MyDrive/CRC/crc_seurat.loom', sparse=False)

In [None]:
adata.write_h5ad('/content/drive/MyDrive/CRC/crc_seurat.h5ad')

In [None]:
adata = sc.read_h5ad('/content/drive/MyDrive/CRC/crc_seurat.h5ad')

In [None]:
adata.X

array([[0.        , 0.5672975 , 0.        , ..., 0.        , 0.        ,
        3.5560186 ],
       [0.79927653, 1.3627195 , 0.        , ..., 0.        , 0.        ,
        3.7273355 ],
       [0.5230687 , 0.1786391 , 0.        , ..., 0.        , 0.        ,
        4.165626  ],
       ...,
       [2.5479305 , 1.6780293 , 0.        , ..., 0.        , 0.        ,
        4.142952  ],
       [0.05602583, 1.5106163 , 0.        , ..., 0.05602579, 0.        ,
        2.6630652 ],
       [2.3468957 , 2.0353398 , 0.        , ..., 0.        , 0.        ,
        2.3606706 ]], dtype=float32)

In [None]:
sq.pl.ligrec(res, alpha=0.005, save='crc_ligen_show.pdf') # draw the plot

Output hidden; open in https://colab.research.google.com to view.

# Calculate Accuracy

In [10]:
import pandas as pd

In [11]:
pairs_true = pd.read_csv('/content/drive/MyDrive/CRC/crc_smartseq_original.csv')

In [16]:
t1

array([[False, False, False, ..., False, False, False],
       [False, False, False, ...,  True,  True,  True],
       [False, False, False, ...,  True,  True,  True],
       ...,
       [False, False, False, ..., False, False,  True],
       [ True,  True,  True, ...,  True, False, False],
       [False, False, False, ...,  True,  True,  True]])

In [12]:
t1 = (pairs_true.loc[2:,list(pairs_true.columns)[2:]].values.astype('float'))<0.05

In [14]:
pair_awgan = pd.read_csv('/content/drive/MyDrive/CRC/crc_10x_awgan_correct.csv')

In [15]:
pair_awgan

Unnamed: 0,cluster_1,Unnamed: 1,B cell,B cell.1,B cell.2,B cell.3,B cell.4,CD4 T cell,CD4 T cell.1,CD4 T cell.2,...,ILC,ILC.1,ILC.2,ILC.3,ILC.4,Myeloid cell,Myeloid cell.1,Myeloid cell.2,Myeloid cell.3,Myeloid cell.4
0,cluster_2,,B cell,CD4 T cell,CD8 T cell,ILC,Myeloid cell,B cell,CD4 T cell,CD8 T cell,...,B cell,CD4 T cell,CD8 T cell,ILC,Myeloid cell,B cell,CD4 T cell,CD8 T cell,ILC,Myeloid cell
1,source,target,,,,,,,,,...,,,,,,,,,,
2,FYN,JAK2,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
3,CCL2,JAK2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.983,1.0,1.0,1.0,0.0
4,KIT,JAK2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.976,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1012,CD200,CD200R1,0.005,0.0,0.0,0.0,0.124,0.0,0.0,0.0,...,1.0,0.999,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0
1013,CD200R1,CD200,0.005,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,1.0,1.0,1.0,0.124,0.0,1.0,1.0,1.0
1014,SERPINF1,PLXDC2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,,,,,,1.0,1.0,1.0,1.0,0.0
1015,ADO,ADORA2A,0.258,0.0,0.0,1.0,1.0,0.01,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0


In [17]:
awgan_data = (pair_awgan.loc[2:,list(pair_awgan.columns)[2:]].values.astype('float'))<0.05

In [18]:
awgan_data

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False,  True],
       [False, False, False, ..., False, False,  True],
       ...,
       [False, False, False, ..., False, False,  True],
       [False,  True,  True, ...,  True, False, False],
       [False, False, False, ...,  True,  True,  True]])

In [19]:
pair_imap = pd.read_csv('/content/drive/MyDrive/CRC/crc_10x_imap_correct.csv')

In [20]:
pair_imap.shape

(1017, 27)

In [21]:
imap_data = (pair_imap.loc[2:,list(pair_imap.columns)[2:]].values.astype('float'))<0.05

In [None]:
# real count number: 1015*25

In [None]:
abs(float('nan') - float('nan'))<0.01

False

In [None]:
count = 0
for i in range(2, 1017):
  for j in list(pair_imap.columns)[2:]:
    if abs(float(pair_imap.loc[i,j]) - float(pairs_true.loc[i,j]))<0.01 or abs(float(pair_imap.loc[i,j]) - float(pairs_true.loc[i,j]))==float('nan'):
      count+=1
count/(1015*25)

0.5521576354679802

In [None]:
count = 0
for i in range(2, 1017):
  for j in list(pair_awgan.columns)[2:]:
    if abs(float(pair_awgan.loc[i,j]) - float(pairs_true.loc[i,j]))<0.01 or abs(float(pair_awgan.loc[i,j]) - float(pairs_true.loc[i,j]))==float('nan'):
      count+=1
count/(1015*25)

0.6364137931034483

In [None]:
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if t1[i,j] == awgan_data[i,j]:
      count+=1
count/(1015*25)

0.8695960591133005

In [None]:
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if t1[i,j] == imap_data[i,j]:
      count+=1
count/(1015*25)

0.7583054187192119

In [None]:
for i in range(2, 1017):
  for j in range(2,27):
    pairs_true.iloc[i,j] = float(pairs_true.iloc[i,j])

In [None]:
pair_seurat = res['pvalues']

In [None]:
pair_seurat.to_csv('/content/drive/MyDrive/CRC/crc_seurat.csv')

In [None]:
pair_seurat = pd.read_csv('/content/drive/MyDrive/CRC/crc_seurat.csv')

In [None]:
seurat_data = (pair_seurat.loc[2:,list(pair_seurat.columns)[2:]].values.astype('float'))<0.05

In [None]:
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if t1[i,j] == seurat_data[i,j]:
      count+=1
count/(1015*25)

0.7970049261083744

# Calculate Precision and Recall

In [22]:
import pandas as pd

In [23]:
pairs_true = pd.read_csv('/content/drive/MyDrive/CRC/crc_smartseq_original.csv')

In [24]:
t1 = (pairs_true.loc[2:,list(pairs_true.columns)[2:]].values.astype('float'))<0.05

In [25]:
pair_awgan = pd.read_csv('/content/drive/MyDrive/CRC/crc_10x_awgan_correct.csv')

In [26]:
pair_awgan

Unnamed: 0,cluster_1,Unnamed: 1,B cell,B cell.1,B cell.2,B cell.3,B cell.4,CD4 T cell,CD4 T cell.1,CD4 T cell.2,...,ILC,ILC.1,ILC.2,ILC.3,ILC.4,Myeloid cell,Myeloid cell.1,Myeloid cell.2,Myeloid cell.3,Myeloid cell.4
0,cluster_2,,B cell,CD4 T cell,CD8 T cell,ILC,Myeloid cell,B cell,CD4 T cell,CD8 T cell,...,B cell,CD4 T cell,CD8 T cell,ILC,Myeloid cell,B cell,CD4 T cell,CD8 T cell,ILC,Myeloid cell
1,source,target,,,,,,,,,...,,,,,,,,,,
2,FYN,JAK2,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
3,CCL2,JAK2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.983,1.0,1.0,1.0,0.0
4,KIT,JAK2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.976,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1012,CD200,CD200R1,0.005,0.0,0.0,0.0,0.124,0.0,0.0,0.0,...,1.0,0.999,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0
1013,CD200R1,CD200,0.005,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,1.0,1.0,1.0,0.124,0.0,1.0,1.0,1.0
1014,SERPINF1,PLXDC2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,,,,,,1.0,1.0,1.0,1.0,0.0
1015,ADO,ADORA2A,0.258,0.0,0.0,1.0,1.0,0.01,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0


In [27]:
awgan_data = (pair_awgan.loc[2:,list(pair_awgan.columns)[2:]].values.astype('float'))<0.05

In [28]:
awgan_data

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False,  True],
       [False, False, False, ..., False, False,  True],
       ...,
       [False, False, False, ..., False, False,  True],
       [False,  True,  True, ...,  True, False, False],
       [False, False, False, ...,  True,  True,  True]])

In [29]:
pair_imap = pd.read_csv('/content/drive/MyDrive/CRC/crc_10x_imap_correct.csv')

In [30]:
pair_imap.shape

(1017, 27)

In [31]:
imap_data = (pair_imap.loc[2:,list(pair_imap.columns)[2:]].values.astype('float'))<0.05

In [37]:
pair_seurat = pd.read_csv('/content/drive/MyDrive/CRC/crc_seurat.csv')

In [38]:
seurat_data = (pair_seurat.loc[2:,list(pair_seurat.columns)[2:]].values.astype('float'))<0.05

array([[False, False, False, ..., False, False, False],
       [False, False, False, ...,  True,  True,  True],
       [False, False, False, ..., False,  True,  True],
       ...,
       [False, False, False, ..., False,  True,  True],
       [ True, False, False, ..., False, False, False],
       [False, False, False, ...,  True,  True,  True]])

In [42]:
# Precision

In [47]:
import numpy as np

In [60]:
# respan
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if (t1[i,j] == awgan_data[i,j]) and (t1[i,j]==True):
      count+=1
count/np.sum(awgan_data*1)

0.7663841470693565

In [52]:
# iMAP
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if (t1[i,j] == imap_data[i,j]) and (t1[i,j]==True):
      count+=1
count/np.sum(imap_data*1)

0.5779362815026153

In [53]:
# Seurat
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if (t1[i,j] == seurat_data[i,j]) and (t1[i,j]==True):
      count+=1
count/np.sum(seurat_data*1)

0.8490013315579228

In [None]:
# Recall

In [None]:
import numpy as np

In [54]:
# respan
count = 0
dom = 0
for i in range(0,1015):
  for j in range(0,25):
    if (t1[i,j] == awgan_data[i,j]) and (t1[i,j]==True):
      count+=1
    if (awgan_data[i,j] == False) and (t1[i,j]==True):
      dom += 1
count/(count+dom)

0.8260422027792074

In [55]:
# imap
count = 0
dom = 0
for i in range(0,1015):
  for j in range(0,25):
    if (t1[i,j] == imap_data[i,j]) and (t1[i,j]==True):
      count+=1
    if (imap_data[i,j] == False) and (t1[i,j]==True):
      dom += 1
count/(count+dom)

0.7819094184251159

In [56]:
# seurat
count = 0
dom = 0
for i in range(0,1015):
  for j in range(0,25):
    if (t1[i,j] == seurat_data[i,j]) and (t1[i,j]==True):
      count+=1
    if (seurat_data[i,j] == False) and (t1[i,j]==True):
      dom += 1
count/(count+dom)

0.41019042717447246

In [None]:
# Recall

In [33]:
list(pair_imap.columns)[2:]

['B cell',
 'B cell.1',
 'B cell.2',
 'B cell.3',
 'B cell.4',
 'CD4 T cell',
 'CD4 T cell.1',
 'CD4 T cell.2',
 'CD4 T cell.3',
 'CD4 T cell.4',
 'CD8 T cell',
 'CD8 T cell.1',
 'CD8 T cell.2',
 'CD8 T cell.3',
 'CD8 T cell.4',
 'ILC',
 'ILC.1',
 'ILC.2',
 'ILC.3',
 'ILC.4',
 'Myeloid cell',
 'Myeloid cell.1',
 'Myeloid cell.2',
 'Myeloid cell.3',
 'Myeloid cell.4']

In [34]:
# real count number: 1015*25

In [36]:
abs(float('nan') - float('nan'))<0.01

False

In [None]:
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if t1[i,j] == awgan_data[i,j]:
      count+=1
count/(1015*25)

0.8695960591133005

In [None]:
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if t1[i,j] == imap_data[i,j]:
      count+=1
count/(1015*25)

0.7583054187192119

In [None]:
for i in range(2, 1017):
  for j in range(2,27):
    pairs_true.iloc[i,j] = float(pairs_true.iloc[i,j])

In [None]:
pair_seurat = res['pvalues']

In [None]:
pair_seurat.to_csv('/content/drive/MyDrive/CRC/crc_seurat.csv')

In [None]:
pair_seurat = pd.read_csv('/content/drive/MyDrive/CRC/crc_seurat.csv')

In [None]:
seurat_data = (pair_seurat.loc[2:,list(pair_seurat.columns)[2:]].values.astype('float'))<0.05

In [None]:
count = 0
for i in range(0,1015):
  for j in range(0,25):
    if t1[i,j] == seurat_data[i,j]:
      count+=1
count/(1015*25)

0.7970049261083744