### Cell-cell interaction analysis of MUC6 cells
Goal: analyse cell-cell interactions with CellphoneDB
- with full counts: disease MUC6 cells vs all
- with fewer counts (more cells): disease MUC6 cells vs all

In all cases, downsample to a set number of cells per donor per cell type.

In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import sys
import os
from collections import Counter

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
import ast

In [2]:
import random

In [3]:
sc.logging.print_header()

scanpy==1.7.1 anndata==0.8.0 umap==0.4.6 numpy==1.20.1 scipy==1.6.1 pandas==1.2.3 scikit-learn==0.24.1 statsmodels==0.12.2 python-igraph==0.8.3 louvain==0.7.0 leidenalg==0.8.3


In [4]:
# Load the pooled healthy + disease AnnData object from disk
# (all genes, fine annotation, doublets removed — going by the file name).
ad1 = sc.read_h5ad('/nfs/team205/ao15/Megagut/Annotations_v3/h5ad/pooled_healthy_disease.remapped.allgenes.fine_annot.no_doublets.20230322.h5ad')

In [5]:
ad1

AnnData object with n_obs × n_vars = 1358576 × 36601
    var: 'gene_ids', 'feature_type', 'mito', 'ribo', 'hb', 'cc', 'ig', 'tcr', 'n_counts-0', 'n_counts_raw-0', 'n_counts_spliced-0', 'n_counts_unspliced-0', 'n_cells-0', 'n_cells_raw-0', 'n_cells_spliced-0', 'n_cells_unspliced-0', 'n_counts-1', 'n_counts_raw-1', 'n_counts_spliced-1', 'n_counts_unspliced-1', 'n_cells-1', 'n_cells_raw-1', 'n_cells_spliced-1', 'n_cells_unspliced-1'
    uns: 'age_unified_colors', 'control_vs_disease_colors', 'disease_colors', 'level_1_annot_colors', 'level_3_annot_colors', 'neighbors', 'organ_groups_colors', 'organ_unified_colors', 'study_colors', 'umap'
    obsm: 'X_mde', 'X_scANVI', 'X_umap', '_scvi_extra_continuous_covs'
    obsp: 'connectivities', 'distances'

In [6]:
def downsample_mask(clustering, to_thin, n_cells=200):
    """Build a boolean keep-mask that randomly thins selected groups.

    Parameters
    ----------
    clustering : sequence of hashable labels
        One group label per observation (list, array or pandas Series values).
    to_thin : iterable of labels
        Groups to downsample. Observations in any other group are always kept.
    n_cells : int or float, default 200
        Maximum number of observations to keep per thinned group (rounded to
        the nearest integer).

    Returns
    -------
    numpy.ndarray of bool, same length as ``clustering``
        True for every observation that should be kept.

    Notes
    -----
    * Sampling uses the ``random`` module; call ``random.seed`` beforehand for
      reproducible results.
    * A group with ``n_cells`` members or fewer is kept whole, and labels in
      ``to_thin`` that never occur in ``clustering`` are ignored.
    """
    thin_labels = set(to_thin)
    target = max(int(round(n_cells)), 0)

    # Start from "keep everything" and switch off the members of thinned groups
    # while recording their positions, in a single pass over the labels.
    mask = np.ones(len(clustering), dtype=bool)
    positions = {}
    for pos, label in enumerate(clustering):
        if label in thin_labels:
            positions.setdefault(label, []).append(pos)
            mask[pos] = False

    for members in positions.values():
        if len(members) <= target:
            # Not enough cells to thin: keep the whole group.
            mask[members] = True
            continue
        # random.sample needs a sequence (sets are rejected on Python >= 3.11).
        keep = random.sample(members, target)
        if keep:
            mask[keep] = True
    return mask

In [7]:
# Replace ad1.obs with the per-cell metadata table read from CSV
# (the first CSV column becomes the index).
# NOTE(review): confirm the CSV rows are in the same order as the cells in ad1 —
# presumably the assignment does not re-align rows by index; verify.
ad1.obs = pd.read_csv('/nfs/team205/ao15/Megagut/Annotations_v3/metadata/pooled_healthy_disease.remapped.allgenes.fine_annot.no_doublets.20230322.csv',index_col=0)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [8]:
ad1

AnnData object with n_obs × n_vars = 1358576 × 36601
    obs: 'latent_cell_probability', 'latent_RT_efficiency', 'cecilia22_predH', 'cecilia22_predH_prob', 'cecilia22_predH_uncertain', 'cecilia22_predL', 'cecilia22_predL_prob', 'cecilia22_predL_uncertain', 'elmentaite21_pred', 'elmentaite21_pred_prob', 'elmentaite21_pred_uncertain', 'suo22_pred', 'suo22_pred_prob', 'suo22_pred_uncertain', 'n_counts', 'log1p_n_counts', 'n_genes', 'log1p_n_genes', 'percent_mito', 'n_counts_mito', 'percent_ribo', 'n_counts_ribo', 'percent_hb', 'n_counts_hb', 'percent_top50', 'n_counts_raw', 'log1p_n_counts_raw', 'n_genes_raw', 'log1p_n_genes_raw', 'percent_mito_raw', 'n_counts_mito_raw', 'percent_ribo_raw', 'n_counts_ribo_raw', 'percent_hb_raw', 'n_counts_hb_raw', 'percent_top50_raw', 'n_counts_spliced', 'log1p_n_counts_spliced', 'n_genes_spliced', 'log1p_n_genes_spliced', 'percent_mito_spliced', 'n_counts_mito_spliced', 'percent_ribo_spliced', 'n_counts_ribo_spliced', 'percent_hb_spliced', 'n_counts_hb_s

In [9]:
ad1.obs.disease.value_counts()

control                   489029
inutero                   333624
neighbouring_cancer       152292
cancer_gastric            101594
neighbouring_inflammed     77136
pediatric_IBD              67092
crohns_disease             48074
ulcerative_colitis         35891
cancer_colorectal          28684
neighbouring_polyps        15648
preterm                     9512
Name: disease, dtype: int64

In [10]:
# Keep only cells whose disease label is neither 'inutero' nor 'preterm'.
ad1 = ad1[~ad1.obs['disease'].isin(['inutero', 'preterm']), :].copy()

In [11]:
ad1.X.max()

40699.0

In [12]:
# Scale every cell to 10,000 total counts.
# NOTE(review): normalize_per_cell is, I believe, deprecated in scanpy in favour of
# sc.pp.normalize_total — confirm the two are equivalent here before switching.
sc.pp.normalize_per_cell(ad1, counts_per_cell_after=1e4)
# No sc.pp.log1p(ad1) on purpose: only normalise for CellphoneDB, no log transform.

In [13]:
ad1.X.max()

9702.602

In [14]:
# Build a combined "<donor>_<cell type>" label per cell; these labels are the
# groups used for downsampling.
ad1.obs['donor_celltype'] = (
    ad1.obs['donorID_unified'].astype(str)
    + '_'
    + ad1.obs['level_3_annot'].astype(str)
)
ad1.obs['donor_celltype'].value_counts()

D55_Keratinocyte_stratified        14295
D56_Keratinocyte_stratified        11511
D58_Keratinocyte_stratified        10007
D54_Keratinocyte_stratified         8921
D11_Colonocyte                      7335
                                   ...  
D56_Pericyte                           1
D141_Pericyte                          1
D92_BEST4_enterocyte_colonocyte        1
D18_Macrophage_MMP9                    1
D87_T/NK_cycling                       1
Name: donor_celltype, Length: 7827, dtype: int64

In [15]:
# Downsample to at most 50 cells per cell type per donor.
# Seed Python's RNG first so the sampled cells (and the files exported from
# them further down) are reproducible across kernel restarts.
random.seed(0)
group_sizes = ad1.obs['donor_celltype'].value_counts()
# Take the group names straight from the index: the column name produced by
# value_counts().reset_index() is not the same across pandas versions.
vc = group_sizes[group_sizes > 50].index
mask_p = downsample_mask(ad1.obs['donor_celltype'], vc.tolist(), n_cells=50)
ad1 = ad1[mask_p]
ad1.obs.donor_celltype.value_counts()

D123_Cycling             50
Dpool3_Trm_CD8           50
D10_EC_venous            50
D127_Trm_CD8             50
D41_EC_venous            50
                         ..
D121_Macrophage_LYVE1     1
D52_Enteroendocrine       1
D40_DC_langerhans         1
D58_EC_arterial_2         1
D95_Macrophage_LYVE1      1
Name: donor_celltype, Length: 7827, dtype: int64

In [16]:
ad1

View of AnnData object with n_obs × n_vars = 192141 × 36601
    obs: 'latent_cell_probability', 'latent_RT_efficiency', 'cecilia22_predH', 'cecilia22_predH_prob', 'cecilia22_predH_uncertain', 'cecilia22_predL', 'cecilia22_predL_prob', 'cecilia22_predL_uncertain', 'elmentaite21_pred', 'elmentaite21_pred_prob', 'elmentaite21_pred_uncertain', 'suo22_pred', 'suo22_pred_prob', 'suo22_pred_uncertain', 'n_counts', 'log1p_n_counts', 'n_genes', 'log1p_n_genes', 'percent_mito', 'n_counts_mito', 'percent_ribo', 'n_counts_ribo', 'percent_hb', 'n_counts_hb', 'percent_top50', 'n_counts_raw', 'log1p_n_counts_raw', 'n_genes_raw', 'log1p_n_genes_raw', 'percent_mito_raw', 'n_counts_mito_raw', 'percent_ribo_raw', 'n_counts_ribo_raw', 'percent_hb_raw', 'n_counts_hb_raw', 'percent_top50_raw', 'n_counts_spliced', 'log1p_n_counts_spliced', 'n_genes_spliced', 'log1p_n_genes_spliced', 'percent_mito_spliced', 'n_counts_mito_spliced', 'percent_ribo_spliced', 'n_counts_ribo_spliced', 'percent_hb_spliced', 'n_coun

In [17]:
ad1.obs.organ_groups.value_counts()

Large_intestine    55277
Small_intestine    51706
Stomach            50574
Oral_mucosa        23181
Oesophagus          5964
Salivary_gland      5439
Name: organ_groups, dtype: int64

In [18]:
# Restrict the object to cells annotated as small intestine.
ad1 = ad1[ad1.obs['organ_groups'].isin(['Small_intestine']), :].copy()

In [19]:
ad1

AnnData object with n_obs × n_vars = 51706 × 36601
    obs: 'latent_cell_probability', 'latent_RT_efficiency', 'cecilia22_predH', 'cecilia22_predH_prob', 'cecilia22_predH_uncertain', 'cecilia22_predL', 'cecilia22_predL_prob', 'cecilia22_predL_uncertain', 'elmentaite21_pred', 'elmentaite21_pred_prob', 'elmentaite21_pred_uncertain', 'suo22_pred', 'suo22_pred_prob', 'suo22_pred_uncertain', 'n_counts', 'log1p_n_counts', 'n_genes', 'log1p_n_genes', 'percent_mito', 'n_counts_mito', 'percent_ribo', 'n_counts_ribo', 'percent_hb', 'n_counts_hb', 'percent_top50', 'n_counts_raw', 'log1p_n_counts_raw', 'n_genes_raw', 'log1p_n_genes_raw', 'percent_mito_raw', 'n_counts_mito_raw', 'percent_ribo_raw', 'n_counts_ribo_raw', 'percent_hb_raw', 'n_counts_hb_raw', 'percent_top50_raw', 'n_counts_spliced', 'log1p_n_counts_spliced', 'n_genes_spliced', 'log1p_n_genes_spliced', 'percent_mito_spliced', 'n_counts_mito_spliced', 'percent_ribo_spliced', 'n_counts_ribo_spliced', 'percent_hb_spliced', 'n_counts_hb_spl

In [20]:
# Dense expression table for the small-intestine cells, transposed so that
# genes are rows and cell barcodes are columns.
df = pd.DataFrame(
    ad1.X.todense(),
    index=ad1.obs_names,
    columns=ad1.var_names,
).T
df

index,AACCGCGGTTCCCTTG-HCA_A_GT12934998,AACTCTTCAAGCTGAG-HCA_A_GT12934998,AATCGGTCAATGCCAT-HCA_A_GT12934998,ACATCAGGTTCAGCGC-HCA_A_GT12934998,ACCCACTGTATAATGG-HCA_A_GT12934998,ACGAGGATCGGTCTAA-HCA_A_GT12934998,ACGGCCACAATCCAAC-HCA_A_GT12934998,ACTGAACGTACCGCTG-HCA_A_GT12934998,ACTGCTCAGTCCAGGA-HCA_A_GT12934998,AGGTCATAGCTGCAAG-HCA_A_GT12934998,...,TTCGAAGGTTCGCGAC-GSM4766849,TTCTCAAGTGAGCGAT-GSM4766849,TTGACTTAGTGCGTGA-GSM4766849,TTGCCGTGTGTGCCTG-GSM4766849,TTGGAACCAGTGACAG-GSM4766849,TTGGCAACAGTGGAGT-GSM4766849,TTTACTGGTGGACGAT-GSM4766849,TTTACTGTCCTTGCCA-GSM4766849,TTTGGTTAGGCCCGTT-GSM4766849,TTTGGTTTCGGCCGAT-GSM4766849
MIR1302-2HG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FAM138A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OR4F5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AC141272.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC023491.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC007325.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC007325.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Write the genes x cells table (healthy + disease small-intestine cells) to CSV.
df.to_csv('/home/jovyan/ao15/Megagut/Annotations_v3/disease_analysis/interactions_cellphoneDB/counts/pooled_healthy_disease.remapped.allgenes.AP_SI.counts_normalisedonly.csv')

In [22]:
# Cell barcode -> fine (level 3) cell-type annotation for all small-intestine cells.
meta = ad1.obs['level_3_annot']
meta

index
AACCGCGGTTCCCTTG-HCA_A_GT12934998         Monocyte
AACTCTTCAAGCTGAG-HCA_A_GT12934998    Tnaive/cm_CD8
AATCGGTCAATGCCAT-HCA_A_GT12934998         Monocyte
ACATCAGGTTCAGCGC-HCA_A_GT12934998    Tnaive/cm_CD4
ACCCACTGTATAATGG-HCA_A_GT12934998    B_plasmablast
                                         ...      
TTGGCAACAGTGGAGT-GSM4766849               Monocyte
TTTACTGGTGGACGAT-GSM4766849          NK_CD56bright
TTTACTGTCCTTGCCA-GSM4766849               Trm_Th17
TTTGGTTAGGCCCGTT-GSM4766849                NK_CD16
TTTGGTTTCGGCCGAT-GSM4766849             Enterocyte
Name: level_3_annot, Length: 51706, dtype: object

In [23]:
# Write the barcode -> cell-type table (healthy + disease small-intestine cells) to CSV.
meta.to_csv('/home/jovyan/ao15/Megagut/Annotations_v3/disease_analysis/interactions_cellphoneDB/meta/pooled_healthy_disease.remapped.allgenes.AP_SI.meta.csv')

In [24]:
ad1.obs.disease.value_counts()

control                   18818
pediatric_IBD              9910
crohns_disease             9734
neighbouring_inflammed     8052
neighbouring_polyps        4524
neighbouring_cancer         668
Name: disease, dtype: int64

In [25]:
# Healthy subset: cells whose disease label is 'control'.
healthy = ad1[ad1.obs['disease'].isin(['control']), :].copy()

In [26]:
# Disease subset: cells labelled 'crohns_disease' or 'pediatric_IBD'.
disease = ad1[ad1.obs['disease'].isin(['crohns_disease','pediatric_IBD']), :].copy()

In [27]:
healthy

AnnData object with n_obs × n_vars = 18818 × 36601
    obs: 'latent_cell_probability', 'latent_RT_efficiency', 'cecilia22_predH', 'cecilia22_predH_prob', 'cecilia22_predH_uncertain', 'cecilia22_predL', 'cecilia22_predL_prob', 'cecilia22_predL_uncertain', 'elmentaite21_pred', 'elmentaite21_pred_prob', 'elmentaite21_pred_uncertain', 'suo22_pred', 'suo22_pred_prob', 'suo22_pred_uncertain', 'n_counts', 'log1p_n_counts', 'n_genes', 'log1p_n_genes', 'percent_mito', 'n_counts_mito', 'percent_ribo', 'n_counts_ribo', 'percent_hb', 'n_counts_hb', 'percent_top50', 'n_counts_raw', 'log1p_n_counts_raw', 'n_genes_raw', 'log1p_n_genes_raw', 'percent_mito_raw', 'n_counts_mito_raw', 'percent_ribo_raw', 'n_counts_ribo_raw', 'percent_hb_raw', 'n_counts_hb_raw', 'percent_top50_raw', 'n_counts_spliced', 'log1p_n_counts_spliced', 'n_genes_spliced', 'log1p_n_genes_spliced', 'percent_mito_spliced', 'n_counts_mito_spliced', 'percent_ribo_spliced', 'n_counts_ribo_spliced', 'percent_hb_spliced', 'n_counts_hb_spl

In [28]:
disease

AnnData object with n_obs × n_vars = 19644 × 36601
    obs: 'latent_cell_probability', 'latent_RT_efficiency', 'cecilia22_predH', 'cecilia22_predH_prob', 'cecilia22_predH_uncertain', 'cecilia22_predL', 'cecilia22_predL_prob', 'cecilia22_predL_uncertain', 'elmentaite21_pred', 'elmentaite21_pred_prob', 'elmentaite21_pred_uncertain', 'suo22_pred', 'suo22_pred_prob', 'suo22_pred_uncertain', 'n_counts', 'log1p_n_counts', 'n_genes', 'log1p_n_genes', 'percent_mito', 'n_counts_mito', 'percent_ribo', 'n_counts_ribo', 'percent_hb', 'n_counts_hb', 'percent_top50', 'n_counts_raw', 'log1p_n_counts_raw', 'n_genes_raw', 'log1p_n_genes_raw', 'percent_mito_raw', 'n_counts_mito_raw', 'percent_ribo_raw', 'n_counts_ribo_raw', 'percent_hb_raw', 'n_counts_hb_raw', 'percent_top50_raw', 'n_counts_spliced', 'log1p_n_counts_spliced', 'n_genes_spliced', 'log1p_n_genes_spliced', 'percent_mito_spliced', 'n_counts_mito_spliced', 'percent_ribo_spliced', 'n_counts_ribo_spliced', 'percent_hb_spliced', 'n_counts_hb_spl

In [29]:
# Dense expression table for the healthy subset, transposed so that
# genes are rows and cell barcodes are columns.
df = pd.DataFrame(
    healthy.X.todense(),
    index=healthy.obs_names,
    columns=healthy.var_names,
).T
df

index,AACCGCGGTTCCCTTG-HCA_A_GT12934998,AACTCTTCAAGCTGAG-HCA_A_GT12934998,AATCGGTCAATGCCAT-HCA_A_GT12934998,ACATCAGGTTCAGCGC-HCA_A_GT12934998,ACCCACTGTATAATGG-HCA_A_GT12934998,ACGAGGATCGGTCTAA-HCA_A_GT12934998,ACGGCCACAATCCAAC-HCA_A_GT12934998,ACTGAACGTACCGCTG-HCA_A_GT12934998,ACTGCTCAGTCCAGGA-HCA_A_GT12934998,AGGTCATAGCTGCAAG-HCA_A_GT12934998,...,TTTCCTCGTTAAGATG-HT-188-Adult-Duo,TTTGCGCCACTAGTAC-HT-188-Adult-Duo,TTTGCGCGTGAGTATA-HT-188-Adult-Duo,TTTGCGCTCAACACCA-HT-188-Adult-Duo,TTTGCGCTCAGAGCTT-HT-188-Adult-Duo,TTTGCGCTCTTGAGGT-HT-188-Adult-Duo,TTTGGTTAGAGTAATC-HT-188-Adult-Duo,TTTGGTTTCAAGGTAA-HT-188-Adult-Duo,TTTGTCACAAGCGAGT-HT-188-Adult-Duo,TTTGTCATCTCGCATC-HT-188-Adult-Duo
MIR1302-2HG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FAM138A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OR4F5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AC141272.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC023491.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC007325.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC007325.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# Write the genes x cells table for the healthy subset to CSV.
df.to_csv('/home/jovyan/ao15/Megagut/Annotations_v3/disease_analysis/interactions_cellphoneDB/counts/pooled_healthy.remapped.allgenes.AP_SI.counts_normalisedonly.csv')

In [31]:
# Dense expression table for the disease subset, transposed so that
# genes are rows and cell barcodes are columns.
df = pd.DataFrame(
    disease.X.todense(),
    index=disease.obs_names,
    columns=disease.var_names,
).T
df

index,AAACCTGAGGCCCTCA-4918STDY7273964,AAACGGGGTAAAGGAG-4918STDY7273964,AAAGATGAGTCCAGGA-4918STDY7273964,AAAGATGCAAGCCCAC-4918STDY7273964,AAAGATGTCAACGAAA-4918STDY7273964,AAAGCAAAGACTGGGT-4918STDY7273964,AAAGCAAAGAGCTGGT-4918STDY7273964,AAAGCAACACCATCCT-4918STDY7273964,AAAGTAGAGAATGTTG-4918STDY7273964,AAAGTAGAGTCCAGGA-4918STDY7273964,...,TTCGAAGGTTCGCGAC-GSM4766849,TTCTCAAGTGAGCGAT-GSM4766849,TTGACTTAGTGCGTGA-GSM4766849,TTGCCGTGTGTGCCTG-GSM4766849,TTGGAACCAGTGACAG-GSM4766849,TTGGCAACAGTGGAGT-GSM4766849,TTTACTGGTGGACGAT-GSM4766849,TTTACTGTCCTTGCCA-GSM4766849,TTTGGTTAGGCCCGTT-GSM4766849,TTTGGTTTCGGCCGAT-GSM4766849
MIR1302-2HG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FAM138A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OR4F5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AL627309.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AC141272.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC023491.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC007325.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AC007325.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
# Write the genes x cells table for the disease subset to CSV.
df.to_csv('/home/jovyan/ao15/Megagut/Annotations_v3/disease_analysis/interactions_cellphoneDB/counts/pooled_disease.remapped.allgenes.AP_SI.counts_normalisedonly.csv')

In [33]:
# Cell barcode -> fine (level 3) cell-type annotation for the healthy subset.
meta = healthy.obs['level_3_annot']
meta

index
AACCGCGGTTCCCTTG-HCA_A_GT12934998                       Monocyte
AACTCTTCAAGCTGAG-HCA_A_GT12934998                  Tnaive/cm_CD8
AATCGGTCAATGCCAT-HCA_A_GT12934998                       Monocyte
ACATCAGGTTCAGCGC-HCA_A_GT12934998                  Tnaive/cm_CD4
ACCCACTGTATAATGG-HCA_A_GT12934998                  B_plasmablast
                                                ...             
TTTGCGCTCTTGAGGT-HT-188-Adult-Duo              Goblet_progenitor
TTTGGTTAGAGTAATC-HT-188-Adult-Duo                   B_plasma_IgG
TTTGGTTTCAAGGTAA-HT-188-Adult-Duo    BEST4_enterocyte_colonocyte
TTTGTCACAAGCGAGT-HT-188-Adult-Duo                Epithelial_stem
TTTGTCATCTCGCATC-HT-188-Adult-Duo                Epithelial_stem
Name: level_3_annot, Length: 18818, dtype: object

In [34]:
# Write the barcode -> cell-type table for the healthy subset to CSV.
meta.to_csv('/home/jovyan/ao15/Megagut/Annotations_v3/disease_analysis/interactions_cellphoneDB/meta/pooled_healthy.remapped.allgenes.AP_SI.meta.csv')

In [35]:
# Cell barcode -> fine (level 3) cell-type annotation for the disease subset.
meta = disease.obs['level_3_annot']
meta

index
AAACCTGAGGCCCTCA-4918STDY7273964         gdT_naive
AAACGGGGTAAAGGAG-4918STDY7273964           B_GC_II
AAAGATGAGTCCAGGA-4918STDY7273964    Goblet_cycling
AAAGATGCAAGCCCAC-4918STDY7273964     Tnaive/cm_CD4
AAAGATGTCAACGAAA-4918STDY7273964            B_GC_I
                                         ...      
TTGGCAACAGTGGAGT-GSM4766849               Monocyte
TTTACTGGTGGACGAT-GSM4766849          NK_CD56bright
TTTACTGTCCTTGCCA-GSM4766849               Trm_Th17
TTTGGTTAGGCCCGTT-GSM4766849                NK_CD16
TTTGGTTTCGGCCGAT-GSM4766849             Enterocyte
Name: level_3_annot, Length: 19644, dtype: object

In [36]:
# Write the barcode -> cell-type table for the disease subset to CSV.
meta.to_csv('/home/jovyan/ao15/Megagut/Annotations_v3/disease_analysis/interactions_cellphoneDB/meta/pooled_disease.remapped.allgenes.AP_SI.meta.csv')

### Run CellphoneDB (CPDB) using the CellGenIT Singularity container `/nfs/cellgeni/singularity/images/cellphonedb-v3.0.2.sif`