In [1]:
import scanpy as sc
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import networkx as nx
from collections import defaultdict
import scprep

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# Load the reference AnnData object; later cells read its `n_vars`, `uns` and `var`.
reference_path = 'data/reference.h5ad'
adata_ref = sc.read_h5ad(reference_path)

In [4]:
# Keep only the first `threshold` rows of the per-group gene-name table, where
# `threshold` is 10% of the number of variables in the reference.
# NOTE(review): presumably the rows of uns['rank_genes_groups']['names'] are ordered
# by differential-expression rank, so this selects the top genes per group — confirm.
top_fraction = 0.1
threshold = int(adata_ref.n_vars * top_fraction)
ranked_gene_names = adata_ref.uns['rank_genes_groups']['names']
de = pd.DataFrame(ranked_gene_names).iloc[:threshold]

In [5]:
# Build `de_ct`: same layout as `de` (one column per group), but every entry is
# replaced by the value of the 'SYMBOL' column of `adata_ref.var` for that entry,
# i.e. the entries of `de` are used as labels into the `adata_ref.var` index.
symbol_by_var_name = adata_ref.var['SYMBOL']
de_ct = de.copy()
for cell_type in list(de.columns):
    var_names = de[cell_type]
    de_ct[cell_type] = symbol_by_var_name.loc[var_names].values

In [6]:
# Annotate each edge of every gene cluster's `*_omnipath_genes.tsv` file with the
# cell types (columns of `de_ct`) in which each of its two genes appears.
#
# Input  (per cluster, tab-separated, no header): column 0 = gene1, column 1 = gene2,
#        column 2 = edge type.
# Output (per cluster, tab-separated, no header):
#        gene1, gene2, edge type, comma-joined cell types of gene1, comma-joined cell
#        types of gene2. Edges where either gene appears in no `de_ct` column are dropped.

# Invert `de_ct` once into gene -> [cell types containing it], in `de_ct` column order.
# This replaces four full-frame `de_ct == gene` scans per edge row with a dict lookup
# and yields exactly the same cell-type lists. Missing symbols are skipped because a
# NaN never compares equal to anything in the original `==` test either.
cell_types_by_gene = {}
for cell_type in de_ct.columns:
    for gene in de_ct[cell_type].dropna().unique():
        cell_types_by_gene.setdefault(gene, []).append(cell_type)

for gene_cluster in range(6):
    omnipath_edges = pd.read_csv(
        f'results/Cluster_{gene_cluster}_omnipath_genes.tsv', sep='\t', header=None
    )
    gene_cluster_edges_with_cell_state_specificity = []

    for gene1, gene2, edgetype in zip(omnipath_edges[0], omnipath_edges[1], omnipath_edges[2]):
        celltypes_gene1 = cell_types_by_gene.get(gene1, [])
        celltypes_gene2 = cell_types_by_gene.get(gene2, [])

        # Keep the edge only if both endpoints are found in at least one cell type.
        if celltypes_gene1 and celltypes_gene2:
            gene_cluster_edges_with_cell_state_specificity.append(
                f"{gene1}\t{gene2}\t{edgetype}\t{','.join(celltypes_gene1)}\t{','.join(celltypes_gene2)}"
            )

    with open(f'results/Cluster_{gene_cluster}_omnipath_genes_with_cell_types.tsv', 'w') as f:
        f.writelines(f"{edge}\n" for edge in gene_cluster_edges_with_cell_state_specificity)

In [15]:
# Cell types assigned to each gene cluster (keys are the gene-cluster numbers 0-5).
# Later cells use these lists as both the row and column labels of the per-cluster
# cell-cell communication matrices.
gene_cluster_cell_types = {
    0: [
        'B_preGC', 'B_mem', 'Macrophages_M1', 'DC_cDC1',
        'NKT', 'Mast', 'Monocytes', 'Endo',
        'DC_cDC2', 'VSMC', 'T_CD4+', 'ILC',
        'B_activated', 'Macrophages_M2', 'DC_pDC', 'NK',
    ],
    1: [
        'FDC', 'B_GC_LZ', 'T_CD4+_TfH_GC',
        'B_GC_prePB', 'B_Cycling', 'B_GC_DZ',
    ],
    2: [
        'DC_CCR7+', 'T_TfR', 'T_Treg',
        'T_CD4+_naive', 'T_CD8+_naive', 'T_TIM3+',
        'T_CD8+_cytotoxic', 'T_CD4+_TfH', 'T_CD8+_CD161+',
    ],
    3: ['B_naive'],
    4: ['B_plasma'],
    5: ['B_IFN'],
}

In [28]:
# Build, for every gene cluster, two square count matrices over that cluster's cell
# types and write each one as a long-format edge list (Source, Target, Weight):
#   * intercellular: one count per (source cell type, target cell type) pair for every
#     edge whose type contains 'translational';
#   * intracellular: one count on the diagonal for every cell type shared by both
#     endpoints of an edge whose type contains 'transcriptional'.
# Input rows come from `*_omnipath_genes_with_cell_types.tsv`: column 2 = edge type,
# column 3 = comma-joined cell types of gene1, column 4 = comma-joined cell types of gene2.


def _to_edge_list(matrix):
    """Melt a matrix whose rows are sources and columns are targets into long format.

    Returns a DataFrame with columns 'Source', 'Target', 'Weight' and a fresh
    0..n-1 index, ordered target-by-target (melt walks the columns).
    """
    return (
        matrix.rename_axis('Source')
        .reset_index()
        .melt('Source', value_name='Weight', var_name='Target')
        .reset_index(drop=True)
    )


for gene_cluster in range(6):
    cluster_cell_types = gene_cluster_cell_types[gene_cluster]

    try:
        df = pd.read_csv(
            f'results/Cluster_{gene_cluster}_omnipath_genes_with_cell_types.tsv',
            sep='\t',
            header=None,
        )
    except pd.errors.EmptyDataError:
        # A cluster with no annotated edges yields an empty file; treat it as
        # "no edges" so the all-zero matrices are still written.
        df = pd.DataFrame(columns=range(5))

    # Rows are sources, columns are targets; only this cluster's cell types are kept.
    ccc = pd.DataFrame(0, index=cluster_cell_types, columns=cluster_cell_types)
    ccc_intracellular = pd.DataFrame(0, index=cluster_cell_types, columns=cluster_cell_types)

    for edgetype, sources, targets in zip(df[2], df[3], df[4]):
        # Ignore cell types that do not belong to this gene cluster.
        source_celltypes = set(sources.split(',')).intersection(cluster_cell_types)
        target_celltypes = set(targets.split(',')).intersection(cluster_cell_types)

        if 'transcriptional' in edgetype:
            # Only within-cell-type relationships: both genes must be in the same type.
            for cell_type in source_celltypes & target_celltypes:
                ccc_intracellular.loc[cell_type, cell_type] += 1

        if 'translational' in edgetype:
            # Within- and between-cell-type relationships.
            for source in source_celltypes:
                for target in target_celltypes:
                    # .loc[row, column] puts the source on the row axis, which is the
                    # axis later labelled 'Source'. The previous `ccc[source][target]`
                    # indexed column-then-row and so wrote the transposed matrix; it
                    # was also a chained assignment, which does not update the frame
                    # under pandas Copy-on-Write.
                    ccc.loc[source, target] += 1

    ccc = _to_edge_list(ccc)
    ccc.to_csv(f'results/Cluster_{gene_cluster}_intercellular_CCC_graph.tsv', sep='\t')

    ccc_intracellular = _to_edge_list(ccc_intracellular)
    ccc_intracellular.to_csv(f'results/Cluster_{gene_cluster}_intracellular_CCC_graph.tsv', sep='\t')

In [58]:
# For every gene cluster, stack the intercellular and intracellular edge lists and
# append a column saying which of the two each row came from, then write the result
# to `results/CCC_{n}_with_intracellular.csv`.
for gene_cluster in range(6):
    # Read the file name that the graph-building cell actually writes
    # (`..._intercellular_CCC_graph.tsv`); the former `..._CCC_graph.tsv` name is not
    # produced anywhere in this notebook, so a fresh run read a stale or missing file.
    ccc_intercellular = pd.read_csv(
        f'results/Cluster_{gene_cluster}_intercellular_CCC_graph.tsv', sep='\t', index_col=0
    )
    ccc_intracellular = pd.read_csv(
        f'results/Cluster_{gene_cluster}_intracellular_CCC_graph.tsv', sep='\t', index_col=0
    )

    df = pd.concat((ccc_intercellular, ccc_intracellular), ignore_index=True)
    df['edge_scope'] = (
        ['intercellular'] * len(ccc_intercellular)
        + ['intracellular'] * len(ccc_intracellular)
    )
    # The output file has always used positional integer column labels 1..N (the
    # original names were discarded by `ignore_index=True`); keep that layout so
    # existing consumers of the CSV are unaffected.
    df.columns = range(1, df.shape[1] + 1)

    df.to_csv(f'results/CCC_{gene_cluster}_with_intracellular.csv')

In [19]:
# Combine the intercellular edge lists of all six gene clusters into one table with
# columns Source, Target, Gene_Cluster_0 ... Gene_Cluster_5 and write it to
# `results/all_intercellular_CCC.csv`.
#
# Every cluster (including cluster 0) is read from the `_intercellular_CCC_graph.tsv`
# file written by the graph-building cell. Weights are aligned on the (Source, Target)
# pair rather than on row position: the per-cluster files cover different cell-type
# sets and therefore have different lengths and row orders, so positional
# concatenation attached weights to the wrong pairs. A pair that is absent from a
# cluster's file is left as NaN in that cluster's column. When all files list the
# same pairs in the same order, the result is identical to positional concatenation.
weights_by_cluster = []
for c in range(6):
    cluster_edges = pd.read_csv(
        f'results/Cluster_{c}_intercellular_CCC_graph.tsv', sep='\t', index_col=0
    )
    weights_by_cluster.append(
        cluster_edges.set_index(['Source', 'Target'])['Weight'].rename(f'Gene_Cluster_{c}')
    )

# Outer join on the (Source, Target) index, then turn the keys back into columns.
ccc = pd.concat(weights_by_cluster, axis=1).reset_index()
ccc.to_csv('results/all_intercellular_CCC.csv')