In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
from collections import Counter
import scipy
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from sklearn import metrics
import gget
import tabulate
from sklearn.neighbors import BallTree
from networkx.drawing.nx_agraph import graphviz_layout
from scipy.stats import gmean
import networkx as nx
import sys

# locals
sys.path.append('../')
import utils as ut

sys.path.append("/home/cstansbu/.local/lib/python3.7/site-packages/pycircos/")
import pycircos

KeyboardInterrupt: 

In [None]:
fPath = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/LR/LewisLabUSCS/Mouse/Mouse-2020-Baccin-LR-pairs.xlsx"

# Read the ligand-receptor pair workbook and report its dimensions as loaded.
lr = pd.read_excel(fPath, engine="openpyxl")
print(lr.shape)

# Append upper-cased copies of the mouse gene symbols; these 'ligand' and
# 'receptor' columns are what the rest of the notebook matches genes against.
lr = lr.assign(
    ligand=lr['Ligand.Mouse'].str.upper(),
    receptor=lr['Receptor.Mouse'].str.upper(),
)
lr.head()

In [None]:
cardOutDir = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDOuputs/"

# Gather every file in cardOutDir whose name contains 'macrophage' into one
# long frame. Each row is tagged with a 'key' column holding the file-name
# prefix before the first underscore.
dfList = []

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined frame identical on every run and every filesystem.
for f in sorted(os.listdir(cardOutDir)):
    if 'macrophage' not in f:
        continue
    fullPath = f"{cardOutDir}{f}"
    key = f.split("_")[0]
    sampleDf = pd.read_csv(fullPath)
    # the first, unnamed CSV column is relabelled as the spot identifier
    sampleDf = sampleDf.rename(columns={'Unnamed: 0' : 'spotId'})
    sampleDf['key'] = key
    dfList.append(sampleDf)

# Fail with a readable message instead of pd.concat's "No objects to concatenate".
if not dfList:
    raise ValueError(f"no files containing 'macrophage' found in {cardOutDir}")

df = pd.concat(dfList, ignore_index=True)
print(f"{df.shape=}")
print(df['key'].value_counts())
print()
df.head()

In [None]:
sptDir =  "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDInputs/"

# spt maps the file-name prefix (text before the first underscore) to a frame
# with one row per original CSV column and one column per 'gene' entry.
spt = {}

# os.listdir() order is arbitrary. Sorting fixes the print order, the dict
# insertion order, and which file wins if two files share the same prefix.
for f in sorted(os.listdir(sptDir)):
    if "spt" in f and "macrophage" in f:
        print(f)
        fullPath = f"{sptDir}{f}"
        key = f.split("_")[0]
        sdf = pd.read_csv(fullPath)
        sdf = sdf.rename(columns={'Unnamed: 0' : 'gene'})
        sdf = sdf.set_index('gene')
        sdf = sdf.T
        # Literal '-' -> '.' substitution on the row labels; regex=False keeps
        # this independent of the pandas version's default for `regex`.
        # NOTE(review): presumably done to match spot ids used elsewhere - confirm.
        sdf.index = sdf.index.str.replace("-", ".", regex=False)
        spt[key] = sdf

print('done')

In [None]:
cardDir =  "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDInputs/"
xyPath =  "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/combinedEmbedding.csv"

# Embedding table: supplies x / y / colors per cellId and the cell-type names.
xy = pd.read_csv(xyPath)
cTypes = sorted(list(xy['cellTypes'].unique()))
print(cTypes)
print()

keys = ['ND', 'HFD8', 'HFD14']

# Per-sample containers, both keyed by the entries of `keys`:
#   labels[key] -> cluster table left-joined with the embedding columns
#   rna[key]    -> cells x genes expression frame after ut.normalize
rna = {}
labels = {}

for key in keys:
    labelPath = f"{cardDir}{key}_macrophage_clusters.csv"
    lf = pd.read_csv(labelPath)

    # Left join keeps every labelled cell even when it has no embedding row.
    # NOTE(review): if xy['cellId'] is not unique across samples this join
    # duplicates label rows - confirm cellId uniqueness in the embedding file.
    lf = pd.merge(lf, xy[['x', 'y', 'cellId', 'colors']],
                  how='left',
                  on='cellId')

    labels[key] = lf

    rnaPath = f"{cardDir}{key}_macrophage_rna.csv"
    rf = pd.read_csv(rnaPath)

    # Transpose so rows are the original CSV columns, then promote the first
    # transposed row (the original first column) to the column header.
    rf = rf.T
    new_header = rf.iloc[0]
    rf = rf[1:]
    rf.columns = new_header
    rf.index.names = ['cellId']

    # Transposing a frame that mixes a label column with numeric columns leaves
    # every column as dtype `object`; restore numeric dtypes so normalisation
    # and the later statistics do not run on boxed Python objects.
    rf = rf.infer_objects()

    # project helper; second argument is presumably the per-cell target sum - TODO confirm
    rf = ut.normalize(rf, 1e6)
    rna[key] = rf

print('done')

In [None]:
# Every column label of the ND expression frame.
# NOTE(review): only ND is consulted, yet a later cell indexes every sample
# with allEx - this assumes all samples share the same columns; confirm.
allGenes = rna['ND'].columns.to_list()

# Unique symbols that appear as a ligand or a receptor in the LR table.
allLR = list(set(lr['ligand'].to_list() + lr['receptor'].to_list()))

# LR genes that are actually measured. Membership is tested against a set
# (a list scan per gene is quadratic), and the result is sorted because
# set-iteration order changes between kernel sessions, which would otherwise
# reshuffle the columns/rows of every frame derived from allEx.
geneLookup = set(allGenes)
allEx = sorted((x for x in allLR if x in geneLookup), key=str)

print(f"{len(allGenes)=} {len(allLR)=}")
print(f"{len(allEx)}")

In [None]:
# Stack the per-sample tables into two combined frames:
#   rdf  - expression restricted to the LR genes in allEx
#   meta - cell labels / embedding columns
# Cell ids get a "_<sample>" suffix in both so the two frames share the same ids.

exprParts = []
metaParts = []

for key in ['ND', 'HFD8', 'HFD14']:
    # expression: suffix the index first, then keep only the LR genes
    expr = rna[key].copy()
    expr.index = expr.index + "_" + key
    exprParts.append(expr[allEx])

    # metadata: same suffix, applied to the cellId column
    sampleMeta = labels[key]
    metaParts.append(sampleMeta.assign(cellId=sampleMeta['cellId'] + "_" + key))

rdf = pd.concat(exprParts)
print(f"{rdf.shape=}")

meta = pd.concat(metaParts)
print(f"{meta.shape=}")

In [None]:
""" Innate vs adaptive """

# Cell-type names that define each arm of the comparison.
innate = ['Dendritic cells', 'Mac1', 'Mac2', 'Mac3', 'Mac4', 'Mac5',
          'Monocytes', 'NK cells']
adaptive = ['B cells', 'T cells']

# Ids of the cells whose cellType falls in each arm.
isInnate = meta['cellType'].isin(innate)
isAdaptive = meta['cellType'].isin(adaptive)
innateIds = meta.loc[isInnate, 'cellId'].to_list()
adapIds = meta.loc[isAdaptive, 'cellId'].to_list()

# Split the expression rows by membership: ingroup = innate, outgroup = adaptive.
ingroup = rdf.loc[rdf.index.isin(innateIds)]
outgroup = rdf.loc[rdf.index.isin(adapIds)]

print(f"{ingroup.shape=}")
print(f"{outgroup.shape=}")

In [None]:

# Per-gene comparison of ingroup vs outgroup expression: two-sided Wilcoxon
# rank-sum test plus group means, non-zero counts/proportions and a log2 fold
# change computed with a small pseudo-count.
pseudo = 0.001
nIn = len(ingroup)
nOut = len(outgroup)

res = []

for i, gene in enumerate(allEx):

    # progress message every 100 genes
    if i % 100 == 0:
        print(f'gene {i+1}:{len(allEx)}...')

    inVals = ingroup[gene]
    outVals = outgroup[gene]

    score, pval = scipy.stats.ranksums(inVals,
                                       outVals,
                                       alternative='two-sided')

    imean = inVals.mean()
    omean = outVals.mean()

    # astype(bool) flags every non-zero entry, so the sum counts expressing cells
    icount = inVals.astype(bool).sum()
    ocount = outVals.astype(bool).sum()

    # positive values mean higher mean expression in the ingroup
    lfc = np.log2(imean + pseudo) - np.log2(omean + pseudo)

    res.append(dict(
        gene=gene,
        score=score,
        pval=pval,
        log2foldchange=lfc,
        ingroupMean=imean,
        outgroupMean=omean,
        ingroupCount=icount,
        outgroupCount=ocount,
        ingroupProp=icount / nIn,
        outgroupProp=ocount / nOut,
    ))

res = pd.DataFrame(res)
res.head()

In [None]:
# Significance level and the minimum partner count used further down to keep
# only well-connected genes.
alpha = 0.05
n = 2

# Sidak-style per-test level for `ntests` comparisons; reported only.
# NOTE(review): alphaHat is printed but the filter below uses the uncorrected
# `alpha` - confirm whether the corrected threshold was intended.
ntests = len(allEx)
alphaHat = 1 - ((1 - alpha) ** (1 / ntests))
print(f"{alpha=} {ntests=} {alphaHat}")

# genes passing the p-value cut
pdf = res.loc[res['pval'] < alpha]

# Edge list: LR pairs whose ligand AND receptor both passed the cut.
bothSignificant = lr['ligand'].isin(pdf['gene']) & lr['receptor'].isin(pdf['gene'])
ldf = lr.loc[bothSignificant, ['ligand', 'receptor']].reset_index(drop=True)

# Keep only the genes that take part in at least one retained pair.
keepList = list(set(ldf['ligand']).union(ldf['receptor']))
pdf = pdf[pdf['gene'].isin(keepList)]


# annotate the DEG with the targets 
def getTarget(x, search_as, ldf):
    """Return the partner genes of `x` in a ligand/receptor edge list.

    Parameters
    ----------
    x : gene symbol to look up.
    search_as : name of the column `x` is matched against. 'ligand' returns
        the paired receptors; any other value returns the paired ligands.
    ldf : DataFrame with 'ligand' and 'receptor' columns, one pair per row.

    Returns
    -------
    list of partner symbols, in row order (empty when `x` has no match).
    """
    partner_column = 'receptor' if search_as == 'ligand' else 'ligand'
    matches = ldf.loc[ldf[search_as] == x, partner_column]
    return matches.to_list()

# Figure constants. NODE_SIZE is shared by the node and edge draw calls so
# arrowheads stop at the node border. LAYOUT_SEED pins the random layout so
# the figure is the same on every run.
NODE_SIZE = 500
LAYOUT_SEED = 0

# Attach to every gene the receptors it binds as a ligand and the ligands it
# binds as a receptor, plus the size of each list. `pdf` is a filtered slice at
# this point, so the columns are added with .assign (which returns a new frame)
# rather than written into the slice, avoiding pandas' SettingWithCopyWarning.
pdf = pdf.assign(
    receptors=pdf['gene'].apply(lambda x: getTarget(x, 'ligand', ldf)),
    ligands=pdf['gene'].apply(lambda x: getTarget(x, 'receptor', ldf)),
)
pdf = pdf.assign(
    nlig=pdf['ligands'].apply(len),
    nrec=pdf['receptors'].apply(len),
)

# keep genes with more than `n` partners in either role
pdf = pdf[(pdf['nlig'] > n) | (pdf['nrec'] > n)]

# Directed graph: one node per remaining gene, carrying its result row as
# node attributes; one ligand -> receptor edge per retained pair.
G = nx.DiGraph()

nodes = list(pdf.set_index('gene').to_dict('index').items())
print(len(nodes))
G.add_nodes_from(nodes)

for _, ligand in pdf.iterrows():
    ligKey = ligand['gene']
    for recKey in ligand['receptors']:
        # a receptor may have been removed by the partner-count filter above;
        # only connect genes that are both still nodes
        if ligKey in G and recKey in G:
            # edge weight = number of receptors of the ligand
            G.add_edge(ligKey, recKey, weight=ligand['nrec'])

# G.pos = nx.circular_layout(G)
# G.pos = nx.kamada_kawai_layout(G, scale=1)
G.pos = nx.random_layout(G, seed=LAYOUT_SEED)

# G.pos = graphviz_layout(G, prog='neato')

plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = 11, 11
plt.rcParams['figure.facecolor'] = "w"

# Display names: gene symbol with only the first letter upper-cased.
# A dedicated name and a comprehension are used so this cell does not overwrite
# the per-sample `labels` dict or the threshold `n` defined earlier.
nodeLabels = {node: str(node).lower().capitalize() for node in G.nodes()}

# Colour by direction of change: log2foldchange > 0 means higher in the innate
# ingroup; everything else (including exactly 0) is drawn as adaptive.
mask = pdf['log2foldchange'] > 0
innateGenes = pdf[mask]['gene'].to_list()
adaptiveGenes = pdf[~mask]['gene'].to_list()
print(f"{len(innateGenes)=} {len(adaptiveGenes)=}" )

nx.draw_networkx_nodes(G,
                       G.pos,
                       nodelist=innateGenes,
                       node_color='C0',
                       node_size=NODE_SIZE,
                       linewidths=1,
                       edgecolors='k',
                      )

nx.draw_networkx_nodes(G,
                       G.pos,
                       nodelist=adaptiveGenes,
                       node_color='C2',
                       node_size=NODE_SIZE,
                       linewidths=1,
                       edgecolors='k',
                      )

nx.draw_networkx_labels(G,
                        G.pos,
                        labels=nodeLabels,
                        font_size=5)

# node_size is passed so the arrow end points are computed for the node size
# actually drawn (the default assumes 300, which hides arrowheads under nodes)
nx.draw_networkx_edges(G,
                       G.pos,
                       node_size=NODE_SIZE,
                       connectionstyle="arc3,rad=0.05",
                      )

# trailing print keeps the last draw call's return value out of the cell output
print()

In [None]:

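# NOTE(review): every line in this cell is commented out. It appears to be an
# earlier draft of the ligand-receptor network figure (graph built with
# nx.from_pandas_edgelist, circular layout) and is kept for reference only.
# # drop results not in LR pairs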
# keepList = list(set(ldf['ligand'].to_list() + ldf['receptor'].to_list()))
# pdf['geneName'] = pdf['gene'].apply(lambda x: str(x).lower().capitalize())
# pdf = pdf[pdf['gene'].isin(keepList)]
# pdf = res.sort_values(by='log2foldchange', ascending=False)


# labels = dict(zip(pdf['gene'].values, pdf['geneName'].values))
# innateGenes = pdf[pdf['log2foldchange'] < 0]['gene'].to_list()


# plt.rcParams['figure.dpi'] = 300
# plt.rcParams['figure.figsize'] = 11, 11
# plt.rcParams['figure.facecolor'] = "w"
# G = nx.from_pandas_edgelist(ldf,
#                             source='ligand',
#                             target='receptor',
#                             create_using=nx.DiGraph())

# G.pos = nx.circular_layout(G)
# # G.pos = nx.bipartite_layout(G, innateGenes, align='horizontal')
# nx.draw_networkx(G, 
#                  pos=G.pos,
#                  node_size=300,
#                  edgecolors='k',
#                  linewidths=1,
#                  labels=labels,
#                  font_size=3)

# G.nodes()

# # merge in the metadata
# node_attr = pdf.set_index('gene').to_dict('index')
# nx.set_node_attributes(G, node_attr)  

         
    


# innateGenes = pdf[pdf['log2foldchange'] > 0]['gene'].to_list()
# adaptiveGenes = pdf[pdf['log2foldchange'] < 0]['gene'].to_list()


# nodesize = 20
# nx.draw_networkx_nodes(G,
#                        pos=G.pos,
#                        nodelist=innateGenes,
#                        node_size=nodesize,
#                        node_color='C0',
#                        edgecolors='k',
#                        linewidths=1)

# nx.draw_networkx_nodes(G,
#                        pos=G.pos,
#                        nodelist=adaptiveGenes,
#                        node_size=nodesize,
#                        node_color='C2',
#                        edgecolors='k',
#                        linewidths=1)

# # nx.draw_networkx_labels(G, 
#                         pos=G.pos)
     

# nx.draw_networkx_edges(G,
#                        pos=G.pos,
#                        width=1,
#                        edge_color='k')  
    
# for idx, row in pdf.iterrows():
    
    


# top = 20

# pdf = pd.concat([pdf.head(top), pdf.tail(top)])
# pdf

