In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
from collections import Counter
import squidpy as sq
from scipy.spatial.distance import pdist, squareform
import scipy
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from sklearn import metrics
import tabulate
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
import itertools
import umap
from importlib import reload
from scipy import stats

# locals
import utils as ut
reload(ut)

sc.settings.verbosity = 3
import gget

# locals
import utils as ut
reload(ut)

2023-03-06 13:21:43.753720: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-06 13:21:43.888687: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/slurm/lib64:
2023-03-06 13:21:43.888710: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-06 13:21:43.914666: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-03-06 13:21:44.7762

ModuleNotFoundError: No module named 'utils'

In [None]:
# Ligand-receptor (LR) pair table, read from an Excel workbook.
fPath = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/LR/LewisLabUSCS/Mouse/Mouse-2020-Baccin-LR-pairs.xlsx"

lr = pd.read_excel(fPath)
print(lr.shape)

# Add upper-cased copies of the mouse ligand / receptor symbols; these are
# the columns the later cells match gene names against.
lr = lr.assign(
    ligand=lr['Ligand.Mouse'].str.upper(),
    receptor=lr['Receptor.Mouse'].str.upper(),
)
lr.head()

In [None]:
clusterPath = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/scanpy/clusters.csv"

# The file name ends in .csv but this cell has always used read_parquet.
# Parquet files start with the 4-byte magic "PAR1", so sniff it and pick the
# matching reader instead of failing when the content really is CSV.
with open(clusterPath, 'rb') as fh:
    isParquet = fh.read(4) == b'PAR1'

cf = pd.read_parquet(clusterPath) if isParquet else pd.read_csv(clusterPath)
print(cf.shape)
print(cf['cellType'].unique())
cf.head()

In [None]:
keys = ['ND', 'HFD8', 'HFD14']

alpha = 0.05  # rows are kept only when cf['pvals'] is below this
lft = 0       # rows are kept only when cf['logfoldchanges'] is above this


def _upregulated(sig, symbols, minLfc):
    """Return the rows of `sig` whose gene is in `symbols` and whose
    logfoldchanges exceeds `minLfc`, sorted by logfoldchanges (descending)."""
    hits = sig[sig['gene'].isin(symbols) & (sig['logfoldchanges'] > minLfc)]
    return hits.sort_values(by='logfoldchanges', ascending=False)


ligands = []
receptors = []

for k in keys:
    df = cf[cf['key'] == k]

    for cid in df['clusterId'].unique():
        # Significant rows for this cluster. This may be empty; nothing below
        # indexes into it, so an empty cluster simply contributes no rows.
        cdf = df[(df['clusterId'] == cid) & (df['pvals'] < alpha)]

        ligands.append(_upregulated(cdf, lr['ligand'], lft))
        receptors.append(_upregulated(cdf, lr['receptor'], lft))

ligands = pd.concat(ligands, ignore_index=True)
receptors = pd.concat(receptors, ignore_index=True)

# Count rows per (key, cellType). reset_index(name=0) pins the count column's
# label to 0 regardless of pandas version (pandas >= 2.0 would otherwise call
# it 'count'), which is the label the sort below and the plotting cell use.
sumL = ligands[['key', 'cellType']].value_counts().reset_index(name=0)
sumL = sumL.sort_values(by=['key', 0], ascending=False)
sumR = receptors[['key', 'cellType']].value_counts().reset_index(name=0)
sumR = sumR.sort_values(by=['key', 0], ascending=False)


print(tabulate.tabulate(sumL,
                        headers=['Time', "Cell Type", "Ligands"],
                        tablefmt='simple',
                        showindex=False))
print()
print(tabulate.tabulate(sumR,
                        headers=['Time', "Cell Type", "Receptors"],
                        tablefmt='simple',
                        showindex=False))

In [None]:
# Figure defaults for the bar chart below.
plt.rcParams.update({
    'figure.dpi': 300,
    'figure.figsize': (6, 4),
    'figure.facecolor': 'w',
})

# Palette for the three hue levels (helper lives in the local utils module).
colorlist = ut.ncolor(3, cmap='viridis')

plttr = sumR.sort_values(by=['key', 'cellType'], ascending=[True, False])

# One bar per cell type and time point; bar height is the count column (label 0).
ax = sns.barplot(data=plttr,
                 x='cellType',
                 y=0,
                 hue='key',
                 palette=colorlist,
                 edgecolor='k')

_ = plt.xticks(rotation=45)
ax.set_ylabel('Number of Receptors')
ax.set_title("Receptors")
ax.set_xlabel("")

# Rebuild the legend from the plotted handles.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=labels)

In [None]:
cellType = 'Macrophages'
lft = 0


def _labelledPairs(pairs, ligandSet, receptorSet, ligandType, receptorType, key):
    """Return the LR rows of `pairs` whose ligand is in `ligandSet` and whose
    receptor is in `receptorSet`, tagged with the ligand-side cell type, the
    receptor-side cell type and the time key."""
    hit = pairs['ligand'].isin(ligandSet) & pairs['receptor'].isin(receptorSet)
    # .assign builds a new frame, so no column is written onto a slice of
    # `pairs` (the original item assignment raised SettingWithCopyWarning).
    return pairs.loc[hit, ['ligand', 'receptor', 'Ligand.CC']].assign(
        ligand_cType=ligandType,
        receptor_cType=receptorType,
        key=key,
    )


# get all ligand/receptors
ligands = set(lr['ligand'])
receptors = set(lr['receptor'])

# get cell type specific upregulated genes
# NOTE(review): the focal cell type is restricted to logfoldchanges > lft (no
# p-value cut), while the partner cell types below are not filtered at all.
# Confirm this asymmetry is intended.
df = cf[(cf['cellType'] == cellType) & (cf['logfoldchanges'] > lft)]

# every cell type except the focal one (loop-invariant, so computed once)
otherTypes = [c for c in cf['cellType'].unique() if c != cellType]

res = []

for key in keys:

    # get all L/Rs expressed for cell type at time t
    ctGenes = set(df.loc[df['key'] == key, 'gene'])
    ctLigands = ligands & ctGenes
    ctReceptors = receptors & ctGenes

    print(key, cellType, "L:", len(ctLigands), "R:", len(ctReceptors))

    for cType in otherTypes:
        cdf = cf[(cf['key'] == key) & (cf['cellType'] == cType)]
        otherGenes = set(cdf['gene'])

        otherLigands = ligands & otherGenes
        otherReceptors = receptors & otherGenes

        # focal cell type ligands -> other cell type receptors
        res.append(_labelledPairs(lr, ctLigands, otherReceptors, cellType, cType, key))

        # other cell type ligands -> focal cell type receptors
        res.append(_labelledPairs(lr, otherLigands, ctReceptors, cType, cellType, key))


res = pd.concat(res, ignore_index=True)
print(res['Ligand.CC'].value_counts())

outpath = f"/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/scanpy/{cellType}_LR.csv"
res.to_csv(outpath)
res.head()

In [None]:
# Number of LR pairs per (ligand cell type, receptor cell type, time point).
d = res[['ligand_cType', 'receptor_cType', 'key']].value_counts().reset_index()
d.columns = ['Cell Type Ligand', 'Cell Type Receptor', 'key', 'Pairs']
d = d.sort_values(by=['key', 'Cell Type Ligand', 'Cell Type Receptor'], ascending=[False, True, True])

# One row per cell-type pair, one column per time point. A pair that has no
# LR matches at some time point is absent from `d`; fill_value=0 reports it as
# zero pairs instead of NaN. Each (pair, key) occurs once in `d`, so 'sum'
# just carries the count through unchanged.
p = pd.pivot_table(d,
                   index=['Cell Type Ligand', 'Cell Type Receptor'],
                   columns='key',
                   values='Pairs',
                   aggfunc='sum',
                   fill_value=0)
p = p.reset_index()
# reindex (rather than plain column selection) fixes the column order and
# yields a zero column, not a KeyError, if a time point has no rows at all.
p = p.reindex(columns=['Cell Type Ligand', 'Cell Type Receptor', 'ND', 'HFD8', 'HFD14'],
              fill_value=0)
print(tabulate.tabulate(p,
                        headers=p.columns,
                        tablefmt='latex',
                        showindex=False))

In [None]:
"""All vs all """

lft = 0
alpha = 0.05

# get all ligand/receptors
ligands = set(lr['ligand'].to_list())
receptors = set(lr['receptor'].to_list())

# get all up-regulated genes
df = cf[(cf['logfoldchanges'] > lft) & (cf['pvals'] < alpha)]

types = cf['cellType'].unique()

res = []

for key in keys:

    kdf = df[df['key'] == key]

    # Ligands / receptors among the up-regulated genes of each cell type at
    # this time point, computed once per type instead of once per type pair.
    typeLigands = {}
    typeReceptors = {}
    for ct in types:
        ctGenes = set(kdf.loc[kdf['cellType'] == ct, 'gene'])
        typeLigands[ct] = ligands & ctGenes
        typeReceptors[ct] = receptors & ctGenes

    # product(...) visits every ORDERED (ligand type, receptor type) pair, so
    # both directions between two cell types are already covered. Appending
    # the reverse direction inside the loop as well would record every
    # interaction twice.
    for ci, cj in itertools.product(types, repeat=2):
        hit = lr['ligand'].isin(typeLigands[ci]) & lr['receptor'].isin(typeReceptors[cj])
        # .assign returns a new frame (no write onto a slice of lr) and
        # guarantees every appended row carries its time key.
        matches = lr.loc[hit, ['ligand', 'receptor', 'Ligand.CC']].assign(
            ligand_cType=ci,
            receptor_cType=cj,
            key=key,
        )
        res.append(matches)


res = pd.concat(res, ignore_index=True)
print(res['Ligand.CC'].value_counts())

outpath = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/scanpy/all_LR.csv"
res.to_csv(outpath)
res.head()

In [None]:
# Number of LR pairs per (ligand cell type, receptor cell type, time point).
d = res[['ligand_cType', 'receptor_cType', 'key']].value_counts().reset_index()
d.columns = ['Cell Type Ligand', 'Cell Type Receptor', 'key', 'Pairs']
d = d.sort_values(by=['key', 'Cell Type Ligand', 'Cell Type Receptor'], ascending=[False, True, True])

# One row per cell-type pair, one column per time point. A pair with no LR
# matches at a time point is absent from `d`; fill_value=0 records it as zero
# pairs rather than NaN, so differences between time points computed from `p`
# stay numeric. Each (pair, key) occurs once in `d`, so 'sum' just carries the
# count through unchanged.
p = pd.pivot_table(d,
                   index=['Cell Type Ligand', 'Cell Type Receptor'],
                   columns='key',
                   values='Pairs',
                   aggfunc='sum',
                   fill_value=0)
p = p.reset_index()
# reindex (rather than plain column selection) fixes the column order and
# yields a zero column, not a KeyError, if a time point has no rows at all.
p = p.reindex(columns=['Cell Type Ligand', 'Cell Type Receptor', 'ND', 'HFD8', 'HFD14'],
              fill_value=0)
p.head()

In [None]:
# Preview the first rows of `df` (whichever frame the most recently run cell
# bound to that name; it is reassigned in several cells of this notebook).
df.head()

In [None]:
# merge in the log fold changes

ddf = df[['gene', 'key', 'logfoldchanges', 'cellType']]
ddf = ddf.sort_values(by=['cellType', 'logfoldchanges'], ascending=[False, False])
# Keep exactly one row per (gene, key, cellType). Because of the sort above,
# that row is the one with the largest logfoldchanges. Without the subset,
# only fully identical rows were removed, so a gene listed more than once for
# a cell type made the merges below multiply the pair rows.
ddf = ddf.drop_duplicates(subset=['gene', 'key', 'cellType'], keep='first')


def _attachLfc(pairs, lfc, side):
    """Merge `lfc` onto `pairs` for one side ('ligand' or 'receptor') and
    store that side's logfoldchanges in the column '<side>LFC'."""
    merged = pd.merge(pairs,
                      lfc,
                      left_on=[side, f'{side}_cType', 'key'],
                      right_on=['gene', 'cellType', 'key'])
    merged = merged.rename(columns={'logfoldchanges': f'{side}LFC'})
    # the join keys from the right-hand frame are redundant after the merge
    return merged.drop(columns=['gene', 'cellType'])


# merge ligand LFC, then receptor LFC
res2 = _attachLfc(res, ddf, 'ligand')
res2 = _attachLfc(res2, ddf, 'receptor')


outpath = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/scanpy/allPairs.csv"
res2.to_csv(outpath, index=False)


res2.head()

In [None]:
ligandQuery = 'Macrophages'    # cell type on the ligand side
receptorQuery = 'Macrophages'  # cell type on the receptor side
n = 10                         # number of top pairs to list
thresh = 2                     # minimum LFC; only applied when grouped is True
grouped = False

# NOTE(review): sortby / typeCol / lfcCol are not used anywhere in this cell;
# kept in case another cell reads them.
sortby = 'ligand'
typeCol = f"{sortby}_cType"
lfcCol = f"{sortby}LFC"

mask = (res2['ligand_cType'] == ligandQuery) & (res2['receptor_cType'] == receptorQuery)

t = res2[mask]

if grouped:
    # group over all times: keep a pair only if its smallest ligand and
    # receptor LFC across time points both exceed the threshold.
    # Columns are selected with a list; selecting them with a tuple
    # (['a', 'b'] without the inner brackets) is rejected by pandas >= 2.0.
    grped = t.groupby(['ligand', 'receptor'])[['ligandLFC', 'receptorLFC']].min().reset_index()
    t = grped[(grped['ligandLFC'] > thresh) & (grped['receptorLFC'] > thresh)].reset_index(drop=True)

t = t.sort_values(['ligandLFC', 'receptorLFC'], ascending=[False, False])
top = t.head(n)
lq = top['ligand'].to_list()
rq = top['receptor'].to_list()

list(zip(lq, rq))

# print(", ".join([f"'{x}'" for x in lq]))
# print()

# print(", ".join([f"'{x}'" for x in rq]))



In [None]:
# break

In [None]:
# Time point whose pair counts set the marker sizes.
time = "HFD14"

# (alternative: fractions of the total) sizes = [x/p[time].sum() for x in p[time]]
sizes = p[time]

plt.rcParams.update({
    'figure.dpi': 300,
    'figure.figsize': (5, 4),
    'figure.facecolor': "w",
})

# One marker per cell-type pair; the marker-size range is set to the smallest
# and largest pair count.
sizeRange = (np.min(sizes), np.max(sizes))
g = sns.scatterplot(data=p,
                    x='Cell Type Ligand',
                    y='Cell Type Receptor',
                    size=sizes,
                    sizes=sizeRange,
                    edgecolor='k')

g.set_ylabel('Receptors')
g.set_xlabel('Ligands')
_ = plt.xticks(rotation=90)

# Put the size legend outside the axes, to the right.
g.legend(title='LR Pairs',
         loc='center left',
         bbox_to_anchor=(1.01, 0.65),
         ncol=1,
         labelspacing=1.5,
         borderpad=1)

plt.title(time)

In [None]:
keys = ['ND', 'HFD8', 'HFD14']

# every unordered pair of time points, earlier one first
comps = list(itertools.combinations(keys, 2))

# fixed colour limits so all comparison plots share one colour scale
colorLB = -50
colorUB = 75

# figure defaults and the colour norm do not change between iterations
plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = 3, 3
plt.rcParams['figure.facecolor'] = "w"

norm = plt.Normalize(colorLB, colorUB)

for col1, col2 in comps:
    # change in the number of LR pairs from col1 to col2, per cell-type pair
    diffs = (p[col2] - p[col1]).astype(float).to_numpy()

    fig, ax = plt.subplots()

    g = sns.scatterplot(data=p,
                        x='Cell Type Ligand',
                        y='Cell Type Receptor',
                        hue=diffs,
                        size=diffs,
                        palette='RdYlGn',
                        hue_norm=(colorLB, colorUB),
                        edgecolor='k',
                        legend=False,
                        zorder=3,
                        ax=ax)

    ax.set_ylabel('Receptors')
    ax.set_xlabel('Ligands')
    ax.tick_params(axis='x', labelrotation=90)

    print(diffs.min(), diffs.max())

    # Stand-alone mappable that only drives the colorbar. It is not attached
    # to any Axes, so the Axes to take space from must be passed explicitly;
    # recent Matplotlib refuses to guess it.
    sm = plt.cm.ScalarMappable(cmap="RdYlGn", norm=norm)
    sm.set_array([])
    ax.grid(True, zorder=0)

    fig.colorbar(sm, ax=ax, label='Change in LR Pairs')

    print(f"{col2} - {col1}")
    ax.set_aspect('equal')
    plt.show()

