In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
import seaborn as sns
from textwrap import wrap
import scipy
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from sklearn import metrics
import itertools
from sklearn.decomposition import PCA
from scipy.sparse import csgraph
import umap
from collections import Counter
import sklearn
import scipy.stats as sps
from scipy.spatial.distance import cdist
from importlib import reload
import pyreadr
import mygene

import gget

# locals
import utils as ut
reload(ut)

In [None]:
# NOTE(review): bare expression whose value is only displayed; nothing in the
# visible notebook reads it -- looks like leftover scratch.
pd.Float64Dtype()

In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails.
# NOTE(review): presumably a deliberate guard so "Run All" halts here -- confirm.
break

In [None]:
# Input locations: per-diet CARD inputs and the combined embedding table.
cardDir = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDInputs/"
xyPath = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/combinedEmbedding.csv"

# Embedding table; `metaTypes` starts out as a copy of the `cellTypes` column.
xy = pd.read_csv(xyPath)
xy['metaTypes'] = xy['cellTypes']
cTypes = sorted(xy['cellTypes'].unique())
print(cTypes)
print()

keys = ['ND', 'HFD8', 'HFD14']
embedCols = ['x', 'y', 'cellId', 'colors', 'metaTypes']

# Both dicts are keyed by diet key: rna[key] is the expression table,
# labels[key] the cluster labels joined with the embedding columns.
rna, labels = {}, {}

for key in keys:
    print(f"{key}...")

    # cluster labels, left-joined with the embedding on cellId
    lf = pd.read_csv(f"{cardDir}{key}_macrophage_clusters.csv")
    lf = lf.merge(xy[embedCols], how='left', on='cellId')
    labels[key] = lf

    # expression table: transpose, then promote the first row to the header
    rf = pd.read_csv(f"{cardDir}{key}_macrophage_rna.csv").T
    rf.columns = rf.iloc[0]
    rf = rf.iloc[1:]
    rf.index.names = ['cellId']

    # NOTE(review): ut.normalize is a project helper; the 1e6 target presumably
    # means counts-per-million -- confirm against utils.
    rf = ut.normalize(rf, 1e6)
    rna[key] = rf

print('done')

In [None]:
dataDir = '/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/macSpectrum/data/'

def getMapper(ginfo):
    """Build a query -> gene-symbol lookup from a mygene `querymany` result.

    Parameters
    ----------
    ginfo : dict
        Result object whose 'out' entry is a list of hit dicts, each carrying
        a 'query' key and, when the lookup succeeded, a 'symbol' key.

    Returns
    -------
    dict
        Maps each query to its upper-cased symbol, or to the sentinel string
        'NAN' when the hit has no 'symbol'. If a query appears more than once,
        the last hit wins.
    """
    return {
        hit['query']: hit['symbol'].upper() if 'symbol' in hit else 'NAN'
        for hit in ginfo['out']
    }

# Collect every .rda signature table in dataDir whose filename contains "M".
mf = []

for f in os.listdir(dataDir):
    if "M" not in f:
        continue

    # the object stored in the .rda file is looked up under the file's stem
    key = f.replace(".rda", "")
    ef = pyreadr.read_r(f"{dataDir}{f}")[key]  # extract the dataframe
    ef['key'] = key.replace("m.", "").replace("_mean", "")
    print(f"{key=} {ef.shape=}")
    mf.append(ef)

mf = pd.concat(mf)
print(f"{mf.shape=}")

# Translate Ensembl gene ids to (upper-cased) symbols through mygene.
mg = mygene.MyGeneInfo()
ginfo = mg.querymany(
    mf['GeneID'].unique(),
    scopes='ensembl.gene',
    fields='symbol',
    species='mouse',
    returnall=True,
)

mapper = getMapper(ginfo)
mf['gene'] = mf['GeneID'].map(mapper)

# drop unmapped genes ("NAN" is the sentinel getMapper assigns)
mf = mf[mf['gene'] != "NAN"]

print(f"{mf.shape=}")
mf.head()

In [None]:
# Correlate every macrophage cell with each reference signature stored in `mf`
# (one signature per distinct mf['key']), per diet key.
macTypes = [
    'Mac1',
    'Mac2',
    'Mac3',
    'Mac4',
    'Mac5',
]


res = []

for key in keys:
    rf = rna[key]
    lf = labels[key]
    lf = lf.drop_duplicates(subset='cellId')

    # get macrophages
    cellIds = lf[lf['cellType'].isin(macTypes)]['cellId'].to_list()
    rf = rf[rf.index.isin(cellIds)]
    print(f"{key=} {rf.shape=}")

    for mtype in mf['key'].unique():
        mac = mf[mf['key'] == mtype]

        # restrict to signature genes that are present in the expression table
        cols = [x for x in mac['gene'] if x in rf.columns]

        mac = mac[mac['gene'].isin(cols)]
        mac = mac[['gene', 'value']].set_index('gene')
        mac = mac.reindex(cols).T
        rf2 = rf[cols]
        print(f"{mac.shape=} {rf2.shape=}")

        # Pearson correlation of each cell (row) against the signature vector.
        # The (r, p-value) pairs are unpacked explicitly: the previous
        # `a, b = series.str` idiom iterated the .str accessor, which is
        # deprecated in pandas 1.x and raises TypeError from pandas 2.0 on.
        signature = np.squeeze(mac.T)
        stats = [scipy.stats.pearsonr(signature, row) for _, row in rf2.iterrows()]

        rf2 = pd.DataFrame({
            'cellId': rf2.index,
            'r': [s[0] for s in stats],
            'pval': [s[1] for s in stats],
        })
        rf2['key'] = key
        rf2['mtype'] = mtype
        rf2 = pd.merge(rf2, lf[['cellId', 'cellType', 'x', 'y', 'metaTypes']],
                       how='left',
                       right_on='cellId',
                       left_on='cellId')
        res.append(rf2)

res = pd.concat(res)
print(f"{res.shape=}")
res.head()

In [None]:
# Number of result rows per cellType (each cell contributes one row per signature).
res['cellType'].value_counts()

In [None]:
plt.rcParams.update({
    'figure.dpi': 300,
    'figure.figsize': (6, 3),
    'figure.facecolor': "w",
})

# significance cut-off applied to the per-cell correlation p-values
alpha = 0.05

pdf = res.copy()
# NOTE(review): pdfm is not read anywhere in this cell -- possibly leftover.
pdfm = pdf.reset_index(drop=False)

# keep only significant correlations, ordered by cell type for the x axis
pdf = pdf[pdf['pval'] < alpha].sort_values(by='cellType')

ax = sns.boxplot(
    data=pdf,
    x='cellType',
    y='r',
    hue='mtype',
    showfliers=False,
    boxprops={'zorder' : 2},
    palette='viridis',
)

ax.legend(loc='upper right', bbox_to_anchor=(1.2, 1.02))
ax.grid(True, zorder=0)
ax.set_ylabel('Correlation')
ax.set_xlabel('')

In [None]:
# pdf = res.copy()

# pdf = pd.pivot_table(pdf, 
#                      index=['cellId', 'key', 'cellType'],
#                      columns=['mtype'],
#                      values=['r']).reset_index()

# pdf.columns = ["".join(x) for x in pdf.columns]
# pdf = pdf.sort_values(by=['cellType', 'key'], ascending=[True, False])

# plt.rcParams['figure.dpi'] = 300
# plt.rcParams['figure.figsize'] = 4, 4
# plt.rcParams['figure.facecolor'] = "w"

# sns.scatterplot(data=pdf, 
#                 x='rM1',
#                 y='rM2',
#                 s=2,
#                 alpha=0.6, 
#                 hue='cellType')

In [None]:
# One row per (cellId, key, cellType) with one correlation column per signature.
pdf = (
    res.pivot_table(index=['cellId', 'key', 'cellType'],
                    columns=['mtype'],
                    values=['r'])
       .reset_index()
)

# flatten the two-level header: ('r', 'M0') -> 'rM0', ('cellId', '') -> 'cellId'
pdf.columns = ["".join(x) for x in pdf.columns]
pdf = pdf.sort_values(by=['cellType', 'key'], ascending=[True, False])

plt.rcParams.update({
    'figure.dpi': 300,
    'figure.figsize': (7, 2),
    'figure.facecolor': "w",
})

colors = [
    '#9835e4',
    '#e48035',
    '#8e0152',
    '#9acd61',
    '#276419',
]

fig, axs = plt.subplots(1, 3, sharey=True)
axs = axs.ravel()

for ax, m in zip(axs, ['rM0', 'rM1', 'rM2']):

    sns.boxplot(
        data=pdf,
        x=m,
        y='cellType',
        palette=colors,
        flierprops={'marker' : ".", 'markersize' : 2},
        boxprops={'zorder' : 2},
        ax=ax,
    )

    ax.set_ylabel("")
    ax.set_xlabel(m.replace("r", ""))

    # NOTE(review): two zero-reference lines are drawn on top of each other
    # (black dotted, then grey dashed) -- looks like a copy-paste leftover;
    # both are kept so the figure is unchanged.
    ax.axvline(x=0, zorder=1, ls=":", c='k')
    ax.axvline(x=0, zorder=1, ls="--", c='grey')

plt.tight_layout()
# plt.legend(loc='upper right', bbox_to_anchor=(1.5, 1.014))
# plt.ylabel("")
# plt.xlabel("M0 Correlation")
# plt.xlim([-1, 1])

In [None]:
# One row per (cellId, key, cellType) with one correlation column per signature.
pdf = res.copy()

pdf = pd.pivot_table(pdf,
                     index=['cellId', 'key', 'cellType'],
                     columns=['mtype'],
                     values=['r']).reset_index()

# flatten the two-level header: ('r', 'M0') -> 'rM0', ('cellId', '') -> 'cellId'
pdf.columns = ["".join(x) for x in pdf.columns]
pdf = pdf.sort_values(by=['cellType', 'key'], ascending=[True, False])

# display labels for the diet keys
keyMap = {
    'ND' : "ND",
    'HFD8' : "8w",
    'HFD14' : "14w",
}

pdf['label'] = pdf['key'].map(keyMap)

plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = 7, 2
plt.rcParams['figure.facecolor'] = "w"

fig, axs = plt.subplots(1, 3, sharey=True)
axs = axs.ravel()

for i, m in enumerate(['rM0', 'rM1','rM2']):

    sns.boxplot(data=pdf,
                x=m,
                y='label',
                palette='viridis',
                flierprops={'marker' : ".", 'markersize' : 2},
                boxprops={'zorder' : 2},
                ax=axs[i])

    # Labels are set once; the original repeated these calls verbatim.
    axs[i].set_ylabel("")
    axs[i].set_xlabel(f'{m.replace("r", "")}')

    # NOTE(review): two overlapping zero-reference lines (black dotted, then
    # grey dashed) -- probably only one is intended; both kept so the figure
    # looks the same until that is decided.
    axs[i].axvline(x=0, zorder=1, ls=":", c='k')
    axs[i].axvline(x=0, zorder=1, ls="--", c='grey')

# a single layout pass (the original called tight_layout twice in a row)
plt.tight_layout()
# plt.legend(loc='upper right', bbox_to_anchor=(1.5, 1.014))
# plt.ylabel("")
# plt.xlabel("M0 Correlation")
# plt.xlim([-1, 1])

In [None]:
# Reference profiles are per-gene mean expression over one cluster of one diet:
#   atm -> Mac1 cells of ND, lam -> Mac5 cells of HFD14.
profiles = {}

for name, key, cellType in [('atm', "ND", 'Mac1'), ('lam', "HFD14", 'Mac5')]:
    rf = rna[key]
    lf = labels[key].drop_duplicates(subset='cellId')

    # cells of the requested cluster
    cellIds = lf.loc[lf['cellType'] == cellType, 'cellId'].to_list()
    profiles[name] = rf[rf.index.isin(cellIds)].mean(axis=0)
    print(profiles[name].shape)

atm, lam = profiles['atm'], profiles['lam']

# side-by-side table of the two profiles, plus their difference and sum
ref = pd.concat([atm, lam], axis=1).reset_index(drop=False)
ref.columns = ['gene', 'atm', 'lam']
ref = ref.assign(**{
    'diff': ref['atm'] - ref['lam'],
    'sum': ref['atm'] + ref['lam'],
})

# drop zero-diff genes
ref = ref[ref['diff'] != 0]

genes = ref['gene'].to_list()
print(f"{len(genes)=}")

ref.head()

In [None]:
# Correlate every macrophage cell with the `atm` and `lam` reference profiles
# held in `ref`, over the gene list `genes`, per diet key.
macTypes = [
    'Mac1',
    'Mac2',
    'Mac3',
    'Mac4',
    'Mac5',
    # 'Monocytes',
]


res = []

for key in keys:
    rf = rna[key]
    lf = labels[key]
    lf = lf.drop_duplicates(subset='cellId')

    # get macrophages
    cellIds = lf[lf['cellType'].isin(macTypes)]['cellId'].to_list()
    rf = rf[rf.index.isin(cellIds)]

    # same gene order as the rows of `ref`, so values line up positionally
    rf = rf[genes]
    print(f"{key=} {rf.shape=}")

    for mtype in ['atm', 'lam']:

        v = np.squeeze(ref[mtype])

        # Pearson correlation of each cell (row) against the reference vector.
        # The (r, p-value) pairs are unpacked explicitly: the previous
        # `a, b = series.str` idiom iterated the .str accessor, which is
        # deprecated in pandas 1.x and raises TypeError from pandas 2.0 on.
        stats = [scipy.stats.pearsonr(v, row) for _, row in rf.iterrows()]

        rf2 = pd.DataFrame({
            'cellId': rf.index,
            'r': [s[0] for s in stats],
            'pval': [s[1] for s in stats],
        })
        rf2['key'] = key
        rf2['mtype'] = mtype

        rf2 = pd.merge(rf2, lf[['cellId', 'cellType', 'x', 'y', 'metaTypes']],
                       how='left',
                       right_on='cellId',
                       left_on='cellId')
        res.append(rf2)

res = pd.concat(res)
print(f"{res.shape=}")
res.head()

In [None]:
# One row per (cellId, key, cellType) with columns 'ratm' and 'rlam'.
pdf = res.copy()

pdf = pd.pivot_table(pdf,
                     index=['cellId', 'key', 'cellType'],
                     columns=['mtype'],
                     values=['r']).reset_index()

# flatten the two-level header: ('r', 'atm') -> 'ratm', ('cellId', '') -> 'cellId'
pdf.columns = ["".join(x) for x in pdf.columns]
pdf = pdf.sort_values(by=['cellType', 'key'], ascending=[True, False])

# display labels for the diet keys
keyMap = {
    'ND' : "ND",
    'HFD8' : "8w",
    'HFD14' : "14w",
}

pdf['label'] = pdf['key'].map(keyMap)

# only three clusters are shown
mask = pdf['cellType'].isin(['Mac1', 'Mac4', 'Mac5'])
pdf = pdf[mask]


# one color per plotted cluster (the two commented entries are for the
# clusters filtered out above)
colors = [
    '#9835e4',
    # '#e48035',
    # '#8e0152',
    '#9acd61',
    '#276419',
]


plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = 3, 3
plt.rcParams['figure.facecolor'] = "w"


# per-cluster medians (the earlier comment said "means", but the aggregation
# has always been the median). The string alias avoids the pandas FutureWarning
# raised when the numpy callable is passed to agg.
grped = pdf.groupby('cellType').agg(
    lam=('rlam', 'median'),
    atm=('ratm', 'median'),
).reset_index()

# large markers: cluster medians
sns.scatterplot(data=grped,
                x='atm',
                y='lam',
                s=50,
                ec='k',
                legend=False,
                palette=colors,
                # palette='Set1',
                zorder=4,
                hue='cellType',
                alpha=0.8)

# small markers: individual cells
sns.scatterplot(data=pdf,
                x='ratm',
                y='rlam',
                s=3,
                ec='k',
                palette=colors,
                # palette='Set1',
                alpha=0.8,
                hue='cellType')

plt.legend(loc='upper right', bbox_to_anchor=(1.5, 1.5))
plt.gca().invert_xaxis()
plt.ylabel("LAM")
plt.xlabel("ATM")

In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails.
# NOTE(review): presumably a deliberate guard so "Run All" halts here -- confirm.
break

In [None]:



# NOTE(review): `Line` is not imported anywhere in the notebook's import cell,
# so this raises NameError as written -- presumably skspatial.objects.Line;
# confirm and add the import, or delete this scratch cell.
line = Line.from_points(point_a=[0,1], point_b=[1,1])
point = (1.2, 0.5) # example of a point not "within" the line segment


# sns.scatterplot(data=grped, 
#                 x='atm',
#                 y='lam',
#                 s=50,
#                 ec='k',
#                 legend=False,
#                 palette=colors,
#                 # palette='Set1',
#                 zorder=4,
#                 hue='cellType',
#                 alpha=0.8)


# def linfunc(points, m, b):
#     """function to get value along a fitted line """
     
    


In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails.
# NOTE(review): presumably a deliberate guard so "Run All" halts here -- confirm.
break

In [None]:
# NOTE(review): IPython help lookup (`?`), not plain Python -- looks like a
# leftover from interactive exploration.
?np.polyfit

In [None]:
# One row per (cellId, key, cellType) with one correlation column per profile.
pdf = (
    res.pivot_table(index=['cellId', 'key', 'cellType'],
                    columns=['mtype'],
                    values=['r'])
       .reset_index()
)

# flatten the two-level header: ('r', 'atm') -> 'ratm', ('cellId', '') -> 'cellId'
pdf.columns = ["".join(x) for x in pdf.columns]
pdf = pdf.sort_values(by=['cellType', 'key'], ascending=[True, False])

# display labels for the diet keys
keyMap = {'ND': "ND", 'HFD8': "8w", 'HFD14': "14w"}
pdf['label'] = pdf['key'].map(keyMap)

sns.stripplot(data=pdf, x='ratm', y='cellType')

In [None]:
# sns.lmplot(data=pdf, 
#                 x='ratm',
#                 y='rlam',
#                 # s=3,
#                 # ec='k',
#                 palette=colors,
#                 # alpha=0.6, 
#                 hue='cellType'
#           )

In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails.
# NOTE(review): presumably a deliberate guard so "Run All" halts here -- confirm.
break

In [None]:
# # load marker gene list
# cType = "Macrophages"
# uit = 0.01
# """
# Ubiquitousness Index (0-1). 
# 0 indicates the gene is not expressed in any cell cluster and 
# 1 (maximum) indicates that the gene is expressed in all cell clusters. 
# """

# pandPath = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/panglaodb/pandb.tsv.gz"        
# pandf = ut.read_panglaodb(pandPath)

# pandf = pandf[pandf['cell type'] == cType]
# pandf = pandf[pandf['ubiquitousness index'] > uit]
# mgenes = list(set(pandf['gene'].to_list()))
# print(f"{len(mgenes)=}")

# pandf.head()

In [None]:
cType = "Macrophages"
t = 0.5 # percent of the population the must express the gene
xId = '00190'
yId = '04145'

xPathId, yPathId = f"mmu{xId}", f"mmu{yId}"

# gene list (upper-cased) and display name for each of the two KEGG pathways
xGenes = [g.upper() for g in ut.parseKEGG(xPathId)]
xPathname = ut.getPathname(xPathId)
print(f"{xPathname=}  {len(xGenes)=}")

yGenes = [g.upper() for g in ut.parseKEGG(yPathId)]
yPathname = ut.getPathname(yPathId)
print(f"{yPathname=} {len(yGenes)=}")

res = []

for key in keys:
    lf = labels[key]
    cellIds = lf.loc[lf['metaTypes'] == cType, 'cellId'].to_list()

    rf = rna[key]
    rf = rf[rf.index.isin(cellIds)]

    # a gene is kept only when more than `thresh` cells have a truthy
    # (non-zero) value for it
    n = len(rf)
    thresh = int(n * t)
    rfBool = rf.astype(bool).astype(int)

    kept = {}
    for axisName, pathwayGenes in (('x', xGenes), ('y', yGenes)):
        # require that pathway genes are present in the expression table
        present = [x for x in pathwayGenes if x in rf.columns]
        counts = rfBool[present].sum(axis=0)
        kept[axisName] = counts[counts > thresh].index.to_list()

    xCols, yCols = kept['x'], kept['y']

    print(f"{key} {len(xCols)} genes in {xPathname} in > {thresh} cells ({t*100}%)")
    print(f"{key} {len(yCols)} genes in {yPathname} in > {thresh} cells ({t*100}%)")
    print()

    assert len(xCols) > 0
    assert len(yCols) > 0

    # per-cell mean expression over the retained genes of each pathway
    af = pd.concat(
        [rf[xCols].mean(axis=1), rf[yCols].mean(axis=1)],
        axis=1,
        keys=['x', 'y'],
    ).reset_index(drop=False)
    af['key'] = key

    res.append(af)

res = pd.concat(res, ignore_index=True)
res.head()

In [None]:
plt.rcParams.update({
    'figure.dpi': 300,
    'figure.figsize': (5, 5),
    'figure.facecolor': "w",
})

# natural-log transform of the two pathway aggregates
# (np.log maps 0 to -inf, so all-zero cells would not be finite here)
pdf = res.assign(
    xlog=np.log(res['x']),
    ylog=np.log(res['y']),
)

# one regression panel per diet key
g = sns.lmplot(
    data=pdf,
    x="xlog",
    y="ylog",
    col="key",
    hue="key",
    palette='viridis',
    scatter_kws={'linewidths': 0.5,
                 'edgecolor': 'k'},
    y_jitter=.02,
    truncate=False,
)

g.set_axis_labels(xPathname.replace("pathway", ""),
                  yPathname.replace("pathway", ""))

# g.set_titles("ND", "8w", "14w")
plt.tight_layout()

In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails.
# NOTE(review): presumably a deliberate guard so "Run All" halts here -- confirm.
break

In [None]:
# KEGG pathway id (without the "mmu" prefix) -> display name
pathways = {
    "00561" :  "Glycerolipid metabolism",
    "00600" :  "Sphingolipid metabolism",
    "00190" :  "Oxidative phosphorylation",
    "04142" :   "Lysosome",
    "04146" :   "Peroxisome",
    "04140" :   "Autophagy",
    "04210" :   "Apoptosis",
    "04216" :   "Ferroptosis",
    "04217" :   "Necroptosis",
}

# display name -> upper-cased gene list returned by ut.parseKEGG
pwayGenes = {}

for pid, pname in pathways.items():
    pathwayGenes = [x.upper() for x in ut.parseKEGG(f"mmu{pid}")]
    print(f"{pname=} {len(pathwayGenes)}")
    pwayGenes[pname] = pathwayGenes

print('done')

In [None]:
# Long table: one row per (diet key, pathway, macrophage cell) holding the
# cell's mean expression over the pathway genes found in the expression table.
res = []

macTypes = ['Mac1', 'Mac2', 'Mac3', 'Mac4', 'Mac5']
labelCols = ['cellId', 'cellType', 'x', 'y', 'metaTypes']

for key in keys:
    lf = labels[key].drop_duplicates(subset='cellId')
    cellIds = lf.loc[lf['cellType'].isin(macTypes), 'cellId'].to_list()

    rf = rna[key]
    rf = rf[rf.index.isin(cellIds)]
    print(f"{key}...")

    for pname in pathways.values():
        # pathway genes present as columns of the expression table
        exG = [x for x in pwayGenes[pname] if x in rf.columns]

        meanExp = (
            rf[exG].mean(axis=1)
                   .rename('mean')
                   .rename_axis('cellId')
                   .reset_index()
                   .assign(pway=pname, key=key)
                   .merge(lf[labelCols], how='left', on='cellId')
        )

        res.append(meanExp)

res = pd.concat(res)
print(f"{res.shape=}")
res.head()

In [None]:
# keep rows with strictly positive mean expression (NaNs count as zero)
pdf = res.fillna(0)
print(f"{pdf.shape=}")
pdf = pdf[pdf['mean'] > 0]
print(f"{pdf.shape=}")

# one short, wide row per pathway, colored by diet key
g = sns.FacetGrid(
    pdf,
    row="pway",
    hue="key",
    height=1,
    aspect=6,
    palette='viridis',
)

# settings shared by the filled density and its outline
kdeShared = dict(bw_adjust=.5, clip_on=False, log_scale=True)

# Draw the densities in a few steps: filled area first, black outline on top
g.map(sns.kdeplot, "mean", fill=True, alpha=0.8, linewidth=1.5, **kdeShared)
g.map(sns.kdeplot, "mean", color="k", lw=1, **kdeShared)

g.set_titles("")
g.set(yticks=[], ylabel="")
g.despine(bottom=False, left=True)

# write each pathway name at the top-left corner of its own row
for ax, label in zip(g.axes.flat, pdf['pway'].unique()):
    ax.text(0, 1, label,
            ha="left",
            va="center",
            transform=ax.transAxes)

plt.legend(loc='upper right', bbox_to_anchor=(1.4, 1.02))
plt.xlabel('Mean Expression')

In [None]:
# # try with fold changes

# lfdf = []

# macTypes = [
#     'Mac1',
#     'Mac2',
#     'Mac3',
#     'Mac4',
#     'Mac5',
# ]

# for key in keys:
#     rf = rna[key]
#     lf = labels[key]
#     lf = lf.drop_duplicates(subset='cellId')
    
    
    
# #     cellIds = lf[lf['cellType'].isin(macTypes)]['cellId'].to_list()
# #     rf = rf[rf.index.isin(cellIds)]
# #     print(f"{key}...")
    
#     for mtype in macTypes:
        
#         cellIds = lf[lf['cellType'] == mtype]['cellId'].to_list()
        
#         mask = rf.index.isin(cellIds)
#         inGroup = rf[mask]
#         outGroup = rf[~mask]
        
#         for g in rf.columns:
            
#             score, pval = scipy.stats.ranksums(inGroup[g], 
#                                            outGroup[g],
#                                            alternative='two-sided')

#             lfc = np.log2(inGroup[g].mean()+1) - np.log2(outGroup[g].mean()+1)
            
#             row = {
#                 'key' : key,
#                 'mtype' : mtype,
#                 'gene' : g,
#                 'score' : score,
#                 'pval' : pval,
#                 'log2foldchange' : lfc, 
#                 'inGroup' : inGroup[g].mean(),
#                 'outGroup' : outGroup[g].mean(),
#             }
#             lfdf.append(row)
            

        
        
        
        
        
#     break
    
    
# lfdf = pd.DataFrame(lfdf)
# lfdf.head(10)



In [None]:
res = []

macTypes = [
    'Mac1',
    'Mac2',
    'Mac3',
    'Mac4',
    'Mac5',
]

# Kept under its own name: this cell used to rebind `pwayGenes`, which replaced
# the KEGG gene sets that the pathway-scoring cell further down indexes by
# pathway name (`pwayGenes[pname]`), so a top-to-bottom run hit a KeyError.
markerGenes = {
    'Proliferation' : ['MKI67']
}

for key in keys:
    rf = rna[key]
    lf = labels[key]
    lf = lf.drop_duplicates(subset='cellId')

    cellIds = lf[lf['cellType'].isin(macTypes)]['cellId'].to_list()
    rf = rf[rf.index.isin(cellIds)]
    print(f"\n{key}...")

    for pname, genes in markerGenes.items():
        # marker genes present as columns of the expression table
        xGenes = [x for x in genes if x in rf.columns]

        pdf = rf[xGenes].reset_index(drop=False)
        print(pdf.head())

        # print(pname)
        break  # preview only: stop after the first gene set

    break  # preview only: stop after the first diet key
        
#         print(f"{pname} {len(xGenes)}")
        
#         meanExp = rf[xGenes].astype(bool).sum(axis=1).reset_index(drop=False)
#         meanExp.columns = ['cellId', 'mean']
#         meanExp['meanNorm'] = meanExp['mean'] / len(xGenes)
#         meanExp['pway'] = pname
#         meanExp['key'] = key
        
#         meanExp = pd.merge(meanExp, lf[['cellId', 'cellType','metaTypes']], 
#                        how='left', 
#                        right_on='cellId',
#                        left_on='cellId')
        
#         res.append(meanExp)

    
# res = pd.concat(res)
# print(f"{res.shape=}")
# res.head()

In [None]:
# Expression of the selected genes as floats, for the heatmap below.
# Stored as `exprMat` rather than `t`: `t` is the scalar expression-fraction
# threshold set in the KEGG pathway-pair cell and is read again later
# (`thresh = int(n * t)`), which fails if `t` has become a DataFrame.
exprMat = rf[xGenes].astype(float)

plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = 10, 10
plt.rcParams['figure.facecolor'] = "w"

# genes as rows, cells as columns
sns.heatmap(data=exprMat.T)

In [None]:
# t = 0.1 # require t percent of the cells to express this gene

# res = []

# macTypes = [
#     'Mac1',
#     'Mac2',
#     'Mac3',
#     'Mac4',
#     'Mac5',
# ]

# for key in keys:
#     rf = rna[key]
#     lf = labels[key]
#     lf = lf.drop_duplicates(subset='cellId')
    
#     cellIds = lf[lf['cellType'].isin(macTypes)]['cellId'].to_list()
#     rf = rf[rf.index.isin(cellIds)]
#     print(f"\n{key}...")
    
#     n = len(rf)
#     thresh = int(n * t)
#     rfBool = rf.astype(bool).astype(int)
    
#     for pid, pname in pathways.items():
#         genes = pwayGenes[pname]
#         xGenes = [x for x in genes if x in rf.columns]
        
#         geneCounts = rfBool[xGenes].sum(axis=0)
#         cols = geneCounts[geneCounts > thresh].index.to_list()
#         print(f"{pname=} {len(cols)=}")

#         # meanExp = rf[cols].mean(axis=1).reset_index(drop=False)
#         meanExp = rf[cols].max(axis=1).reset_index(drop=False)
#         meanExp.columns = ['cellId', 'mean']
#         meanExp['pway'] = pname
#         meanExp['key'] = key
        
#         meanExp = pd.merge(meanExp, lf[['cellId', 'cellType','metaTypes']], 
#                        how='left', 
#                        right_on='cellId',
#                        left_on='cellId')
        
#         res.append(meanExp)

    
# res = pd.concat(res)
# print(f"{res.shape=}")
# res.head()

In [None]:

# Long table: one row per (diet key, pathway, macrophage cell) with
#   mean     -> number of pathway genes with a truthy (non-zero) value in the cell
#   meanNorm -> that count divided by the number of pathway genes present
# (the column is called 'mean' for compatibility with the plotting cells).
res = []

macTypes = [
    'Mac1',
    'Mac2',
    'Mac3',
    'Mac4',
    'Mac5',
]

# Fail early with an actionable message if the KEGG gene sets are not loaded
# (another cell in this notebook rebinds `pwayGenes` to a different dict).
missing = [p for p in pathways.values() if p not in pwayGenes]
if missing:
    raise KeyError(f"pwayGenes has no entry for {missing}; re-run the KEGG parsing cell")

for key in keys:
    rf = rna[key]
    lf = labels[key]
    lf = lf.drop_duplicates(subset='cellId')

    cellIds = lf[lf['cellType'].isin(macTypes)]['cellId'].to_list()
    rf = rf[rf.index.isin(cellIds)]
    print(f"\n{key}...")

    # The unused n / thresh / rfBool computation was removed: nothing in this
    # cell read it, and `int(n * t)` raised when `t` had been overwritten with
    # a DataFrame by the heatmap cell.

    for pid, pname in pathways.items():
        genes = pwayGenes[pname]
        # pathway genes present as columns of the expression table
        xGenes = [x for x in genes if x in rf.columns]

        print(f"{pname} {len(xGenes)}")

        meanExp = rf[xGenes].astype(bool).sum(axis=1).reset_index(drop=False)
        meanExp.columns = ['cellId', 'mean']
        meanExp['meanNorm'] = meanExp['mean'] / len(xGenes)
        meanExp['pway'] = pname
        meanExp['key'] = key

        meanExp = pd.merge(meanExp, lf[['cellId', 'cellType','metaTypes']],
                       how='left',
                       right_on='cellId',
                       left_on='cellId')

        res.append(meanExp)


res = pd.concat(res)
print(f"{res.shape=}")
res.head()

In [None]:
# pdf = res.copy()
# pdf = pdf.fillna(0)
# print(f"{pdf.shape=}")
# pdf = pdf[pdf['mean'] > 0]
# pdf['index'] = pdf['key'] + "_" + pdf['cellId']
# pdf = pdf.set_index('index')
# print(f"{pdf.shape=}")

# k = pdf[['pway', 'mean']].value_counts().reset_index(drop=False)
# k.columns = ['pway', 'geneCount', 'cellCount']
# k.head()

# sns.barplot(data=k, 
#             y='cellCount',
#              x="geneCount",)

In [None]:
# Preview the first rows of the current `res` table.
res.head()

In [None]:
# keep rows with a strictly positive count (NaNs count as zero)
pdf = res.fillna(0)
print(f"{pdf.shape=}")
pdf = pdf.loc[pdf['mean'] > 0].copy()

# composite "<key>_<cellId>" identifier as the first column, then a fresh
# 0..n-1 row index in place of the one inherited from `res`
pdf.insert(0, 'index', pdf['key'] + "_" + pdf['cellId'])
pdf = pdf.reset_index(drop=True)
print(f"{pdf.shape=}")

# # mask = pdf['cellType'].isin(['Mac1', 'Mac4', 'Mac5'])
# # pdf = pdf[mask]

# one short, wide row per pathway, colored by macrophage cluster
g = sns.FacetGrid(
    pdf,
    row="pway",
    hue="cellType",
    height=1,
    aspect=6,
    palette='Spectral',
)

# strip of the normalised expressed-gene counts in every row
g.map(sns.stripplot, "meanNorm", alpha=0.8)

g.set_titles("")

g.set(ylabel="")
# # g.despine(bottom=False, left=True)

# for ax, label in zip(g.axes.flat, pdf['pway'].unique()):
#     ax.text(0.1, 1, label, 
#             # fontweight="bold", 
#             # color=color,
#             ha="left", 
#             va="center", 
#             transform=ax.transAxes)


# plt.legend(loc='upper right', bbox_to_anchor=(1.4, 1.02))
# # plt.xlabel('Mean Expression')

In [None]:
# pdf = res.copy()
# print(f"{pdf.shape=}")
# pdf = pdf[pdf['mean'] > 0]
# print(f"{pdf.shape=}")


# # pdf = pd.pivot_table(pdf, 
# #                      index=['pway', 'cellId'],
# #                      columns='key').reset_index(drop=False)

# # pdf.columns = pdf.columns.droplevel()
# # pdf.columns = ['pway', 'cellId', 'HFD14', 'HFD8', "ND"]
# # pdf = pdf[['pway', 'cellId', "ND", "HFD8", "HFD14"]]
# # pdf = pdf.fillna(0)

# plt.rcParams['figure.dpi'] = 300
# plt.rcParams['figure.facecolor'] = "w"


# sns.stripplot(data=pdf,
#               x='mean',
#               y='pway',
#               hue='key',
#               dodge=True,
#               jitter=True,
#               alpha=0.1,
#               zorder=3,
#               palette='viridis',
#               size=2,)
              
# plt.xscale('log')
# plt.grid(True, zorder=1)
# plt.legend(loc='upper right', bbox_to_anchor=(1.4, 1.015))



In [None]:
# sns.swarmplot(data=pdf, 
#               x="mean", 
#               y="pway", 
#               hue="key",
#               dodge=True, 
#               s=2,
#               alpha=.1, 
#               zorder=2,
#               palette='viridis',
# )
# plt.grid(True, zorder=0)

# plt.xscale('log')

# plt.legend(loc='upper right')
# plt.ylabel("")
# plt.xlabel('Mean Expression (CPM)')

In [None]:
# plt.rcParams['figure.dpi'] = 300
# plt.rcParams['figure.figsize'] = 8, 8
# plt.rcParams['figure.facecolor'] = "w"

# pdf = res.copy()
# pdf['xlog'] = np.log(pdf['x'])
# pdf['ylog'] = np.log(pdf['y'])

# g = sns.catplot(data=pdf,
#                 x="xlog", 
#                 y="ylog", 
#                 hue="key", 
#                 # col="diet",
#                 capsize=.2, 
#                 palette="viridis", 
#                 errorbar="se",
#                 kind="point", 
#                 height=6, 
#                 aspect=.75,
# )

In [None]:
# plt.rcParams['figure.dpi'] = 300
# plt.rcParams['figure.figsize'] = 8, 8
# plt.rcParams['figure.facecolor'] = "w"

# pdf = res.copy()
# pdf['xlog'] = np.log(pdf['x'])
# pdf['ylog'] = np.log(pdf['y'])

# g = sns.swarmplot(data=pdf, 
#                   x="xlog", 
#                   y="ylog", 
#                   hue="key")


In [None]:
# Joint KDE of the two log-transformed pathway aggregates, colored by diet key.
#
# NOTE(review): this cell reads res['x'] and res['y']. Those columns are created
# by the KEGG pathway-pair cell (af.columns = ['x', 'y']); the `res` rebuilt by
# the later pathway-scoring cells has cellId/mean/meanNorm/pway/key/... instead,
# so this presumably only works when run right after the pathway-pair cell --
# confirm the intended position.
plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = 8, 8
plt.rcParams['figure.facecolor'] = "w"

pdf = res.copy()
# natural log; np.log maps 0 to -inf
pdf['xlog'] = np.log(pdf['x'])
pdf['ylog'] = np.log(pdf['y'])

# NOTE(review): ec / size / linewidths are passed through as extra keyword
# arguments -- verify the installed seaborn accepts them for kind='kde'.
h = sns.jointplot(data=pdf,
              dropna=True,
              x='xlog', 
              y='ylog', 
              hue='key',
              kind='kde',
              alpha=0.5,
              ec='k',
              size=10,
              linewidths=0.5,
              palette='viridis')

# axis labels are the pathway names set in the KEGG pathway-pair cell
h.set_axis_labels(xPathname, 
                  yPathname, 
                  fontsize=16)

