In [1]:
import pandas as pd
import numpy as np
import os
import sys
import seaborn as sns
import matplotlib.pyplot as plt
import gget
import utils as ut
import matplotlib.patches as mpatches

In [2]:
cardOutDir = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDOuputs/"

dfList = []

for f in os.listdir(cardOutDir):
    
    if 'macrophage' in f and not 'refined' in f:
        fullPath = f"{cardOutDir}{f}"
        key = f.split("_")[0]
        df = pd.read_csv(fullPath)
        df = df.rename(columns={'Unnamed: 0' : 'spotId'})
        df['key'] = key
        dfList.append(df)

df = pd.concat(dfList, ignore_index=True)
print(f"{df.shape=}")
print(df['key'].value_counts())
print()
df.head()

df.shape=(7418, 13)
HFD8     3391
ND       2034
HFD14    1993
Name: key, dtype: int64



Unnamed: 0,spotId,Mac5,Monocytes,T cells,Mac4,Mac1,NK cells,Dendritic cells,Stromal cells,B cells,Mac2,Mac3,key
0,AAACATTTCCCGGATT.1,0.219997,0.041542,0.055275,0.007548,0.004188,0.17086,0.03468007,0.409201,0.04104,0.004757,0.010912,HFD14
1,AAACCGGGTAGGTACC.1,0.450618,0.129969,0.04007,0.088368,0.013757,0.019633,0.001280364,0.187514,0.031021,0.028867,0.008903,HFD14
2,AAACCGTTCGTCCAGG.1,0.310869,0.10834,0.044845,0.004256,0.004016,0.187004,0.0003752028,0.283055,0.03248,0.02156,0.0032,HFD14
3,AAACCTAAGCAGCCGG.1,0.558332,0.057287,0.052469,0.009084,0.023214,0.043516,1.198581e-07,0.239973,0.011113,0.004948,6.4e-05,HFD14
4,AAACCTCATGAAGTTG.1,0.234411,0.158933,0.041466,0.029749,0.037127,0.122802,0.01377525,0.216199,0.049224,0.053188,0.043126,HFD14


In [3]:
sptDir =  "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDInputs/"

spt = {}

for f in os.listdir(sptDir):
    if "spt" in f and "macrophage" in f:
        print(f)
        fullPath = f"{sptDir}{f}"
        key = f.split("_")[0]
        sdf = pd.read_csv(fullPath)
        sdf = sdf.rename(columns={'Unnamed: 0' : 'gene'})
        sdf = sdf.set_index('gene')
        sdf = sdf.T
        sdf.index = sdf.index.str.replace("-", ".")
        spt[key] = sdf
    
print('done')

HFD8_macrophage_spt.csv
HFD14_macrophage_spt.csv
ND_macrophage_spt.csv
done


In [4]:
coordDir = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDInputs/"

dfList = []

for f in os.listdir(coordDir):
    if "coord" in f and "macrophage" in f:
        fullPath = f"{coordDir}{f}"
        key = f.split("_")[0]
        cdf = pd.read_csv(fullPath)
        cdf = cdf.rename(columns={'Unnamed: 0' : 'spotId'})
        cdf['key'] = key
        dfList.append(cdf)

cdf = pd.concat(dfList, ignore_index=True)
print(f"{cdf.shape=}")
print(cdf['key'].value_counts())
print()
cdf.head()

cdf.shape=(7424, 4)
HFD8     3394
ND       2036
HFD14    1994
Name: key, dtype: int64



Unnamed: 0,spotId,x,y,key
0,AAACAAGTATCTCCCA.1,1263.912793,1164.571691,ND
1,AAACAGCTTTCAGAAG.1,331.221482,1043.23363,ND
2,AAACAGGGTCTATATT.1,371.414081,1112.975138,ND
3,AAACATTTCCCGGATT.1,1213.938136,1356.563577,ND
4,AAACCCGAACGAAATC.1,1394.17128,1077.242752,ND


In [5]:
"""Merge CARD predictions with spatial coordinates"""
df = pd.merge(cdf, df,
              how='left', 
              left_on=['spotId', 'key'],
              right_on=['spotId', 'key'])

df = df.fillna(0)
df.head()

Unnamed: 0,spotId,x,y,key,Mac5,Monocytes,T cells,Mac4,Mac1,NK cells,Dendritic cells,Stromal cells,B cells,Mac2,Mac3
0,AAACAAGTATCTCCCA.1,1263.912793,1164.571691,ND,0.040438,0.020917,0.007951,0.05702,0.017706,0.011742,0.001127,0.815034,0.009559,0.017354,0.001153
1,AAACAGCTTTCAGAAG.1,331.221482,1043.23363,ND,0.00675,0.032602,0.003236,0.013079,0.004132,0.008373,0.00165,0.919321,0.001394,0.006769,0.002694
2,AAACAGGGTCTATATT.1,371.414081,1112.975138,ND,0.011082,0.020347,0.016427,0.037635,0.015098,0.016469,0.000553,0.850407,0.021293,0.010147,0.000543
3,AAACATTTCCCGGATT.1,1213.938136,1356.563577,ND,0.046854,0.009198,0.005974,0.056906,0.013504,0.00843,0.002188,0.842345,0.003075,0.010028,0.001498
4,AAACCCGAACGAAATC.1,1394.17128,1077.242752,ND,0.023904,0.019047,0.017204,0.029673,0.020391,0.024473,0.004858,0.821855,0.01961,0.016781,0.002202


In [6]:
imDir = "/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/images/"

imgs = {}

for key in df['key'].unique():
    imPath = f"{imDir}{key}.npy"
    img = np.load(imPath)
    print(key, img.shape)
    imgs[key] = img

print('done')

FileNotFoundError: [Errno 2] No such file or directory: '/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/images/ND.npy'

In [None]:
# cTypes = [
#     'Macrophages', 
#     'Monocytes',
#     'T cells',
#     'NK cells', 
#     'Dendritic cells', 
#     'Stromal cells', 
#     'B cells'
# ]

# def vcorrcoef(X,y):
#     """Fast vector matrix correlation """
#     Xm = np.reshape(np.mean(X,axis=1),(X.shape[0],1))
#     ym = np.mean(y)
#     r_num = np.sum((X-Xm)*(y-ym),axis=1)
#     r_den = np.sqrt(np.sum((X-Xm)**2,axis=1)*np.sum((y-ym)**2))
#     r = r_num/r_den
#     return r

# corrs = {}

# keys = sorted(df['key'].unique(), reverse=True)

# for key in keys:
#     pdf = df[df['key'] == key]
    
#     sdf = spt[key]
    
#     # make sure the spot order is the same
#     assert(pdf['spotId'].to_list() == sdf.index.to_list())
    
#     rf = pd.DataFrame({'Gene' : sdf.columns})

#     for ctype in cTypes:
#         r = vcorrcoef(sdf.to_numpy().T, pdf[ctype].to_numpy().T)        
#         rf[ctype] = r
        
#     rf = rf.fillna(0)
#     corrs[key] = rf
    
# print('done')

In [None]:
# n = 100
# alpha = 0.05
# db = 'celltypes'

# res = []

# for key in keys:
#     rf = corrs[key]
    
#     for ctype in cTypes:
#         genes = rf.sort_values(by=ctype, ascending=False)['Gene'].head(n).to_list()
#         ef = gget.enrichr(genes, database=db)        
#         ef = ef[ef['p_val'] <= alpha]
        
#         # print(ctype)
#         # print(ef['path_name'].head())
#         # print()
        
#         isSig = False
        
#         if ctype.lower() in [x.lower() for x in ef['path_name'].to_list()]:
#             isSig = True
            
#         if ctype == 'Stromal cells' and 'adipocytes' in [x.lower() for x in ef['path_name'].to_list()]:
#             isSig = True
            
#         row = {
#             'key' : key,
#             'cellType' : ctype,
#             'signifcant' : isSig,
#             'nGenes' : n,
#         }
        
#         res.append(row)
            
# res = pd.DataFrame(res)

# r = pd.pivot_table(res, 
#                index='cellType', 
#                columns='key',
#                values='signifcant')
# r = r[r.columns[::-1]]

# plt.rcParams['figure.dpi'] = 300
# plt.rcParams['figure.dpi'] = 300
# sns.heatmap(data=r,
#            cmap='RdYlGn', 
#            linewidths=0.3, 
#            linecolor='k')

# plt.yticks(rotation=0)
# plt.ylabel("")
# plt.yticks(rotation=0)
# plt.xlabel("")

In [None]:
# break

In [None]:
cTypes = [
    'Mac1',
    'Mac2',
    'Mac3',
    'Mac4',
    'Mac5',
    'Monocytes',
    'T cells',
    'NK cells', 
    'Dendritic cells', 
    'Stromal cells', 
    'B cells'
]

trim = {
    'ND' : {'xlim' :  [220, 1500], 'ylim' : [1600, 630]},
    'HFD8' : {'xlim' :  [190, 1350], 'ylim' : [1680, 400]},
    'HFD14' : {'xlim' :  [250, 1400], 'ylim' : [1650, 450]},
}

layer = 1

plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.facecolor'] = "w"
plt.rcParams['figure.figsize'] = 8,6

colors = ut.ncolor(len(cTypes), cmap='Spectral')
colorDict = dict(zip(sorted(cTypes), colors))



for key in sorted(df['key'].unique(), reverse=True):
    pdf = df[df['key'] == key]
    pdf = pdf.set_index('spotId')
    img = imgs[key]
    
    pdf['cType'] = pdf[cTypes].apply(lambda x: cTypes[np.argsort(x)[-layer]], axis=1)
    pdf['color'] = pdf['cType'].map(colorDict) 
    
    handles = []
    
    for ctype in sorted(cTypes):
        handle = mpatches.Patch(color=colorDict[ctype], 
                                ec='k', 
                                label=ctype)
        handles.append(handle)
    
    plt.scatter(pdf['x'],
                pdf['y'],
                c=pdf['color'],
                ec='k',
                linewidth=0.5)
    
#     plt.imshow(img, 
#                cmap='binary')
    
    _ = plt.xticks([])
    _ = plt.yticks([])
    plt.title(f'{key} Layer {layer}')
    ax = plt.gca()
    xlim = trim[key]['xlim']
    ylim = trim[key]['ylim']
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    plt.legend(handles=handles, loc='upper right', prop={'size': 8})
    plt.show()
    
    break


In [None]:
layer = 1

plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.facecolor'] = "w"
plt.rcParams['figure.figsize'] = 12, 4

colors = ut.ncolor(len(cTypes), cmap='Spectral')
colorDict = dict(zip(sorted(cTypes), colors))

res = []

# fig, axs = plt.subplots(1, 3, sharey=True)

for i, key in enumerate(sorted(df['key'].unique(), reverse=True)):
    pdf = df[df['key'] == key]
    pdf = pdf.set_index('spotId')
    img = imgs[key]
    
    pdf['cType'] = pdf[cTypes].apply(lambda x: cTypes[np.argsort(x)[-layer]], axis=1)
    
    props = pd.DataFrame(pdf['cType'].value_counts(normalize=True).reset_index())
    props.columns = ['cellType', 'proportion']
    props['color'] = props['cellType'].map(colorDict) 
    props = props.sort_values(by='cellType')
    props['key'] = key
    
    res.append(props)
    
#     axs[i].barh(props['cellType'],
#                 props['proportion'],
#                 color=props['color'],
#                 edgecolor='k')
    
#     axs[i].set_xlabel('Proportion of Spots')
#     axs[i].set_title(f"{key} Layer {layer}")
#     axs[i].set_xlim([0, 1])
#     # break
# plt.tight_layout()
# plt.show()

res = pd.concat(res,ignore_index=True)
res = res.sort_values(by='key', ascending=False)
res2 = pd.pivot_table(res, values='proportion', index=['cellType'], columns='key').reset_index()
res2 = res2.set_index('cellType')
# res2.columns = ['ND', 'HFD8', 'HFD14']

plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = 3, 3
plt.rcParams['figure.facecolor'] = 'w'

# res = res.set_index('cellType')
res2.T.plot(kind='bar', stacked=True, edgecolor='k', color=['#9e0142', '#e95c47', '#fdbf6f', '#ffffbe', '#bfe5a0', '#54aead', '#5e4fa2'])
plt.legend(bbox_to_anchor=(1.05, 1.025))
plt.ylabel('Proportion of Spots')
plt.gca().invert_xaxis()
plt.title(f"Layer {layer}")
_ = plt.xticks(rotation=0)
plt.xlabel("")

In [None]:
break

In [None]:
cellType = "Macrophages"
cmap = 'Reds'
query = ['CD9', 'TREM2', 'PLIN2', 'CD63']

plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.facecolor'] = "w"
plt.rcParams['figure.figsize'] = 12, 3

for key in sorted(df['key'].unique(), reverse=True):
    pdf = df[df['key'] == key]
    pdf = pdf.set_index('spotId')
    img = imgs[key]
    sdf = spt[key]
    
    fig, axs = plt.subplots(1, len(query)+1, sharey=True)
    axs = axs.ravel()

    sns.scatterplot(data=pdf,
                    x='x',
                    y='y',
                    hue=cellType,
                    palette=cmap,
                    linewidth=0,
                    alpha=0.8,
                    zorder=2,
                    legend=False,
                    s=10,
                    ax=axs[0])

    # axs[0].imshow(img, 
    #               cmap='binary',
    #               zorder=1)
    """build color bar"""
    norm = plt.Normalize(pdf[cellType].min(), 
                         pdf[cellType].max())

    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])

    axs[0].set_xlabel("")
    axs[0].set_ylabel("")
    _ = axs[0].set_xticks([])
    _ = axs[0].set_yticks([])
    
    xlim = trim[key]['xlim']
    ylim = trim[key]['ylim']
    axs[0].set_xlim(xlim)
    axs[0].set_ylim(ylim)
    axs[0].set_title(f"{key} {cellType}")
    # axs[0].colorbar(sm)
    
    
    for i, q in enumerate(query):
        pi = i + 1
        mask = sdf[q.upper()] > 0
        plotdf = pdf[mask]
        color = sdf[mask][q.upper()]
        
        sns.scatterplot(data=plotdf,
                x='x',
                y='y',
                # hue=color,
                hue=color, 
                # ec='k',     
                palette=cmap,
                linewidth=0.1,
                alpha=0.8,
                legend=False,
                zorder=2,
                s=10,
                ax=axs[pi])
        
        # axs[pi].imshow(img, 
        #                cmap='binary',
        #                zorder=1)
        axs[pi].set_xlim(xlim)
        axs[pi].set_ylim(ylim)
        axs[pi].set_title(str(q).lower().capitalize())
        _ = axs[pi].set_xticks([])
        _ = axs[pi].set_yticks([])

    plt.tight_layout()
    plt.show()
    # break