In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

import anndata as ad
import scanpy as sc

from scipy import stats
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from umap import UMAP


from scroutines import basicu
from scroutines import powerplots
from scroutines.miscu import is_in_polygon

import utils_merfish
from merfish_datasets import merfish_datasets
from merfish_genesets import get_all_genesets

import importlib
importlib.reload(powerplots)
importlib.reload(utils_merfish)

In [None]:
np.random.seed(0)

In [None]:
def binning_pipe(adata, n=20, layer='lnorm', bin_type='depth_bin'):
    """Bin cells along depth and width and summarize expression per bin.

    Both ``adata.obs['depth']`` and ``adata.obs['width']`` are binned with
    ``utils_merfish.binning``; the expression matrix ``adata.layers[layer]``
    is then grouped by the binning selected with ``bin_type``.

    Parameters
    ----------
    adata
        AnnData-like object providing ``obs``, ``layers`` and ``var``.
        ``adata.layers[layer]`` is passed directly to ``pd.DataFrame``
        (assumes a dense cells-by-genes array -- TODO confirm for sparse layers).
    n : int
        Forwarded to ``utils_merfish.binning`` (presumably the number of
        bins -- verify against that helper).
    layer : str
        Key into ``adata.layers``.
    bin_type : {'depth_bin', 'width_bin'}
        Which binning to group by.

    Returns
    -------
    norm_mean, norm_sem, norm_std : pandas.DataFrame
        Per-bin mean, standard error and standard deviation of the numeric columns.
    norm_n : pandas.Series
        Number of cells per bin (``value_counts(sort=False)``).
    depth_binned, width_binned
        Per-cell bin assignments returned by ``utils_merfish.binning``.
    depth_bins, width_bins
        Bin definitions returned by ``utils_merfish.binning``.
    """
    assert bin_type in ['depth_bin', 'width_bin']
    # bin it
    depth_bins, depth_binned = utils_merfish.binning(adata.obs['depth'].values, n)
    width_bins, width_binned = utils_merfish.binning(adata.obs['width'].values, n)

    norm_ = pd.DataFrame(adata.layers[layer], columns=adata.var.index)
    norm_['depth_bin'] = depth_binned
    norm_['width_bin'] = width_binned

    # Build the groupby once. observed=False is spelled out so that, if the bin
    # column is categorical, empty bins stay in the summaries (and therefore
    # stay aligned with value_counts below) regardless of the pandas default.
    grouped = norm_.groupby(bin_type, observed=False)
    norm_mean = grouped.mean(numeric_only=True)
    norm_sem  = grouped.sem(numeric_only=True)
    norm_std  = grouped.std(numeric_only=True)
    norm_n    = norm_[bin_type].value_counts(sort=False)

    return norm_mean, norm_sem, norm_std, norm_n, depth_binned, width_binned, depth_bins, width_bins

def binning_pipe2(adata, col_to_bin, layer, bins=None, n=20):
    """Bin cells along one ``obs`` column and summarize expression per bin.

    Parameters
    ----------
    adata
        AnnData-like object providing ``obs``, ``layers`` and ``var``.
        ``adata.layers[layer]`` is passed directly to ``pd.DataFrame``
        (assumes a dense cells-by-genes array -- TODO confirm for sparse layers).
    col_to_bin : str
        Column of ``adata.obs`` to bin.
    layer : str
        Key into ``adata.layers``.
    bins : sequence, optional
        Bin edges handed to ``pd.cut``. When ``None``, the bins are derived
        with ``utils_merfish.binning(values, n)`` instead.
    n : int
        Forwarded to ``utils_merfish.binning``; only used when ``bins`` is ``None``.

    Returns
    -------
    norm_mean, norm_sem, norm_std : pandas.DataFrame
        Per-bin mean, standard error and standard deviation of each gene.
    norm_n : pandas.Series
        Number of cells per bin (``value_counts(sort=False)``).
    binned
        Per-cell bin assignment.
    bins
        The bin definition that was used (the input ``bins`` when given).
    """
    values = adata.obs[col_to_bin].values
    if bins is None:
        # bin it
        bins, binned = utils_merfish.binning(values, n)
    else:
        binned = pd.cut(values, bins=bins)

    norm_ = pd.DataFrame(adata.layers[layer], columns=adata.var.index)
    norm_['thebin'] = binned

    # Build the groupby once. observed=False keeps empty categorical bins in the
    # summaries so they remain aligned with norm_n, independent of the pandas default.
    grouped = norm_.groupby('thebin', observed=False)
    norm_mean = grouped.mean(numeric_only=True)
    norm_sem  = grouped.sem(numeric_only=True)
    norm_std  = grouped.std(numeric_only=True)
    norm_n    = norm_['thebin'].value_counts(sort=False)

    return norm_mean, norm_sem, norm_std, norm_n, binned, bins

In [None]:
genesets = get_all_genesets()
genesets

In [None]:
agenes = genesets['a']
bgenes = genesets['b']
cgenes = genesets['c']
iegs   = genesets['i']
up_agenes = genesets['a_up']
abcgenes = np.hstack([agenes, bgenes, cgenes])
len(abcgenes), len(iegs)

In [None]:
directories = merfish_datasets
print(merfish_datasets)

ddir = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/merfish/organized" 
!ls $ddir/*240411.h5ad 

In [None]:
%%time
fin = os.path.join(ddir, 'P28NRDR_v1l23_merged_240508.h5ad')
adata = ad.read(fin) 

clst_annots = adata.uns['clst_annots']
pcs = adata.obsm['pca']
ucs = adata.obsm['umap']
r = 0.3

names = [
    'P28NR_ant', 
    'P28NR_pos',
    'P28NRb_ant', 
    'P28NRb_pos',
    
    'P28DR_ant', 
    'P28DR_pos',
    'P28DRb_ant', 
    'P28DRb_pos',
]
adata

In [None]:
agenes_idx = basicu.get_index_from_array(adata.var.index.values, agenes)
bgenes_idx = basicu.get_index_from_array(adata.var.index.values, bgenes)
cgenes_idx = basicu.get_index_from_array(adata.var.index.values, cgenes)
igenes_idx = basicu.get_index_from_array(adata.var.index.values, iegs)

# plot everything 

In [None]:

clsts = adata.obs[f'leiden_r{r}'].astype(int)
xr =  adata.obs['width_show']
yr =  adata.obs['depth_show']
ux    = adata.obsm['umap'][:,0]
uy    = adata.obsm['umap'][:,1]
utils_merfish.plot_cluster(clsts, xr, yr, ux, uy, s=2)

samples, uniq_labels = pd.factorize(adata.obs['sample']) # .astype(int)
utils_merfish.plot_cluster(samples, xr, yr, ux, uy, s=2)

In [None]:
np.unique(clsts, return_counts=True)

In [None]:
clsts = adata.obs[f'leiden_r{r}'].astype(int)
uniq_clsts = np.unique(clsts)
n = len(uniq_clsts)

fig, axs = plt.subplots(1,n,figsize=(n*4,1*4))
for i, clst in enumerate(uniq_clsts):
    show = (clsts == clst)
    xr =  adata.obs['width_show']
    yr =  adata.obs['depth_show']
    ux    = adata.obsm['umap'][:,0]
    uy    = adata.obsm['umap'][:,1]
    
    ax = axs[i]
    ax.scatter(ux, uy, c=show, s=1, edgecolor='none', cmap='rocket_r', rasterized=True)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f"C{clst}")
    
fig, axs = plt.subplots(1,n,figsize=(n*4,1*6))
for i, clst in enumerate(uniq_clsts):
    show = (clsts == clst)
    xr =  adata.obs['width_show']
    yr =  adata.obs['depth_show']
    ux    = adata.obsm['umap'][:,0]
    uy    = adata.obsm['umap'][:,1]
    
    ax = axs[i]
    ax.scatter(xr, yr, c=show, s=3, edgecolor='none', cmap='rocket_r', rasterized=True)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f"C{clst}")
    
    
    # utils_merfish.plot_cluster(show, xr, yr, ux, uy, s=2, cmap=plt.cm.copper_r, suptitle=clst)

In [None]:
# Highlight the cells of clusters 0, 1 and 2 together: first in UMAP space,
# then in the (width_show, depth_show) spatial layout.
clsts = adata.obs[f'leiden_r{r}'].astype(int)
uniq_clsts = np.unique(clsts)
n = len(uniq_clsts)

show = np.isin(clsts, [0, 1, 2])
xr = adata.obs['width_show']
yr = adata.obs['depth_show']
ux = adata.obsm['umap'][:,0]
uy = adata.obsm['umap'][:,1]

# (x, y, figure size, marker size) for each of the two panels
panels = [
    (ux, uy, (1*4, 1*4), 1),
    (xr, yr, (1*4, 1*6), 3),
]
for px, py, figsize, size in panels:
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    ax.scatter(px, py, c=show, s=size, edgecolor='none', cmap='rocket_r', rasterized=True)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title("C0,C1,C2")


# utils_merfish.plot_cluster(show, xr, yr, ux, uy, s=2, cmap=plt.cm.copper_r, suptitle=clst)

# Identify major cell populations using marker genes, groups of genes, and quality metrics

In [None]:
# plot
marker_genes = [
       'Ptprn', 'Slc17a7', 'Gad1', 'Fos', 
       
       'Gfap', 'Slc6a13', 'Slc47a1',
       'Grin2c', 'Aqp4', 'Rfx4', 'Sox21', 'Slc1a3',
       
       'Sox10', 'Pdgfra', 'Mog',
       
       'Pecam1', 'Cd34' , 'Tnfrsf12a', 'Sema3c', 
       'Zfhx3', 'Pag1', 'Slco2b1', 'Cx3cr1',
      ] 
gns = marker_genes
n = len(gns)
nx = 4
ny = int((n+nx-1)/nx)
# add some quality metrics
fig, axs = plt.subplots(ny,nx,figsize=(nx*5,ny*4))
for gn, ax in zip(gns, axs.flat):
    g = adata[:,gn].layers['jnorm'].reshape(-1,)
    utils_merfish.st_scatter_ax(fig, ax, ucs[:,0], ucs[:,1], gexp=g)
    ax.set_title(gn)
plt.show()


In [None]:
# plot
marker_genes = [
       'Ptprn', 'Slc17a7', 'Gad1', 'Fos', 
       
       'Gfap', 'Slc6a13', 'Slc47a1',
       'Grin2c', 'Aqp4', 'Rfx4', 'Sox21', 'Slc1a3',
       
       'Sox10', 'Pdgfra', 'Mog',
       
       'Pecam1', 'Cd34' , 'Tnfrsf12a', 'Sema3c', 
       'Zfhx3', 'Pag1', 'Slco2b1', 'Cx3cr1',
      ] 
gns = marker_genes
n = len(gns)
nx = 4
ny = int((n+nx-1)/nx)
# add some quality metrics
fig, axs = plt.subplots(ny,nx,figsize=(nx*5,ny*4))
for gn, ax in zip(gns, axs.flat):
    g = np.log2(1+adata[:,gn].layers['jnorm'].reshape(-1,))
    utils_merfish.st_scatter_ax(fig, ax, ucs[:,0], ucs[:,1], gexp=g)
    ax.set_title(gn)
plt.show()



In [None]:
metrics = [
    'volume', 'anisotropy', 'perimeter_area_ratio', 'solidity', 
    'PolyT_raw', 'PolyT_high_pass', 'DAPI_raw', 'DAPI_high_pass', 
    'transcript_count', 'jnorm_transcript_count', 'gnnum', 'fpcov', 
    'depth', 'width', 'sample' 
       ]
n = len(metrics)
nx = 5
ny = int((n+nx-1)/nx)
# add some quality metrics
fig, axs = plt.subplots(ny,nx,figsize=(nx*5,ny*4))
for metric, ax in zip(metrics, axs.flat):
    g = adata.obs[metric].values
    if metric == 'sample':
        g, uniq_lbls = pd.factorize(g)
    p = utils_merfish.st_scatter_ax(fig, ax, ucs[:,0], ucs[:,1], gexp=g, s=3)
    fig.colorbar(p, shrink=0.4)
    ax.set_title(metric)
plt.show()


In [None]:
metrics = [
    'volume', 'PolyT_raw', 
    'gnnum', 'transcript_count', 'jnorm_transcript_count',  # 'fpcov', 
       ]
n = len(metrics)
nx = 5
ny = int((n+nx-1)/nx)
# add some quality metrics
fig, axs = plt.subplots(ny,nx,figsize=(nx*5,ny*4))
for metric, ax in zip(metrics, axs.flat):
    g = adata.obs[metric].values
    if metric == 'sample':
        g, uniq_lbls = pd.factorize(g)
    p = utils_merfish.st_scatter_ax(fig, ax, ucs[:,0], ucs[:,1], gexp=g, s=3)
    fig.colorbar(p, shrink=0.4)
    ax.set_title(metric)
plt.show()

In [None]:

# Relationship between cell volume and (raw / jnorm) transcript counts:
# per-cell scatter plus the mean trend within log-spaced volume bins.
dfwork = adata.obs.copy() 
xlbl = 'volume'
x = dfwork[xlbl].values 
bins = np.logspace(2,3.2,50)
dfwork['bin'] = pd.cut(x, bins)

    
fig, axs = plt.subplots(1,2,figsize=(8,4))
for ax, ylbl in zip(axs, ['transcript_count', 'jnorm_transcript_count']):
    
    y = dfwork[ylbl].values
    # The correlation is stored as `rho`, not `r`: the global `r` holds the Leiden
    # resolution and is still needed to build the `leiden_r{r}` column name later.
    rho, _ = stats.spearmanr(x, y)
    # observed=False keeps empty volume bins (as NaN rows) whatever the pandas default is
    res = dfwork.groupby('bin', observed=False)[[xlbl, ylbl]].mean()
    
    ax.scatter(x,y,s=5, edgecolor='none') #, cmap='viridis')
    ax.plot(res[xlbl], res[ylbl], color='k')
    ax.set_title(f'r={rho:.2f}')
    ax.set_xlabel(xlbl)
    ax.set_ylabel(ylbl)
    ax.set_xscale('log')
    ax.set_yscale('log')
fig.tight_layout()
plt.show()


In [None]:
# Cell volume vs. jnorm transcript count: per-cell scatter plus binned mean trend.
dfwork = adata.obs.copy() 
xlbl = 'volume'
ylbl = 'jnorm_transcript_count'

x = dfwork[xlbl].values 
y = dfwork[ylbl].values
# The correlation is stored as `rho`, not `r`: the global `r` holds the Leiden
# resolution and is still needed to build the `leiden_r{r}` column name later.
rho, _ = stats.spearmanr(x, y)
bins = np.logspace(2,3.2,50)
dfwork['bin'] = pd.cut(x, bins)
# observed=False keeps empty volume bins (as NaN rows) whatever the pandas default is
res = dfwork.groupby('bin', observed=False)[[xlbl, ylbl]].mean()

plt.scatter(x,y,s=5, edgecolor='none') #, cmap='viridis')
plt.plot(res[xlbl], res[ylbl], color='k')
plt.title(f'r={rho:.2f}')
plt.xlabel(xlbl)
plt.ylabel(ylbl)
plt.xscale('log')
plt.yscale('log')

In [None]:
fig, axs = plt.subplots(1,2,figsize=(2*5,1*4), sharex=True, sharey=True)
ax = axs[0]
sns.boxplot(data=adata.obs, y='transcript_count', x='sample', ax=ax) #].values
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set_yscale('log')

ax = axs[1]
sns.boxplot(data=adata.obs, y='jnorm_transcript_count', x='sample', ax=ax) #].values
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set_yscale('log')

fig.tight_layout()
plt.show()


In [None]:
adata

# focus on Glu neurons only

In [None]:
adata.obs['annot'] = clst_annots[adata.obs[f'leiden_r{r}'].astype(int)]
print(adata.obs['annot'].value_counts())

In [None]:
# Keep only cells whose annotation contains 'Glu'. Take an explicit copy: later
# cells write new columns into adata.obs / adata.obsm, and writing into an
# AnnData view would otherwise trigger an implicit copy with a warning.
# NOTE: `pcs` and `ucs` extracted at load time still refer to all cells.
adata = adata[adata.obs['annot'].str.contains('Glu')].copy()
print(adata)
# # adata = adata[adata.obs['gncov']>100]
# print(adata)

In [None]:
width_min = adata.obs.groupby('sample')['width'].min().reindex(names)
width_max = adata.obs.groupby('sample')['width'].max().reindex(names)
width_rng = width_max - width_min 
width_cum = pd.Series(np.cumsum(np.hstack([0, width_rng[:-1]+100])), index=names)

adata.obs['width_n0'] = adata.obs['width'] - width_min.reindex(adata.obs['sample']).values
adata.obs['width_show2'] =  adata.obs['width_n0'] + width_cum.reindex(adata.obs['sample']).values
adata.obs['depth_show2'] = -adata.obs['depth']

In [None]:
# PCA on the A/B/C type genes only: take the 'ljnorm' layer restricted to
# `abcgenes` and z-score each gene (column) across cells.
mat = adata[:,abcgenes].layers['ljnorm']
mat = stats.zscore(mat, axis=0)

# Same matrix restricted to samples whose name contains 'NR'. It is only used by
# the commented-out alternative below (fit on NR cells, project all cells).
mat_nr = adata[adata.obs['sample'].str.contains('NR'), abcgenes].layers['ljnorm']
mat_nr = stats.zscore(mat_nr, axis=0)

# Fit a 5-component PCA on all cells and store the per-cell coordinates.
pca = PCA(n_components=5)
pcs_typegenes = pca.fit(mat).transform(mat)
# pcs_typegenes = pca.fit(mat_nr).transform(mat)
adata.obsm['pcs_typegenes'] = pcs_typegenes

In [None]:
xi, yi = 0, 1
xsign, ysign = 1, 1

In [None]:
metrics = ['gncov', 'gnnum', 'depth', 'width_show']

fig, axs = plt.subplots(1,4,figsize=(4*5,1*4))
for metric, ax in zip(metrics, axs):
    # g = np.log10(1+adata.obs[metric])
    g = adata.obs[metric]
    x = xsign*adata.obsm['pcs_typegenes'][:,xi]
    y = ysign*adata.obsm['pcs_typegenes'][:,yi]
    utils_merfish.st_scatter_ax(fig, ax, x, y, gexp=g, s=3, )
    ax.set_title(metric)

In [None]:
xi, yi = 1, 2
xsign, ysign = -1,-1

In [None]:
%%time

# Fit 3 archetypes (PCHA, noc=3) to the cells in the plane spanned by the two
# selected principal components (indices xi, yi; signs xsign, ysign).
from py_pcha import PCHA

np.random.seed(0)

# X has one row per plotted dimension and one column per cell.
X = np.vstack([
    xsign*adata.obsm['pcs_typegenes'][:,xi], 
    ysign*adata.obsm['pcs_typegenes'][:,yi], 
])

# X = np.vstack([
#     xsign*adata[adata.obs['sample'].str.contains('NR')].obsm['pcs_typegenes'][:,xi], 
#     ysign*adata[adata.obs['sample'].str.contains('NR')].obsm['pcs_typegenes'][:,yi], 
# ])


XC, S, C, SSE, varexpl = PCHA(X, noc=3, delta=0)
XC = np.array(XC)
# Reorder the archetype columns by increasing first coordinate so the vertex order
# is stable. NOTE(review): S and C are NOT permuted here, so their archetype axis
# keeps PCHA's original order and no longer matches XC's columns.
XC = XC[:,np.argsort(XC[0])].copy() # order this
print(XC.shape, S.shape, C.shape, SSE.shape, varexpl.shape, SSE, varexpl)




In [None]:
def add_triangle(XC, ax, zorder=0, vertices=False, **kwargs):
    """Draw the archetype polygon on ``ax`` and optionally mark its vertices.

    Parameters
    ----------
    XC : array-like, shape (2, k)
        Vertex coordinates: row 0 holds x, row 1 holds y, one column per vertex.
        The notebook uses k = 3 (a triangle), but any k >= 1 is accepted.
    ax
        Axes-like object providing ``plot`` and ``scatter``.
    zorder : int
        Draw order applied to both the outline and the vertex markers.
    vertices : bool
        When True, mark vertex ``i`` with color ``'C{i}'``.
    **kwargs
        Forwarded to ``ax.scatter`` for the vertex markers only.
    """
    XC = np.asarray(XC)
    xs, ys = XC[0], XC[1]

    # add the triangle; the first vertex is repeated to close the outline
    ax.plot(xs.tolist()+[xs[0]], ys.tolist()+[ys[0]], '--',  color='gray', label='', zorder=zorder, linewidth=1, markersize=3)

    # add vertices, one color-cycle entry per vertex
    if vertices:
        for k in range(XC.shape[1]):
            ax.scatter(xs[k], ys[k], color=f'C{k}', zorder=zorder, **kwargs)

In [None]:
gns = [agenes, bgenes, cgenes, iegs]
titles = ['A genes', 'B genes', 'C genes', 'IEGs']
adatas = [
    adata,
    adata[adata.obs['sample'].str.contains('NR')],
    adata[adata.obs['sample'].str.contains('DR')],
]
conditions = ['combined', 'NR', 'DR']

fig, axss = plt.subplots(3,4,figsize=(4*5,3*3), sharex=True, sharey=True)
for i, (axs, adatasub, condition) in enumerate(zip(axss, adatas, conditions)):
    condition = conditions[i]
    for j, (ax, gn, title,) in enumerate(zip(axs, gns, titles)):
        g = adatasub[:,gn].layers['ljnorm'].mean(axis=1)
        x = xsign*adatasub.obsm['pcs_typegenes'][:,xi]
        y = ysign*adatasub.obsm['pcs_typegenes'][:,yi]
        
        p = utils_merfish.st_scatter_ax(fig, ax, x, y, gexp=g, s=5, cmap='coolwarm', vmin_p=5, vmax_p=95)
        colorbar = plt.colorbar(p, aspect=5, shrink=0.3)
        
        # Show ticks but no grid
        ax.set_aspect('equal')
        ax.axis('on')
        ax.grid(False)  # Turn off grid lines
        sns.despine(ax=ax)
        ax.tick_params(axis='both', which='both', bottom=True, left=True)
        
        if i == 0:
            ax.set_title(title)
        if j == 0:
            ax.set_ylabel(condition, rotation=0, loc='top')
            
        # add the triangle
        add_triangle(XC, ax)
plt.show()

In [None]:
gns = [agenes, bgenes, cgenes, iegs]
titles = ['A genes', 'B genes', 'C genes', 'IEGs']
adatas = [
    adata,
    adata[adata.obs['sample'].str.contains('NR')],
    adata[adata.obs['sample'].str.contains('DR')],
]
conditions = ['combined', 'NR', 'DR']

fig, axss = plt.subplots(3,4,figsize=(4*5,3*3), sharex=True, sharey=True)
for i, (axs, adatasub, condition) in enumerate(zip(axss, adatas, conditions)):
    condition = conditions[i]
    for j, (ax, gn, title,) in enumerate(zip(axs, gns, titles)):
        g = adatasub[:,gn].layers['ljnorm'].mean(axis=1)
        x = xsign*adatasub.obsm['pcs_typegenes'][:,xi]
        y = ysign*adatasub.obsm['pcs_typegenes'][:,yi]
        
        # consistent over
        g0 = adata[:,gn].layers['ljnorm'].mean(axis=1)
        vmin = np.percentile(g0,  5)
        vmax = np.percentile(g0, 95)
            
        p = utils_merfish.st_scatter_ax(fig, ax, x, y, gexp=g, s=5, cmap='coolwarm', vmin=vmin, vmax=vmax)
        colorbar = plt.colorbar(p, aspect=5, shrink=0.3)
        
        # Show ticks but no grid
        ax.set_aspect('equal')
        ax.axis('on')
        ax.grid(False)  # Turn off grid lines
        sns.despine(ax=ax)
        ax.tick_params(axis='both', which='both', bottom=True, left=True)
        
        if i == 0:
            ax.set_title(title)
        if j == 0:
            ax.set_ylabel(condition, rotation=0, loc='top')
            
        # add the triangle
        add_triangle(XC, ax)
plt.show()

In [None]:
gns = ['Cdh13', 'Sorcs3', 'Chrm2', 'Fos']
titles = gns
adatas = [
    adata,
    adata[adata.obs['sample'].str.contains('NR')],
    adata[adata.obs['sample'].str.contains('DR')],
]
conditions = ['combined', 'NR', 'DR']

fig, axss = plt.subplots(3,4,figsize=(4*5,3*3), sharex=True, sharey=True)
for i, (axs, adatasub, condition) in enumerate(zip(axss, adatas, conditions)):
    condition = conditions[i]
    for j, (ax, gn, title,) in enumerate(zip(axs, gns, titles)):
        g = adatasub[:,gn].layers['ljnorm'].reshape(-1,)
        x = xsign*adatasub.obsm['pcs_typegenes'][:,xi]
        y = ysign*adatasub.obsm['pcs_typegenes'][:,yi]
        
        # consistent over
        g0 = adata[:,gn].layers['ljnorm'].mean(axis=1)
        vmin = np.percentile(g0,  5)
        vmax = np.percentile(g0, 99)
            
        p = utils_merfish.st_scatter_ax(fig, ax, x, y, gexp=g, s=5, vmin=vmin, vmax=vmax)
        colorbar = plt.colorbar(p, aspect=5, shrink=0.3)
        
        # Show ticks but no grid
        ax.set_aspect('equal')
        ax.axis('on')
        ax.grid(False)  # Turn off grid lines
        sns.despine(ax=ax)
        ax.tick_params(axis='both', which='both', bottom=True, left=True)
        
        if i == 0:
            ax.set_title(title)
        if j == 0:
            ax.set_ylabel(condition, rotation=0, loc='top')
            
        add_triangle(XC, ax)
plt.show()

In [None]:
gns = ['Syt17', 'Astn2', 'Etl4', 'Slc24a3'] 
titles = gns
adatas = [
    adata,
    adata[adata.obs['sample'].str.contains('NR')],
    adata[adata.obs['sample'].str.contains('DR')],
]
conditions = ['combined', 'NR', 'DR']

fig, axss = plt.subplots(3,4,figsize=(4*5,3*3), sharex=True, sharey=True)
for i, (axs, adatasub, condition) in enumerate(zip(axss, adatas, conditions)):
    condition = conditions[i]
    for j, (ax, gn, title,) in enumerate(zip(axs, gns, titles)):
        g = adatasub[:,gn].layers['ljnorm'].reshape(-1,)
        x = xsign*adatasub.obsm['pcs_typegenes'][:,xi]
        y = ysign*adatasub.obsm['pcs_typegenes'][:,yi]
        
        # consistent over
        g0 = adata[:,gn].layers['ljnorm'].mean(axis=1)
        vmin = np.percentile(g0,  5)
        vmax = np.percentile(g0, 99)
            
        p = utils_merfish.st_scatter_ax(fig, ax, x, y, gexp=g, s=5, vmin=vmin, vmax=vmax)
        colorbar = plt.colorbar(p, aspect=5, shrink=0.3)
        
        # Show ticks but no grid
        ax.set_aspect('equal')
        ax.axis('on')
        ax.grid(False)  # Turn off grid lines
        sns.despine(ax=ax)
        ax.tick_params(axis='both', which='both', bottom=True, left=True)
        
        if i == 0:
            ax.set_title(title)
        if j == 0:
            ax.set_ylabel(condition, rotation=0, loc='top')
            
        add_triangle(XC, ax)
plt.show()

# customized colormap 

In [None]:
from matplotlib.colors import LinearSegmentedColormap

colors_a = [(0.0, 'black'), (1.0, 'C0')]      
colors_b = [(0.0, 'black'), (1.0, 'C1')]      
colors_c = [(0.0, 'black'), (1.0, 'C2')]      
colors_nrdr = [(0.0, 'C1'), (0.5, 'white'), (1.0, 'black')]
colors_nr = [(0.0, 'white'), (1.0, 'C1'),]
colors_dr = [(0.0, 'white'), (1.0, 'black'),]

# Create a custom colormap using LinearSegmentedColormap
cmap_a = LinearSegmentedColormap.from_list('cmap_a', colors_a)
cmap_b = LinearSegmentedColormap.from_list('cmap_b', colors_b)
cmap_c = LinearSegmentedColormap.from_list('cmap_c', colors_c)
cmap_nrdr = LinearSegmentedColormap.from_list('cmap_nrdr', colors_nrdr)
cmap_nr = LinearSegmentedColormap.from_list('cmap_nr', colors_nr)
cmap_dr = LinearSegmentedColormap.from_list('cmap_dr', colors_dr)

In [None]:
xmin, xmax = -12, 12
ymin, ymax = -7, 7 

bins_x = np.linspace(xmin, xmax, 1*(xmax-xmin)+1)
bins_y = np.linspace(ymin, ymax, 1*(ymax-ymin)+1)
print(bins_x)
print(bins_y)

hists = []
fig, axs = plt.subplots(1,4,figsize=(4*5,1*4), sharex=True, sharey=True)
for ax, adatasub, cond, _cmap in zip(axs, [
    adata,
    adata[adata.obs['sample'].str.contains('NR')],
    adata[adata.obs['sample'].str.contains('DR')],
    ], 
    ['Combined', 'NR', 'DR'], 
    ['gray_r', cmap_nr, cmap_dr]):
    x =  xsign*adatasub.obsm['pcs_typegenes'][:,xi]
    y =  ysign*adatasub.obsm['pcs_typegenes'][:,yi]
    sns.histplot(x=x, y=y, ax=ax, bins=(bins_x, bins_y), 
                 cmap=_cmap, # 'gray_r', 
                 stat='percent', vmin=0, vmax=2, 
                 cbar=True, cbar_kws=dict(shrink=0.4, ticks=[0,2]))
    # sns.kdeplot(x=x, y=y, ax=ax, bins=(bins_x, bins_y))
    
    hist, _, _= np.histogram2d(x, y, bins=(bins_x, bins_y))
    hist = hist/len(x)*100
    hists.append(hist)
    print(hist.shape)
    ax.set_title(cond)
    ax.set_aspect('equal')
    sns.despine(ax=ax)
    ax.grid(False)
    
    # g = ax.imshow(pd.DataFrame(np.log2(1e-3+hist), 
    #                          index=bins_x[1:]-0.5, 
    #                          columns=bins_y[1:]-0.5).T, 
    #             origin='lower',
    #             extent=(xmin, xmax, ymin, ymax),
    #             cmap='gray_r') # , vmax=1, vmin=-1)
    
    # add the triangle
    add_triangle(XC, ax, zorder=2)
    
ax = axs[3] 
ax.set_title('DR-NR')
g = ax.imshow(
    # pd.DataFrame(np.log2(1e-3+hists[2])-np.log2(1e-3+hists[1]), #-hists[1], 
    pd.DataFrame(hists[2]-hists[1], #-hists[1], 
                         index=bins_x[1:]-0.5, 
                         columns=bins_y[1:]-0.5).T, 
            origin='lower',
            extent=(xmin, xmax, ymin, ymax),
            # cmap='coolwarm', 
            cmap=cmap_nrdr, 
            vmax=2, vmin=-2)
# ax.invert_yaxis()
ax.set_aspect('equal')
ax.grid(False)
fig.colorbar(g, shrink=0.4, ticks=[-2,0,2])
sns.despine(ax=ax)

# add the triangle
add_triangle(XC, ax, zorder=2)

plt.show()

In [None]:
dfshow = adata.obs.copy()
dfshow['nrdr'] = dfshow['sample'].str.contains('DR').astype(int)
dfshow['dim1'] = xsign*adata.obsm['pcs_typegenes'][:,xi]
dfshow['dim2'] = ysign*adata.obsm['pcs_typegenes'][:,yi]
palette = {0: 'C1', 1: 'black'}


fig, axs = plt.subplots(1,2,figsize=(2*5,1*4), sharex=True, sharey=True) 
ax = axs[0]
add_triangle(XC, ax)
sns.scatterplot(data=dfshow.sample(frac=1), x='dim1', y='dim2', hue='nrdr', s=3, edgecolor='none', palette=palette, ax=ax)
ax.set_aspect('equal')
sns.despine(ax=ax)
ax.grid(False)
ax.legend(bbox_to_anchor=(1,1))

ax = axs[1]
add_triangle(XC, ax)
sns.kdeplot(data=dfshow, x='dim1', y='dim2', hue='nrdr', palette=palette, legend=False, ax=ax,)
ax.set_aspect('equal')
sns.despine(ax=ax)
ax.grid(False)


fig.tight_layout()
plt.show()

In [None]:
fig, axs = plt.subplots(2,4,figsize=(4*4,2*3), sharex=True, sharey=True) 
for sample, ax in zip(names, axs.flat):
    add_triangle(XC, ax)
    sns.scatterplot(data=dfshow[dfshow['sample']==sample].sample(frac=1), 
                    x='dim1', y='dim2', hue='nrdr', s=5, edgecolor='none', palette=palette, ax=ax, legend=False)
    sns.kdeplot(data=dfshow[dfshow['sample']==sample],
                x='dim1', y='dim2', hue='nrdr', palette=palette, legend=False, ax=ax,)
    ax.set_aspect('equal')
    sns.despine(ax=ax)
    ax.set_title(sample)
    ax.grid(False)
    # ax.legend(bbox_to_anchor=(1,1))

fig.tight_layout()
plt.show()

# customized colormap 

In [None]:
from matplotlib.colors import LinearSegmentedColormap

colors_a = [(0.0, 'black'), (1.0, 'C0')]      
colors_b = [(0.0, 'black'), (1.0, 'C1')]      
colors_c = [(0.0, 'black'), (1.0, 'C2')]      

# Create a custom colormap using LinearSegmentedColormap
cmap_a = LinearSegmentedColormap.from_list('cmap_a', colors_a)
cmap_b = LinearSegmentedColormap.from_list('cmap_b', colors_b)
cmap_c = LinearSegmentedColormap.from_list('cmap_c', colors_c)


In [None]:
# get ABC scores
# (per-cell mean of the 'ljnorm' layer over each gene set)
g0_a = adata[:,agenes].layers['ljnorm'].mean(axis=1)
g0_b = adata[:,bgenes].layers['ljnorm'].mean(axis=1)
g0_c = adata[:,cgenes].layers['ljnorm'].mean(axis=1)

# make ABC scores comparable and norm to [0,1] [50% to 95%]
# (each score is rescaled so its 50th percentile maps to 0 and its 95th to 1, then clipped)
vmin_p, vmax_p = 50, 95
vmin_a = np.percentile(g0_a, vmin_p)
vmax_a = np.percentile(g0_a, vmax_p)

vmin_b = np.percentile(g0_b, vmin_p)
vmax_b = np.percentile(g0_b, vmax_p)

vmin_c = np.percentile(g0_c, vmin_p)
vmax_c = np.percentile(g0_c, vmax_p)

g0_a = np.clip((g0_a-vmin_a)/(vmax_a-vmin_a), 0, 1)
g0_b = np.clip((g0_b-vmin_b)/(vmax_b-vmin_b), 0, 1)
g0_c = np.clip((g0_c-vmin_c)/(vmax_c-vmin_c), 0, 1)

# separate them into scale and frequency
# g0_size: summed score divided by its median, clipped to [0, 1]
# freq0_*: each score's share of the sum (1e-5 guards against division by zero)
g0_sum  = (g0_a+g0_b+g0_c)
g0_size = np.clip(g0_sum/np.percentile(g0_sum, 50), 0, 1) # [0 - > 50%] - [0, 1]
freq0_a = g0_a/(g0_sum+1e-5)
freq0_b = g0_b/(g0_sum+1e-5)
freq0_c = g0_c/(g0_sum+1e-5)

# raw (un-normalized) scores are recomputed here because g0_a/b/c were overwritten above
adata.obs['raw_score_a'] = adata[:,agenes].layers['ljnorm'].mean(axis=1)
adata.obs['raw_score_b'] = adata[:,bgenes].layers['ljnorm'].mean(axis=1)
adata.obs['raw_score_c'] = adata[:,cgenes].layers['ljnorm'].mean(axis=1)

# column layout of obsm['size_freq_abc'] (later cells index it by position):
#   0: g0_size, 1: freq0_a, 2: freq0_b, 3: freq0_c, 4: g0_sum
adata.obsm['size_freq_abc'] = np.vstack([g0_size, freq0_a, freq0_b, freq0_c, g0_sum]).T

# distances to nearest archetype

In [None]:
def get_dists_to_specialists(prj, XC):
    """Euclidean distance from every cell to every specialist (archetype vertex).

    Parameters
    ----------
    prj : array-like, shape (n_cells, n_dims)
        Cell coordinates, one row per cell.
    XC : array-like, shape (n_dims, n_specialists)
        Specialist coordinates, one column per specialist. Any number of
        specialists is supported (the notebook uses three).

    Returns
    -------
    numpy.ndarray, shape (n_specialists, n_cells)
        ``dists[k, i]`` is the distance from cell ``i`` to specialist ``k``.
    """
    prj = np.asarray(prj)
    specialists = np.asarray(XC).T  # specialist by dim
    # broadcast to specialist by cell by dim
    diffs = prj[np.newaxis, :, :] - specialists[:, np.newaxis, :]
    dists = np.sqrt(np.sum(np.power(diffs, 2), axis=2)) # specialist by cell
    return dists

In [None]:
dists_dict = {}
for cond in ['NR', 'DR']:
    dists = get_dists_to_specialists(dfshow.loc[dfshow['sample'].str.contains(cond), ['dim1', 'dim2']].values, XC)
    dists_dict[cond] = dists
    

In [None]:
# Fraction of cells whose nearest archetype is each pole, per condition.
# Row k corresponds to column k of XC (rows of dists_dict[cond]).
pole_labels = ['A pole', 'B pole', 'C pole']
res_frac = pd.DataFrame(index=pole_labels)

for cond in ['NR', 'DR']:
    nearest = np.argmin(dists_dict[cond], axis=0)
    # bincount with minlength always yields one count per pole, even when a pole
    # has no nearest cells; np.unique would silently drop that pole and break
    # (or misalign) the assignment into the 3-row frame.
    unqs = np.arange(len(pole_labels))
    cnts = np.bincount(nearest, minlength=len(pole_labels))
    print(unqs, cnts/cnts.sum())
    res_frac[cond] = cnts/cnts.sum()

In [None]:
res_frac['DR']/res_frac['NR']

In [None]:
fig, ax = plt.subplots(figsize=(4,4))
# res_frac[['P28NR', 'P38NR', 'P28DR', 'P38DR', 'P28DL']].T.plot.bar(
res_frac.T.plot.bar(
    ax=ax, stacked=True, width=0.8,  
)
ax.legend(bbox_to_anchor=(1,1), title='closest to')
sns.despine(ax=ax)
ax.set_ylabel('Frac of cells')
ax.grid(False)
# output = os.path.join(outfigdir, 'bar_frac_abc_pole.pdf')
# powerplots.savefig_autodate(fig, output)
plt.show()


# ABC scores - expression level distributions

In [None]:
ascores_nr = -adata[adata.obs['sample'].str.contains(f'NR')][:,agenes].layers['ljnorm'].mean(axis=1)
ascores_dr = -adata[adata.obs['sample'].str.contains(f'DR')][:,agenes].layers['ljnorm'].mean(axis=1)

bscores_nr = -adata[adata.obs['sample'].str.contains(f'NR')][:,bgenes].layers['ljnorm'].mean(axis=1)
bscores_dr = -adata[adata.obs['sample'].str.contains(f'DR')][:,bgenes].layers['ljnorm'].mean(axis=1)

cscores_nr = -adata[adata.obs['sample'].str.contains(f'NR')][:,cgenes].layers['ljnorm'].mean(axis=1)
cscores_dr = -adata[adata.obs['sample'].str.contains(f'DR')][:,cgenes].layers['ljnorm'].mean(axis=1)

In [None]:
fig, axs = plt.subplots(1,3, figsize=(3*4,4), sharex=False, sharey=False)

ax = axs[0]
sns.histplot(ascores_nr, ax=ax, stat='percent', cumulative=True, element='step', fill=False, color='C1')
sns.histplot(ascores_dr, ax=ax, stat='percent', cumulative=True, element='step', fill=False, color='black')
ax.set_title('A score')
sns.despine(ax=ax)
ax.grid(False)

ax = axs[1]
sns.histplot(bscores_nr, ax=ax, stat='percent', cumulative=True, element='step', fill=False, color='C1')
sns.histplot(bscores_dr, ax=ax, stat='percent', cumulative=True, element='step', fill=False, color='black')
ax.set_title('B score')
sns.despine(ax=ax)
ax.grid(False)

ax = axs[2]
sns.histplot(cscores_nr, ax=ax, stat='percent', cumulative=True, element='step', fill=False, color='C1')
sns.histplot(cscores_dr, ax=ax, stat='percent', cumulative=True, element='step', fill=False, color='black')
ax.set_title('C score')
sns.despine(ax=ax)
ax.grid(False)

fig.tight_layout()
plt.show()

In [None]:
fig, axs = plt.subplots(1,4, figsize=(4*4,4), sharex=False, sharey=True)
for ax, genegroup, title in zip(axs, 
                                [agenes, bgenes, cgenes, iegs], 
                                ['A genes', 'B genes', 'C genes', 'IEGs'],
                               ):
    for i, sample in enumerate(names):
        scores_ = adata[adata.obs['sample']==sample][:,genegroup].layers['ljnorm'].mean(axis=1)
        if 'NR' in sample:
            color = 'C1'
        elif 'DR' in sample:
            color = 'black'

        sns.ecdfplot(scores_, ax=ax, color=color)#, complementary=True) # , linewidth=2)
        
        # _x = np.percentile(scores_, 50)
        # _y = 0.5
        # ax.text(_x, _y, i, fontsize=10, color='red')
    ax.set_ylabel('Cumulative proportion\nof cells')
    ax.set_xlabel('Mean log norm expr.')
    ax.set_title(title)
    sns.despine(ax=ax)
    ax.grid(False)
    

fig.tight_layout()
plt.show()

### Mean and SEM across the 4 samples of each condition

In [None]:
from statsmodels.distributions.empirical_distribution import ECDF

In [None]:
# Per-sample ECDFs of the mean gene-set expression, evaluated on a common grid,
# then summarized as mean +/- SEM across the samples of each condition
# (solid line: NR, dashed line: DR).
allbins = [
    np.linspace(0,1,20),
    np.linspace(0,1,20),
    np.linspace(0,1,20),
    np.linspace(0,3,20),
]

fig, axs = plt.subplots(1,4, figsize=(4*4,4), sharex=False, sharey=True)
for ax, genegroup, title, color, bins in zip(axs, 
                                [agenes, bgenes, cgenes, iegs], 
                                ['A genes', 'B genes', 'C genes', 'IEGs'],
                                ['C0', 'C1', 'C2', 'C4'],
                                allbins,
                               ):
    nr_tracks = []
    dr_tracks = []
    for i, sample in enumerate(names):
        scores_ = adata[adata.obs['sample']==sample][:,genegroup].layers['ljnorm'].mean(axis=1)
        ecdf = ECDF(scores_)
        val = ecdf(bins)
        
        if 'NR' in sample:
            nr_tracks.append(val)
        elif 'DR' in sample:
            dr_tracks.append(val)

    for tracks, linestyle in [(nr_tracks, '-'), (dr_tracks, '--')]:
        m = np.mean(tracks, axis=0)
        # SEM uses the sample standard deviation (ddof=1), matching pandas' .sem();
        # the population std (ddof=0) underestimates the error for so few samples.
        s = np.std(tracks, axis=0, ddof=1)/np.sqrt(len(tracks))
        ax.plot(bins, m, color=color, linestyle=linestyle)
        ax.fill_between(bins, m-s, m+s, color=color, alpha=0.1)
    
    ax.set_ylabel('Cumulative\nproportion of cells')
    ax.set_xlabel('Mean log norm expr.')
    ax.set_title(title)
    sns.despine(ax=ax)
    ax.grid(False)
    

fig.tight_layout()
plt.show()

In [None]:
adatas = [
    adata,
    adata[adata.obs['sample'].str.contains('NR')],
    adata[adata.obs['sample'].str.contains('DR')],
]
conditions = ['combined', 'NR', 'DR']

fig, axs = plt.subplots(1,3,figsize=(3*5,1*3), sharex=True, sharey=True)
for i, (ax, adatasub, condition) in enumerate(zip(axs, adatas, conditions)):
    condition = conditions[i]
    x = xsign*adatasub.obsm['pcs_typegenes'][:,xi]
    y = ysign*adatasub.obsm['pcs_typegenes'][:,yi]
    
    g_size = adatasub.obsm['size_freq_abc'][:,0]
    freq_a = adatasub.obsm['size_freq_abc'][:,1]
    freq_b = adatasub.obsm['size_freq_abc'][:,2]
    freq_c = adatasub.obsm['size_freq_abc'][:,3]
    
    # visualize ABC scores using additive blending
    additive = (cmap_a(freq_a)+cmap_b(freq_b)+cmap_c(freq_c))[:,:3]
    p = ax.scatter(x, y, c=additive, s=5, edgecolor='none', alpha=g_size)
        
    # Show ticks but no grid
    ax.set_aspect('equal')
    ax.axis('on')
    ax.grid(False)  # Turn off grid lines
    sns.despine(ax=ax)
    ax.tick_params(axis='both', which='both', bottom=True, left=True)
    ax.set_title(condition)

    # add the triangle
    add_triangle(XC, ax, vertices=True, edgecolors='k', linewidths=1, marker='o')
    # break
# plt.show()

In [None]:
res = []
for sample in names:
    if 'NR' in sample:
        cond = 'NR'
    elif 'DR' in sample:
        cond = 'DR'
        
    adatasub = adata[adata.obs['sample']==sample]
    g_size = adatasub.obsm['size_freq_abc'][:,0]
    g_sum  = adatasub.obsm['size_freq_abc'][:,4]
    freq_a = adatasub.obsm['size_freq_abc'][:,1]
    freq_b = adatasub.obsm['size_freq_abc'][:,2]
    freq_c = adatasub.obsm['size_freq_abc'][:,3]

    # ta = np.sum(g_size*(freq_a))
    # tb = np.sum(g_size*(freq_b))
    # tc = np.sum(g_size*(freq_c))
    ta = np.sum(g_sum*(freq_a))
    tb = np.sum(g_sum*(freq_b))
    tc = np.sum(g_sum*(freq_c))
    tt = (ta+tb+tc)
    # print(sample, f'{ta/tt: .2f}', f'{tb/tt: .2f}', f'{tc/tt: .2f}')
    res.append([sample, cond, ta/tt*100, tb/tt*100, tc/tt*100, ta, tb, tc, tt])
    
res = pd.DataFrame(res, columns=['sample', 'cond', 'A', 'B', 'C', 'tA', 'tB', 'tC', 'tt']).set_index('sample')
res

In [None]:
for i in range(3):
    print(stats.ttest_ind(res.iloc[:4,1+i], res.iloc[4:,1+i]))

In [None]:
# Stacked A/B/C percentages per sample, NR (first four names) vs. DR (last four).
# Only the percentage columns are plotted: `res` also carries the un-normalized
# totals (tA, tB, tC, tt), which DataFrame.plot.bar would otherwise stack on top.
pct_cols = ['A', 'B', 'C']

fig, axs = plt.subplots(1, 2, figsize=(4,3), sharey=True)
ax = axs[0]
res.loc[names[:4], pct_cols].plot.bar(ax=ax, stacked=True, width=0.8, edgecolor='none', legend=False)
ax.grid(False)
sns.despine(ax=ax)
ax.set_title('NR')
ax.set_ylabel('L2/3 cells (%)')

ax = axs[1]
res.loc[names[4:], pct_cols].plot.bar(ax=ax, stacked=True, width=0.8, edgecolor='none')
ax.legend(bbox_to_anchor=(1,1))
ax.grid(False)
sns.despine(ax=ax)
ax.set_title('DR')

plt.show()

In [None]:
for i, lbl in enumerate(['A', 'B', 'C']):
    fig, axs = plt.subplots(1, 2, figsize=(4,3), sharey=True)
    ax = axs[0]
    res.loc[names[:4]][lbl].plot.bar(ax=ax, stacked=False, width=0.8, edgecolor='none', color=f'C{i}', legend=False)
    ax.grid(False)
    sns.despine(ax=ax)
    ax.set_title('NR')
    ax.set_ylabel('L2/3 cells (%)')

    ax = axs[1]
    res.loc[names[4:]][lbl].plot.bar(ax=ax, stacked=False, width=0.8, edgecolor='none', color=f'C{i}')
    ax.grid(False)
    sns.despine(ax=ax, left=True)
    ax.set_title('DR')


    plt.show()

In [None]:
from statsmodels.stats.proportion import test_proportions_2indep

In [None]:
# Bar + swarm plot of the per-sample percentage of each type (A/B/C), NR vs DR,
# with a bracket annotating a Mann-Whitney U test (scipy defaults) between the
# two conditions.
fig, axs = plt.subplots(1, 3, figsize=(2*3,4))
for ax, col, color in zip(axs, ['A', 'B', 'C'], ['C0', 'C1', 'C2']):
    sns.barplot(data=res, x='cond', y=col, ax=ax, color=color, capsize=0.3, errwidth=1)
    sns.swarmplot(data=res, x='cond', y=col, color='k', ax=ax, )
    ax.set_title(f'type {col}', y=1.1)
    sns.despine(ax=ax)
    ax.grid(False)
    ax.set_ylabel('')

    # percentages -> fractions (the rescaling does not affect a rank-based test)
    a = res.loc[res['cond']=='NR', col]/100
    b = res.loc[res['cond']=='DR', col]/100
    s, p = stats.mannwhitneyu(a, b)

    # Exhaustive thresholds: every p-value maps to exactly one mark. The previous
    # chain left p == 0.05 and p == 0.001 unhandled, so `mark` could be undefined
    # or silently carried over from the previous panel.
    # NOTE(review): with only a handful of samples per condition a rank test cannot
    # reach very small p-values, so '***' may be unreachable -- confirm sample sizes.
    if p < 0.001:
        mark = '***'
    elif p < 0.05:
        mark = '*'
    else:
        mark = 'ns'

    # statistical annotation: bracket from the first to the second category,
    # placed 2 units above the tallest sample, with the mark centred on top
    x1, x2 = 0, 1
    y, h = res[col].max() + 2, 2
    ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c='k')
    ax.text((x1+x2)*.5, y+h, mark, ha='center', va='bottom', color='k')

axs[0].set_ylabel('L2/3 cells (%)')
fig.tight_layout()
plt.show()

In [None]:
# Box plot (white boxes) with the individual samples overlaid as a swarm,
# one panel per type, NR vs DR on the x axis.
type_colors = {'A': 'C0', 'B': 'C1', 'C': 'C2'}

fig, axs = plt.subplots(1, 3, figsize=(2*3,4))
for ax, (col, color) in zip(axs, type_colors.items()):
    sns.boxplot(data=res, x='cond', y=col, color='white', ax=ax)
    sns.swarmplot(data=res, x='cond', y=col, color=color, ax=ax)
    ax.set_title(f'type {col}')
    ax.set_ylabel('')
    ax.grid(False)
    sns.despine(ax=ax)

# only the left-most panel carries the y label
axs[0].set_ylabel('L2/3 cells (%)')
fig.tight_layout()
plt.show()

In [None]:
# Per-sample A/B/C composition (%), rebuilt as a table indexed by sample.
# For every sample, columns 1-3 of obsm['size_freq_abc'] (the A/B/C frequencies)
# are weighted by column 4 and summed over cells, then normalised to percentages.
# NOTE(review): the meaning of column 4 ("g_sum") is defined where
# 'size_freq_abc' is built -- confirm it is the intended weight.
# This overwrites any earlier `res`; the new table has only cond/A/B/C columns.
res = []
for sample in names:
    if 'NR' in sample:
        cond = 'NR'
    elif 'DR' in sample:
        cond = 'DR'
    else:
        # previously `cond` stayed unbound (or kept the previous sample's value)
        raise ValueError(f"cannot infer condition (NR/DR) from sample name {sample!r}")

    adatasub = adata[adata.obs['sample']==sample]
    size_freq_abc = adatasub.obsm['size_freq_abc']
    g_sum  = size_freq_abc[:,4]
    freq_a = size_freq_abc[:,1]
    freq_b = size_freq_abc[:,2]
    freq_c = size_freq_abc[:,3]

    # weighted totals per type and their sum
    ta = np.sum(g_sum*(freq_a))
    tb = np.sum(g_sum*(freq_b))
    tc = np.sum(g_sum*(freq_c))
    tt = (ta+tb+tc)
    print(sample, f'{ta/tt: .2f}', f'{tb/tt: .2f}', f'{tc/tt: .2f}')
    res.append([sample, cond, ta/tt*100, tb/tt*100, tc/tt*100])

res = pd.DataFrame(res, columns=['sample', 'cond', 'A', 'B', 'C']).set_index('sample')

In [None]:
# Same per-sample A/B/C fractions, but weighted by column 0 of
# obsm['size_freq_abc'] (g_size) instead of column 4; printed only.
for sample in names:
    adatasub = adata[adata.obs['sample']==sample]
    size_freq_abc = adatasub.obsm['size_freq_abc']
    g_size = size_freq_abc[:,0]

    # weighted totals for A, B, C (columns 1, 2, 3)
    ta, tb, tc = [np.sum(g_size*(size_freq_abc[:,k])) for k in (1, 2, 3)]
    tt = (ta+tb+tc)
    print(sample, *[f'{t/tt: .2f}' for t in (ta, tb, tc)])

In [None]:
# Hard assignment: each cell is given the type (A=0, B=1, C=2) whose frequency
# ranks last in an ascending argsort, i.e. the largest one; the fraction of
# cells per type is printed for every sample.
# NOTE(review): for tied frequencies the pick follows argsort's ordering, which
# is not necessarily the first maximum -- confirm this is acceptable.
for sample in names:
    adatasub = adata[adata.obs['sample']==sample]
    size_freq_abc = adatasub.obsm['size_freq_abc']
    abc = np.column_stack([size_freq_abc[:,1], size_freq_abc[:,2], size_freq_abc[:,3]])

    rank = np.argsort(abc, axis=1)[:,-1]
    tt = len(size_freq_abc[:,0])
    counts = [np.sum(rank==k) for k in range(3)]
    print(sample, *[f'{c/tt: .2f}' for c in counts])

In [None]:
# Spatial map of the A/B/C scores using additive colour blending: each cell's
# colour is the sum of the three colormaps evaluated at its A/B/C frequencies.
x =  adata.obs['width_show2']
y =  adata.obs['depth_show2']

g_size = adata.obsm['size_freq_abc'][:,0]
freq_a = adata.obsm['size_freq_abc'][:,1]
freq_b = adata.obsm['size_freq_abc'][:,2]
freq_c = adata.obsm['size_freq_abc'][:,3]

# visualize ABC scores using additive blending (RGB only, alpha channel dropped)
additive = (cmap_a(freq_a)+cmap_b(freq_b)+cmap_c(freq_c))[:,:3]

fig, ax = plt.subplots(1,1,figsize=(1*25,1))
for lbl, coord in width_cum.items():
    ax.text(coord, 0, lbl, fontsize=12)

# draw the brightest cells last so they end up on top
sorting = np.argsort(np.max(additive, axis=1))
# `sorting` holds integer positions, so index the obs columns with .iloc:
# plain `x[sorting]` on a label-indexed Series relies on a positional fallback
# that pandas has deprecated.
# NOTE(review): per-point alpha = g_size assumes values within [0, 1] -- confirm.
p = ax.scatter(x.iloc[sorting], y.iloc[sorting], c=additive[sorting], s=5, edgecolor='none', alpha=g_size[sorting])
ax.set_aspect('equal')
ax.axis('off')

plt.show()

# visualize FISH

In [None]:
# Spatial expression maps, one row per gene, coloured by log2(1 + 'jnorm').
gns = ['Cdh13', 'Sorcs3', 'Chrm2', 'Fos'] 
x =  adata.obs['width_show2']
y =  adata.obs['depth_show2']
n = len(gns)

fig, axs = plt.subplots(n,1,figsize=(1*25,n*1))
for i, (ax, gn) in enumerate(zip(axs, gns)):
    if i == 0:
        # text labels at the positions stored in width_cum, top row only
        for lbl, coord in width_cum.items():
            ax.text(coord, 0, lbl, fontsize=12)
    
    g = np.log2(1+adata[:,gn].layers['jnorm'].reshape(-1,))
    # colour scale clipped to the 5th-99th percentile of this gene
    vmax = np.percentile(g, 99)
    vmin = np.percentile(g,  5)
    # plot low-expressing cells first so high-expressing cells sit on top
    sorting = np.argsort(g)
    
    # `sorting` holds integer positions -> use .iloc; plain `x[sorting]` on a
    # label-indexed Series relies on a positional fallback deprecated by pandas.
    p = utils_merfish.st_scatter_ax(fig, ax,  x.iloc[sorting],  y.iloc[sorting],  gexp=g[sorting], s=5, title='', vmin=vmin, vmax=vmax, cmap='rocket_r')
    ax.set_title(gn, loc='left', va='center', ha='right', y=0.5, pad=None)
    fig.colorbar(p, pad=0, shrink=0.5, aspect=5, ticks=[np.round(vmin, decimals=1), np.round(vmax-0.1, decimals=1)])
    
plt.show()
    

In [None]:
# Spatial expression maps, one row per gene, coloured by log2(1 + 'jnorm').
gns = ['Syt17', 'Astn2', 'Etl4', 'Slc24a3'] 
x =  adata.obs['width_show2']
y =  adata.obs['depth_show2']
n = len(gns)

fig, axs = plt.subplots(n,1,figsize=(1*25,n*1))
for i, (ax, gn) in enumerate(zip(axs, gns)):
    if i == 0:
        # text labels at the positions stored in width_cum, top row only
        for lbl, coord in width_cum.items():
            ax.text(coord, 0, lbl, fontsize=12)
    
    g = np.log2(1+adata[:,gn].layers['jnorm'].reshape(-1,))
    # colour scale clipped to the 5th-99th percentile of this gene
    vmax = np.percentile(g, 99)
    vmin = np.percentile(g,  5)
    # plot low-expressing cells first so high-expressing cells sit on top
    sorting = np.argsort(g)
    
    # `sorting` holds integer positions -> use .iloc; plain `x[sorting]` on a
    # label-indexed Series relies on a positional fallback deprecated by pandas.
    p = utils_merfish.st_scatter_ax(fig, ax,  x.iloc[sorting],  y.iloc[sorting],  gexp=g[sorting], s=5, title='', vmin=vmin, vmax=vmax, cmap='rocket_r')
    ax.set_title(gn, loc='left', va='center', ha='right', y=0.5, pad=None)
    fig.colorbar(p, pad=0, shrink=0.5, aspect=5, ticks=[np.round(vmin, decimals=1), np.round(vmax-0.1, decimals=1)])
    
plt.show()
    

In [None]:
# Spatial maps of the per-cell mean 'ljnorm' expression over each gene group
# (A genes, B genes, C genes, IEGs), one row per group.
x =  adata.obs['width_show2']
y =  adata.obs['depth_show2']
gns = [agenes, bgenes, cgenes, iegs] 
titles = ['A genes', 'B genes', 'C genes', 'IEGs']
n = len(gns)

fig, axs = plt.subplots(n,1,figsize=(1*25,n*1))
for i, (ax, gn, title) in enumerate(zip(axs, gns, titles)):
    if i == 0:
        # text labels at the positions stored in width_cum, top row only
        for lbl, coord in width_cum.items():
            ax.text(coord, 0, lbl, fontsize=12)
    
    # NOTE(review): assumes layers['ljnorm'] is a dense array so that the mean
    # over genes is 1-D -- confirm.
    g = adata[:,gn].layers['ljnorm'].mean(axis=1)
    # plot low values first so high values sit on top
    sorting = np.argsort(g)
    
    # colour scale clipped to the 5th-95th percentile of this group
    vmin = np.percentile(g,  5)
    vmax = np.percentile(g, 95)
    # `sorting` holds integer positions -> use .iloc; plain `x[sorting]` on a
    # label-indexed Series relies on a positional fallback deprecated by pandas.
    p = utils_merfish.st_scatter_ax(fig, ax, x.iloc[sorting], y.iloc[sorting], gexp=g[sorting], 
                                    s=5, title='', vmin=vmin, vmax=vmax, cmap='coolwarm')
    ax.set_title(title, loc='left', va='center', ha='right', y=0.5, pad=None)
    fig.colorbar(p, pad=0, shrink=0.5, aspect=5, ticks=[np.round(vmin, decimals=1), np.round(vmax-0.1, decimals=1)])
    
plt.show()
    

# stats

In [None]:
# Bin every sample along 'depth' with shared bin edges (12 edges from 0 to 400,
# i.e. 11 bins) and keep the per-bin summaries of the 'ljnorm' layer.
# NOTE: from here on `stats` is this dict, not the `scipy.stats` module imported
# at the top of the notebook; cells calling `stats.<function>` need
# `from scipy import stats` to be re-run first.
bins = np.linspace(0, 400, 12)
stats = {}

for name in names:
    in_sample = adata.obs['sample']==name
    adatasub = adata[in_sample]
    lnorm_mean, lnorm_sem, lnorm_std, n, d, db = binning_pipe2(adatasub, 'depth', 'ljnorm', bins=bins)
    stats[name] = (lnorm_mean, lnorm_sem, lnorm_std, n, d, db)

# bin occupancy of the last sample processed
d.value_counts()

In [None]:
# Baseline: mean expression level across V1 L2/3 in NR.
# For each NR sample the per-bin means are averaged over depth bins (one value
# per gene); these per-gene vectors are then averaged over the four NR samples.
nr_samples = ['P28NR_ant', 'P28NR_pos', 'P28NRb_ant', 'P28NRb_pos']
group_idx = {'a': agenes_idx, 'b': bgenes_idx, 'c': cgenes_idx, 'i': igenes_idx}

per_sample = {key: [] for key in group_idx}
for name in nr_samples:
    lnorm_mean = stats[name][0]
    for key, idx in group_idx.items():
        # across depth bins for each gene
        per_sample[key].append(lnorm_mean.iloc[:,idx].mean(axis=0))

# across NR samples for each gene
base_a0 = np.mean(per_sample['a'], axis=0)
base_b0 = np.mean(per_sample['b'], axis=0)
base_c0 = np.mean(per_sample['c'], axis=0)
base_i0 = np.mean(per_sample['i'], axis=0)

base_a0.shape, base_b0.shape, base_c0.shape, base_i0.shape

In [None]:
# Depth profiles per sample and gene group (A, B, C, IEG):
#   means[name][k] -- per-bin mean over the group's genes of the
#                     baseline-subtracted expression (one value per depth bin)
#   sems[name][k]  -- per-bin mean over the group's genes of the per-gene SEM
gene_groups = [
    (agenes_idx, base_a0),
    (bgenes_idx, base_b0),
    (cgenes_idx, base_c0),
    (igenes_idx, base_i0),
]

means, sems = {}, {}
for name in names:
    lnorm_mean, lnorm_sem = stats[name][:2]

    means[name] = [(lnorm_mean.iloc[:,idx]-base).mean(axis=1) for idx, base in gene_groups]
    sems[name] = [lnorm_sem.iloc[:,idx].mean(axis=1) for idx, _base in gene_groups]

In [None]:
# centre of every depth bin: average of its left and right edge
midpoints = (bins[:-1] + bins[1:]) / 2
midpoints

In [None]:
# display the sample names in the order used by the per-sample loops and panels
names

In [None]:
# Depth profiles of the A/B/C gene groups, one panel per sample (2 x 4 grid):
# line = baseline-subtracted mean, shaded band = mean +/- sem.
gnames = ['A genes (n=64)', 'B genes (n=35)', 'C genes (n=71)']
group_styles = [('A genes', 'C0'), ('B genes', 'C1'), ('C genes', 'C2')]
linestyle = '-'

fig, axs = plt.subplots(2, 4, figsize=(5*4,4*2), sharex=True, sharey=True)

for ax, name in zip(axs.flat, names):
    x = midpoints
    # means/sems also hold a fourth (IEG) entry; zip stops after A, B, C
    for (label, color), grp_mean, grp_sem in zip(group_styles, means[name], sems[name]):
        ax.plot(x, grp_mean, label=label, color=color, linestyle=linestyle)
        ax.fill_between(x, grp_mean-grp_sem, grp_mean+grp_sem, color=color, alpha=0.1, edgecolor='none')
    # zero line = NR baseline
    ax.axhline(color='lightgray', linestyle='dotted', zorder=1)

    sns.despine(ax=ax)
    ax.set_xticks([0, 100, 200, 300])
    ax.set_xlim(left=100, right=350)
    ax.set_ylim([-0.3, 0.3])
    ax.grid(False)
    ax.set_title(name)

axs.flat[0].set_ylabel('mean (expr. +/- sem)')
fig.subplots_adjust(wspace=0.1)
# powerplots.savefig_autodate(fig, outdatadir+'/grant_saumya_lineq_abc_v3.pdf')

In [None]:
# Depth profile of the IEG group, one panel per sample (2 x 4 grid):
# line = baseline-subtracted mean, shaded band = mean +/- sem.
ieg_color = 'C4'
linestyle = '-'

fig, axs = plt.subplots(2, 4, figsize=(5*4,4*2), sharex=True, sharey=True)

for ax, name in zip(axs.flat, names):
    # only the fourth (IEG) entry of each list is drawn here
    amean, bmean, cmean, imean = means[name]
    asem, bsem, csem, isem = sems[name]
    
    x = midpoints
    ax.plot(x, imean, label='IEGs', color=ieg_color, linestyle=linestyle)
    # band uses the same colour as the line (it was 'C0', the A-gene colour)
    ax.fill_between(x, imean-isem, imean+isem, color=ieg_color, alpha=0.1, edgecolor='none')
    # zero line = NR baseline
    ax.axhline(color='lightgray', linestyle='dotted', zorder=1)

    sns.despine(ax=ax)
    ax.set_xticks([0, 100, 200, 300])
    ax.set_xlim(left=100, right=350)
    # ax.set_ylim([-0.3, 0.3])
    ax.grid(False)
    ax.set_title(name)
axs.flat[0].set_ylabel('mean (expr. +/- sem)')

    
fig.subplots_adjust(wspace=0.1)
# powerplots.savefig_autodate(fig, outdatadir+'/grant_saumya_lineq_abc_v3.pdf')

In [None]:
# Stack the per-sample profiles into one array and summarise each condition.
samp_gene_dpth_mat = np.array([np.array(means[name]) for name in names]) 
print(samp_gene_dpth_mat.shape) # sample, gene group, depth

# NOTE(review): assumes the first four entries of `names` are the NR samples
# and the remaining ones the DR samples -- confirm.
nr_mat = samp_gene_dpth_mat[:4]
dr_mat = samp_gene_dpth_mat[4:]

# Across-sample mean and standard error, both (gene group, depth).
# SEM = sample std (ddof=1) / sqrt(n), the same convention as the pandas `.sem`
# used for the per-bin SEMs; n is read from the data instead of hard-coding 4.
nr_mean = np.mean(nr_mat, axis=0)
nr_sem  = np.std(nr_mat, axis=0, ddof=1)/np.sqrt(nr_mat.shape[0])

dr_mean = np.mean(dr_mat, axis=0)
dr_sem  = np.std(dr_mat, axis=0, ddof=1)/np.sqrt(dr_mat.shape[0])
nr_mean.shape, dr_mean.shape

In [None]:
# NR vs DR t-test for every (gene group, depth bin), followed by
# Benjamini-Hochberg FDR correction across all of those tests together.
# `stats` is re-imported here so that the name refers to scipy.stats again.
from scipy import stats
from statsmodels.stats.multitest import multipletests

ts, ps = stats.ttest_ind(nr_mat, dr_mat)

# undefined p-values (NaN) are treated as p = 1 before the correction
ps_flat = np.nan_to_num(ps, nan=1).reshape(-1,)
rejs, qs, _, _ = multipletests(ps_flat, alpha=0.05, method='fdr_bh')
qs = qs.reshape(ps.shape)

# midpoint between the NR and DR mean profiles (used to place annotations)
nrdr_mean = (nr_mean + dr_mean) / 2

In [None]:
# nr_bio_mat = np.stack([
#     np.mean(samp_gene_dpth_mat[0:2], axis=0),
#     np.mean(samp_gene_dpth_mat[2:4], axis=0),
#     ], axis=0)
# dr_bio_mat = np.stack([
#     np.mean(samp_gene_dpth_mat[4:6], axis=0),
#     np.mean(samp_gene_dpth_mat[6:8], axis=0),
#     ], axis=0)
# ts, ps = stats.ttest_ind(nr_bio_mat, dr_bio_mat)
# qs = ps
# rejs, qs, _, _ = multipletests(np.nan_to_num(ps, nan=1).reshape(-1,), alpha=0.05, method='fdr_bh')
# qs = qs.reshape(ps.shape)
# nrdr_mean = np.stack([nr_mean, dr_mean], axis=2).mean(axis=2)

In [None]:
# Condition-averaged depth profiles of the A/B/C gene groups:
# one panel per condition, line = mean across samples, band = mean +/- sem.
titles = ['NR', 'DR']
data_mean = [nr_mean, dr_mean]
data_sem = [nr_sem, dr_sem]
gnames = ['A genes', 'B genes', 'C genes']
colors = ['C0', 'C1', 'C2']
linestyle = '-'

fig, axs = plt.subplots(1, 2, figsize=(5*2,4), sharex=True, sharey=True)
for ax, title, cond_mean, cond_sem in zip(axs, titles, data_mean, data_sem):
    # zero line = NR baseline
    ax.axhline(color='lightgray', linestyle='dotted', zorder=1)
    for i in range(len(gnames)):
        center, spread = cond_mean[i], cond_sem[i]
        ax.plot(midpoints, center, label=gnames[i], color=colors[i], linestyle=linestyle)
        ax.fill_between(midpoints, center-spread, center+spread, color=colors[i], alpha=0.1, edgecolor='none')

    sns.despine(ax=ax)
    ax.set_xticks([0, 100, 200, 300])
    ax.set_xlim(left=100, right=350)
    # ax.set_ylim([-0.3, 0.3])
    ax.grid(False)
    ax.set_title(title)
    ax.set_xlabel('upper->lower cortical depth')

axs[0].set_ylabel('mean (expr. +/- sem)')
fig.subplots_adjust(wspace=0.1)
# powerplots.savefig_autodate(fig, outdatadir+'/grant_saumya_lineq_abc_v3.pdf')
plt.show()

In [None]:

# Condition-averaged depth profile of the IEG group (index 3 along the
# gene-group axis): one panel per condition, band = mean +/- sem.
titles = ['NR', 'DR']
data_mean = [nr_mean, dr_mean]
data_sem = [nr_sem, dr_sem]
gnames = ['IEGs']
colors = ['C4']
i = 3
linestyle = '-'

fig, axs = plt.subplots(1, 2, figsize=(5*2,4), sharex=True, sharey=True)
for ax, title, cond_mean, cond_sem in zip(axs, titles, data_mean, data_sem):
    # zero line = NR baseline
    ax.axhline(color='lightgray', linestyle='dotted', zorder=1)
    center, spread = cond_mean[i], cond_sem[i]
    for gname, color in zip(gnames, colors):
        ax.plot(midpoints, center, label=gname, color=color, linestyle=linestyle)
        ax.fill_between(midpoints, center-spread, center+spread, color=color, alpha=0.1, edgecolor='none')

    sns.despine(ax=ax)
    ax.set_xticks([0, 100, 200, 300])
    ax.set_xlim(left=100, right=350)
    # ax.set_ylim([-0.3, 0.3])
    ax.grid(False)
    ax.set_title(title)
    ax.set_xlabel('upper->lower cortical depth')

axs[0].set_ylabel('mean (expr. +/- sem)')
fig.subplots_adjust(wspace=0.1)
# powerplots.savefig_autodate(fig, outdatadir+'/grant_saumya_lineq_abc_v3.pdf')
plt.show()

In [None]:
# NR vs DR depth profiles overlaid, one panel per gene group (A, B, C).
# Solid line = NR, dashed line = DR, band = mean +/- sem. Depth bins whose
# FDR-corrected q-value is below 0.05 / 0.001 are marked with '*' / '***' at
# the midpoint between the two condition means.
linestyles = ['-', '--']
data_mean = [nr_mean, dr_mean]
data_sem = [nr_sem, dr_sem]
gnames = ['A genes', 'B genes', 'C genes']
titles = gnames
colors = ['C0', 'C1', 'C2']
labels = ['NR', 'DR']
sigs = qs
allmeans = nrdr_mean

fig, axs = plt.subplots(1, 3, figsize=(5*3,4), sharex=True, sharey=True)
for i, (ax, gname, color) in enumerate(zip(axs, gnames, colors)):
    # zero line = NR baseline
    ax.axhline(color='lightgray', linestyle='dotted', zorder=1)
    # each line is labelled with its condition (it used to carry the gene-group
    # name for both NR and DR, leaving `labels` unused)
    for cond_mean, cond_sem, label, linestyle in zip(data_mean, data_sem, labels, linestyles):
        ax.plot(midpoints, cond_mean[i], label=label, color=color, linestyle=linestyle, marker='o', markersize=5)
        ax.fill_between(midpoints, cond_mean[i]-cond_sem[i], cond_mean[i]+cond_sem[i], color=color, alpha=0.1, edgecolor='none')
        
    for _x, _y, _sig in zip(midpoints, allmeans[i], sigs[i]):
        if _sig < 1e-3:
            mark = "***"
        elif _sig < 5e-2:
            mark = "*"
        else:
            continue
        ax.text(_x, _y, mark, ha='left', va='center', fontsize=12, rotation=90)
        ax.vlines(_x, _y-0.02, _y+0.02, color='k', linewidth=0.5)

    sns.despine(ax=ax)
    ax.set_xticks([0, 100, 200, 300])
    ax.set_xlim(left=100, right=350)
    ax.set_ylim([-0.2, 0.3])
    ax.grid(False)
    ax.set_title(gname)
    ax.set_xlabel('upper->lower cortical depth')
    # legend tells the solid (NR) and dashed (DR) lines apart
    ax.legend(frameon=False)
    
axs[0].set_ylabel('mean (expr. +/- sem)')
fig.subplots_adjust(wspace=0.1)
# powerplots.savefig_autodate(fig, outdatadir+'/grant_saumya_lineq_abc_v3.pdf')
plt.show()

In [None]:
# NR vs DR depth profile of the IEG group (index 3 along the gene-group axis).
# Solid line = NR, dashed line = DR, band = mean +/- sem. Depth bins whose
# FDR-corrected q-value is below 0.05 / 0.001 are marked with '*' / '***' at
# the midpoint between the two condition means.
linestyles = ['-', '--']
data_mean = [nr_mean, dr_mean]
data_sem = [nr_sem, dr_sem]
gnames = ['IEGs']
titles = gnames
colors = ['C4']
labels = ['NR', 'DR']
i = 3
sigs = qs
allmeans = nrdr_mean

# single axis: the share arguments previously passed here had no effect
fig, ax = plt.subplots(1, 1, figsize=(5*1,4))
for (gname, color) in zip(gnames, colors):
    # zero line = NR baseline
    ax.axhline(color='lightgray', linestyle='dotted', zorder=1)
    # each line is labelled with its condition (it used to carry the gene-group
    # name for both NR and DR, leaving `labels` unused)
    for cond_mean, cond_sem, label, linestyle in zip(data_mean, data_sem, labels, linestyles):
        ax.plot(midpoints, cond_mean[i], label=label, color=color, linestyle=linestyle, marker='o', markersize=5)
        ax.fill_between(midpoints, cond_mean[i]-cond_sem[i], cond_mean[i]+cond_sem[i], color=color, alpha=0.1, edgecolor='none')
        
    for _x, _y, _sig in zip(midpoints, allmeans[i], sigs[i]):
        if _sig < 1e-3:
            mark = "***"
        elif _sig < 5e-2:
            mark = "*"
        else:
            continue
        ax.text(_x, _y, mark, ha='left', va='center', fontsize=12, rotation=90)
        ax.vlines(_x, _y-0.1, _y+0.1, color='k', linewidth=0.5)

    sns.despine(ax=ax)
    ax.set_xticks([0, 100, 200, 300])
    ax.set_xlim(left=100, right=350)
    ax.set_ylim([-0.2, 1.2])
    ax.grid(False)
    ax.set_title(gname)
    ax.set_xlabel('upper->lower cortical depth')
    # legend tells the solid (NR) and dashed (DR) lines apart
    ax.legend(frameon=False)
    
ax.set_ylabel('mean (expr. +/- sem)')
fig.subplots_adjust(wspace=0.1)
# powerplots.savefig_autodate(fig, outdatadir+'/grant_saumya_lineq_abc_v3.pdf')
plt.show()