In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import anndata 
import seaborn as sns
from scipy.stats import zscore
import matplotlib.pyplot as plt
import collections
from natsort import natsorted

from scipy import stats
from scipy import sparse
from sklearn.decomposition import PCA
from umap import UMAP
from statsmodels.stats.multitest import multipletests

from matplotlib.colors import LinearSegmentedColormap

from scroutines.config_plots import *
from scroutines import powerplots # .config_plots import *
from scroutines import pnmf
from scroutines import basicu

from scroutines.gene_modules import GeneModules  

# import importlib
# import scroutines
# importlib.reload(scroutines)
# from scroutines.gene_modules import GeneModules  


In [None]:
outfigdir = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/figures/250409"
# Create the figure directory (and any missing parents) from Python instead of a
# `!mkdir -p` shell escape: this works outside IPython and does not depend on
# shell interpolation of the path.
os.makedirs(outfigdir, exist_ok=True)

# load gene annotation and data

In [None]:
# Instantiate the project's gene-annotation helper and sanity-check it on one gene.
gene_modules = GeneModules()
# check_genes returns three sequences (unpacked as g, gs, ms); each is printed tab-separated.
# NOTE(review): presumably annotation labels at three levels of detail — confirm against GeneModules.
g, gs, ms = gene_modules.check_genes('Cdh13')
print("\t".join(g))
print("\t".join(gs))
print("\t".join(ms))

In [None]:
# Per-cell archetype scores; the first CSV column becomes the index
# (later used to align rows with adata.obs.index).
scores_abc = pd.read_csv("/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/scores_l23abc.csv", 
                         index_col=0,
                        )
# Signed contrast between the C and A scores; cells are ranked along this axis further down.
scores_abc['scores_c-a'] = scores_abc['scores_c'] - scores_abc['scores_a']
scores_abc

In [None]:
# Load the L2/3 AnnData object from its .h5ad file.
# `anndata.read` is a deprecated alias of `read_h5ad` and is no longer available in
# newer anndata releases, so the supported reader is called directly.
adata = anndata.read_h5ad("/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/superdupermegaRNA_hasraw_multiome_l23.h5ad")
adata

In [None]:
# Replace the working matrix with the one stored in adata.raw.
# NOTE(review): presumably raw counts — the normalization further down treats X as counts; confirm.
adata.X = adata.raw.X

In [None]:
# Copy each archetype score column into adata.obs, aligned on the cell index.
for score_col in ('scores_a', 'scores_b', 'scores_c', 'scores_c-a'):
    adata.obs[score_col] = scores_abc.loc[adata.obs.index, score_col].copy()

In [None]:
# Derive per-cell sample and time labels from the 'Sample' column.
sample_labels = adata.obs['Sample'].values
# Drop the last character of each sample name, then remove any 'DR' substring
# (e.g. a name such as 'P12DRa' becomes 'P12').
time_labels = [s[:-1].replace('DR', '') for s in sample_labels]

adata.obs['sample'] = sample_labels # lower-case duplicate of the 'Sample' column
adata.obs['time']   = time_labels

# Naturally sorted unique sample names, split by whether the name contains 'DR'.
uniq_samples = natsorted(np.unique(sample_labels))
nr_samples = [s for s in uniq_samples if "DR" not in s]
dr_samples = [s for s in uniq_samples if "DR" in s]

# Naturally sorted unique condition labels; the DEG table built later indexes into this array.
uniq_conds = np.array(natsorted(np.unique(adata.obs['cond'].values)))
print(uniq_conds)

In [None]:
# Hard-coded index and time arrays for the NR and DR series (7 NR entries, 4 DR entries).
# NOTE(review): nr_idx / dr_idx look like positions within the naturally sorted `uniq_conds`
# printed above, and *_times like postnatal days — confirm against that printout.
nr_idx = np.array([0,1,2,4,6,8,10])
dr_idx = np.array([3,5,7,9])

nr_times = np.array([6,8,10,12,14,17,21])
dr_times = np.array(       [12,14,17,21])

In [None]:
# remove mitochondrial genes (names starting with 'mt-')
adata = adata[:,~adata.var.index.str.contains(r'^mt-')]
# remove sex genes (only an exact match to 'Xist' is dropped)
adata = adata[:,~adata.var.index.str.contains(r'^Xist$')]

# filter genes
cond = np.ravel((adata.X>0).sum(axis=0)) > 10 # expressed in more than 10 cells
adata = adata[:,cond].copy() # .copy() materializes the filtered view as its own AnnData

In [None]:
# Display the AnnData summary after gene filtering.
adata

In [None]:
# counts
x = adata.X
cov = np.ravel(np.sum(x, axis=1)) # row sums: total counts per cell
genes = adata.var.index.values

# CP10k: scale every row by 1/cov, then multiply by 1e4
xn = (sparse.diags(1/cov).dot(x))*1e4

# log2(CP10k+1), applied to the stored entries only (implicit zeros stay zero)
xln = xn.copy()
xln.data = np.log2(xln.data+1)

# Store dense copies of both matrices as layers.
# NOTE(review): this cell assumes adata.X is a scipy sparse matrix (.data and .todense() are used) — confirm.
adata.layers[    'norm'] = np.array(xn.todense())
adata.layers[ 'lognorm'] = np.array(xln.todense())

In [None]:
# Precomputed differential-expression results: q-values (fin1) and log2 fold changes (fin2).
fin1 = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/DEG_l23abc_qs_250409.npy'
fin2 = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/DEG_l23abc_l2fc_250409.npy'
# fin3 = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/DEG_l23abc_gene_list_250409.csv'

qs_tensor = np.load(fin1)
l2fc_tensor = np.load(fin2)

# Thresholds used below when calling archetype-enriched genes.
l2fc_th = np.log2(2)      # main effect-size cutoff (2-fold)
l2fc_th_s = np.log2(1.2)  # secondary, weaker cutoff (1.2-fold)
alpha_th = 0.05           # q-value cutoff

In [None]:
# Split both tensors along axis 1 into three pairwise comparisons; each slice keeps
# axes 0 and 2, which are later indexed with `uniq_conds` and `genes` respectively.
# NOTE(review): the ca / ba / bc suffixes presumably mean C-vs-A, B-vs-A and B-vs-C —
# confirm the order and sign convention against the script that wrote the .npy files.
qs_ca   = qs_tensor[:,0,:]
qs_ba   = qs_tensor[:,1,:]
qs_bc   = qs_tensor[:,2,:]

l2fc_ca = l2fc_tensor[:,0,:]
l2fc_ba = l2fc_tensor[:,1,:]
l2fc_bc = l2fc_tensor[:,2,:]

In [None]:
# For each archetype, combine the two pairwise comparisons that involve it.
qs_a   = np.minimum(qs_ca, qs_ba) # the better (smaller q) of the two
qs_c   = np.minimum(qs_ca, qs_bc) # the better (smaller q) of the two
qs_b   = np.minimum(qs_ba, qs_bc) # the better (smaller q) of the two

# Signs are flipped so both fold changes point the same way for a given archetype.
# NOTE(review): presumably positive then means "higher in this archetype" — depends on the
# sign convention of the stored tensors; confirm.
l2fc_a = np.max([-l2fc_ca, -l2fc_ba], axis=0) # larger of the two fold changes (max, not mean)
l2fc_c = np.max([ l2fc_ca, -l2fc_bc], axis=0) # larger of the two fold changes (max, not mean)
l2fc_b = np.max([ l2fc_ba,  l2fc_bc], axis=0) # larger of the two fold changes (max, not mean)


l2fc_as = np.min([-l2fc_ca, -l2fc_ba], axis=0) # smaller of the two fold changes (min, not mean)
l2fc_cs = np.min([ l2fc_ca, -l2fc_bc], axis=0) # smaller of the two fold changes (min, not mean)
l2fc_bs = np.min([ l2fc_ba,  l2fc_bc], axis=0) # smaller of the two fold changes (min, not mean)


# A gene is called for an archetype (per condition) when both oriented fold changes are
# positive, the larger one exceeds l2fc_th, and the smaller q-value is below alpha_th.
# B additionally requires the smaller fold change to exceed l2fc_th_s.
cond_sig_a = np.all([-l2fc_ca > 0, -l2fc_ba > 0, l2fc_a > l2fc_th, qs_a < alpha_th], axis=0)
cond_sig_c = np.all([ l2fc_ca > 0, -l2fc_bc > 0, l2fc_c > l2fc_th, qs_c < alpha_th], axis=0)
cond_sig_b = np.all([ l2fc_ba > 0,  l2fc_bc > 0, l2fc_b > l2fc_th, qs_b < alpha_th, l2fc_bs > l2fc_th_s], axis=0)


# Per gene, count the conditions in which it is significant, then tabulate those counts.
instances, counts_a = np.unique(cond_sig_a.sum(axis=0), return_counts=True)
instances, counts_c = np.unique(cond_sig_c.sum(axis=0), return_counts=True)
instances, counts_b = np.unique(cond_sig_b.sum(axis=0), return_counts=True)

# Per-condition totals, followed by the number of genes significant in at least one condition.
print('num A genes for each cond:\t', cond_sig_a.sum(axis=1), np.any(cond_sig_a, axis=0).sum())
print('num C genes for each cond:\t', cond_sig_c.sum(axis=1), np.any(cond_sig_c, axis=0).sum())
print('num B genes for each cond:\t', cond_sig_b.sum(axis=1), np.any(cond_sig_b, axis=0).sum())

# [1:] drops the first tabulated value (the smallest count, i.e. 0 conditions whenever
# at least one gene is never significant).
print('num A genes in num conds:\t',  counts_a[1:])
print('num C genes in num conds:\t',  counts_c[1:])
print('num B genes in num conds:\t',  counts_b[1:])

In [None]:
# Long-format table with one row per significant (condition, gene) pair,
# tagged with the archetype whose mask produced it.
per_archetype_frames = []
archetype_masks = (('A', cond_sig_a), ('C', cond_sig_c), ('B', cond_sig_b))

for label, cond_sig in archetype_masks:
    # positions of the True entries: (condition index, gene index)
    cond_idx, gene_idx = np.nonzero(cond_sig.astype(int))

    df_res = pd.DataFrame({
        'cond': uniq_conds[cond_idx],
        'gene': genes[gene_idx],
    }).assign(archetype=label)
    per_archetype_frames.append(df_res)

df_res_all = pd.concat(per_archetype_frames)
df_res_all
    

# further check

In [None]:
# Collapse the per-condition masks over axis 0: True if a gene is significant in any condition.
cond_sig_a_any = np.any(cond_sig_a, axis=0)
cond_sig_b_any = np.any(cond_sig_b, axis=0)
cond_sig_c_any = np.any(cond_sig_c, axis=0)
# `+` on boolean arrays acts as an element-wise OR, so this is the union of the three masks.
cond_sig_abc_any = np.any(cond_sig_a+cond_sig_b+cond_sig_c, axis=0)
cond_sig_a_any.sum(), cond_sig_b_any.sum(), cond_sig_c_any.sum(), cond_sig_abc_any.sum()

In [None]:
# adata.obs['sample'].unique()
import re

# Conditions and samples in processing order: the 4 DR conditions first, then the 7 NR ones.
# mean_over_samples and transform_bigredmat (the defs right after these lists) hard-code this layout.
todo_conds = [
    'P12DR', 'P14DR', 'P17DR', 'P21DR',
    'P6', 'P8', 'P10', 'P12', 'P14', 'P17', 'P21', 
]
todo_samps = [
    'P12DRa', 'P12DRb',
    'P14DRa', 'P14DRb',
    'P17DRa', 'P17DRb',
    'P21DRa', 'P21DRb',
    'P6a', 'P6b', 'P6c', 
    'P8a', 'P8b', 'P8c', 
    'P10a', 'P10b', 
    'P12a', 'P12b', 'P12c', 
    'P14a', 'P14b',
    'P17a', 'P17b', 
    'P21a', 'P21b', 
]
# Strip every letter from each name to leave the integer (e.g. 'P12DRa' -> 12).
todo_conds_t = np.array([int(re.sub(r'[a-zA-Z]', '', a)) for a in todo_conds])
todo_samps_t = np.array([int(re.sub(r'[a-zA-Z]', '', a)) for a in todo_samps])
print(todo_conds_t)
print(todo_samps_t)

def mean_over_samples(mmat_res_samp, group_sizes=(2, 2, 2, 2, 3, 3, 2, 3, 2, 2, 2)):
    """Average consecutive runs of samples (axis 0) into one row per condition.

    With the default `group_sizes` this reduces 25 samples to 11 conditions,
    using the replicate layout of `todo_samps` (2,2,2,2 then 3,3,2,3,2,2,2).

    Parameters
    ----------
    mmat_res_samp : array-like, shape (n_samples, ...)
        Per-sample values; samples of the same condition must be adjacent.
    group_sizes : sequence of int, optional
        Number of consecutive samples belonging to each condition, in order.
        Must sum to ``mmat_res_samp.shape[0]``.

    Returns
    -------
    numpy.ndarray, shape (len(group_sizes), ...), dtype float64
        Mean over the samples of each group.

    Raises
    ------
    AssertionError
        If a group size is not positive or the sizes do not add up to the
        number of samples.
    """
    mmat_res_samp = np.asarray(mmat_res_samp)
    group_sizes = tuple(int(size) for size in group_sizes)

    assert all(size > 0 for size in group_sizes), "group sizes must be positive"
    assert mmat_res_samp.shape[0] == sum(group_sizes), (
        f"expected {sum(group_sizes)} samples, got {mmat_res_samp.shape[0]}"
    )

    # Allocate only the reduced array (float64, as before).
    mmat_res_samp_mean = np.zeros((len(group_sizes),) + mmat_res_samp.shape[1:])

    start = 0
    for k, size in enumerate(group_sizes):
        mmat_res_samp_mean[k] = np.mean(mmat_res_samp[start:start+size], axis=0)
        start += size

    return mmat_res_samp_mean

def transform_bigredmat(bigmat, n_type):
    """Flatten a (..., gene) array into gene-by-column matrices.

    All leading axes are collapsed into one column axis, the result is
    transposed to (gene, column), and the first ``4*n_type`` columns are
    moved to the end. Returns that matrix (`fmat`) together with its
    row-wise z-score (`zmat`).
    """
    n_genes = bigmat.shape[-1]
    flat = bigmat.reshape(-1, n_genes).T

    # CHANGED COLUMN ORDER!! the leading 4*n_type columns go last
    n_lead = 4*n_type
    lead_cols = flat[:, :n_lead]
    rest_cols = flat[:, n_lead:]
    fmat = np.concatenate((rest_cols, lead_cols), axis=1)

    zmat = zscore(fmat, axis=1)
    return fmat, zmat

In [None]:
%%time

# Build per-sample pseudobulk profiles, each expressed as a log2 ratio to the all-cell baseline.
offset = 1
mat = adata.layers['norm'][...]
gexp_l23baseline = np.log2(np.mean(mat, axis=0)*1e2+offset) # CP10k -> CPM

n_type = 10                      # number of quantile bins along the scores_c-a ranking
frac_archetypal_cells_viz = 0.2  # fraction of a sample's cells taken for each archetype group
bigmat_nfd = np.zeros((len(todo_samps), n_type, mat.shape[1]))  # (sample, bin, gene)
bigmat_abc = np.zeros((len(todo_samps),      3, mat.shape[1]))  # (sample, A/B/C group, gene)

for i, samp in enumerate(todo_samps):
    print(samp)
    
    # get sub
    adatasub = adata[adata.obs['sample']==samp]
    n_cells = adatasub.shape[0]
    
    # get A vs C 
    # ranks are computed within the sample
    ranks_ac = adatasub.obs['scores_c-a'].rank()
    ranks_b  = adatasub.obs['scores_b'].rank()
    
    # per type
    # split the sample's cells into n_type equal-sized bins by their scores_c-a rank
    cells_type_nfd = pd.qcut(ranks_ac, n_type, labels=False)
    for j in range(n_type):
        mat_j = adatasub[cells_type_nfd==j].layers['norm'][...]
        mmat_j = np.log2(np.mean(mat_j, axis=0)*1e2+offset)-gexp_l23baseline # CP10k -> CPM
        bigmat_nfd[i,j] = mmat_j
    
    # A, B, C
    num_archetypal_cells_viz = int(n_cells*frac_archetypal_cells_viz)
    
    # candidates: lowest c-a ranks (A), highest c-a ranks (C), highest b ranks (B)
    precond_a = ranks_ac <= num_archetypal_cells_viz
    precond_c = ranks_ac > adatasub.shape[0] - num_archetypal_cells_viz
    precond_b = ranks_b  > adatasub.shape[0] - num_archetypal_cells_viz
    
    # keep only cells that are a candidate for exactly one group
    cond_a = np.all([ precond_a, ~precond_b, ~precond_c], axis=0)
    cond_b = np.all([~precond_a,  precond_b, ~precond_c], axis=0)
    cond_c = np.all([~precond_a, ~precond_b,  precond_c], axis=0)
    
    # group order along axis 1 of bigmat_abc is A, B, C
    for j, cond in enumerate([cond_a, cond_b, cond_c]):
        mat_j = adatasub[cond].layers['norm'][...]
        mmat_j = np.log2(np.mean(mat_j, axis=0)*1e2+offset)-gexp_l23baseline # CP10k -> CPM
        bigmat_abc[i,j] = mmat_j


In [None]:
# Average replicate samples into conditions, then flatten to gene x (cond*type) matrices.
redmat_nfd = mean_over_samples(bigmat_nfd)
fmat_nfd, zmat_nfd = transform_bigredmat(redmat_nfd, n_type)
print(fmat_nfd.shape)   # gene, cond*type
print(zmat_nfd.shape)   # gene, cond*type

# Keep only the genes called for at least one archetype (A, B or C).
fmat_nfd_abcg = fmat_nfd[cond_sig_abc_any]
zmat_nfd_abcg = zmat_nfd[cond_sig_abc_any]

In [None]:
# res_a = organize_zmat(zmat_nfd_ag, fmat_nfd_ag, redmat_nfd_ag, title='A genes', genes=genes[cond_sig_a_any])
# res_b = organize_zmat(zmat_nfd_bg, fmat_nfd_bg, redmat_nfd_bg, title='B genes', genes=genes[cond_sig_b_any])
# res_c = organize_zmat(zmat_nfd_cg, fmat_nfd_cg, redmat_nfd_cg, title='C genes', genes=genes[cond_sig_c_any])
# res_abc = organize_zmat(zmat_nfd_abcg, fmat_nfd_abcg, redmat_nfd_abcg, title='ABC genes', genes=genes[cond_sig_abc_any])

# Profile these modules

In [None]:
# Gene names called for each archetype in at least one condition.
genes_a = genes[cond_sig_a_any]
genes_b = genes[cond_sig_b_any]
genes_c = genes[cond_sig_c_any]

# Union of the three gene masks (recomputed here from the per-archetype masks).
cond_sig_abc_any = np.any([
    cond_sig_a_any, 
    cond_sig_b_any, 
    cond_sig_c_any, 
], axis=0)
print(cond_sig_abc_any.sum())
genes_abc = genes[cond_sig_abc_any]

In [None]:
# Annotation labels that count a gene as a "CSM" in the membership tests below
# (matched against the first output of gene_modules.check_genes).
# NOTE(review): CSM presumably stands for cell-surface molecule — confirm.
csm_annots = [
    'igsf',    
    'cad',     
    'fbrn',    
    'eph',     
    'sema',    
    'tene',    
    'astn',    
    'cntnap',  
    'nrxn',    
    'axon',    
    'wiring',
]

In [None]:
# For each gene set: look up the annotation of every gene, flag those whose annotation
# is in csm_annots, then print the flagged gene names and their count.
gene_annots_abc = gene_modules.check_genes(genes_abc)[0]
cond_csm_abc = np.array([_g in csm_annots for _g in gene_annots_abc])
print(genes_abc[cond_csm_abc], np.sum(cond_csm_abc))

gene_annots_a = gene_modules.check_genes(genes_a)[0]
cond_csm_a = np.array([_g in csm_annots for _g in gene_annots_a])
print(genes_a[cond_csm_a], np.sum(cond_csm_a))

gene_annots_b = gene_modules.check_genes(genes_b)[0]
cond_csm_b = np.array([_g in csm_annots for _g in gene_annots_b])
print(genes_b[cond_csm_b], np.sum(cond_csm_b))

gene_annots_c = gene_modules.check_genes(genes_c)[0]
cond_csm_c = np.array([_g in csm_annots for _g in gene_annots_c])
print(genes_c[cond_csm_c], np.sum(cond_csm_c))

# Import Vision ABC genes

In [None]:
# Summary table of vision-dependent genes; columns used later are 'gene', 'archetype' and 'test_sig'.
# NOTE(review): presumably the result of a P21 NR-vs-DR test, judging by the file name — confirm.
df_nrdr = pd.read_csv('/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_sexual_dimorphism/summary_l23_p21nrdr.csv')
df_nrdr

In [None]:
# Flag rows whose gene annotation (first output of check_genes) is one of csm_annots.
df_nrdr['is_csm'] = np.array([_g in csm_annots for _g in gene_modules.check_genes(df_nrdr['gene'].values)[0]])
df_nrdr[df_nrdr['is_csm']]

In [None]:
# CSM-annotated genes among all archetype-enriched (A/B/C) genes.
abc_csm = genes_abc[cond_csm_abc]
len(abc_csm), abc_csm

In [None]:
# All genes listed in df_nrdr, and the subset flagged as CSM.
vision_abc_all = df_nrdr['gene'].values
vision_abc_csm = df_nrdr[df_nrdr['is_csm']]['gene'].values
vision_abc_csm

In [None]:
# Hand-curated synaptic gene lists; the second list extends the first with three more genes.
# NOTE(review): the `_b` suffix presumably marks archetype-B genes — confirm.
vision_abc_synaptic_b = ['Bdnf', 'Nptx2', 'Homer1', 'Plcl1', 'Ppm1h']  
vision_abc_synaptic = ['Bdnf', 'Nptx2', 'Homer1', 'Plcl1', 'Ppm1h', 'C1ql3', 'Syt10', 'Rph3a']  

# streamline 2 heatmaps

In [None]:
def order_zmat(zmat, metric0=None, n_nr=10):
    """Compute a row ordering for a two-block heatmap matrix.

    The columns of `zmat` are split into a first block (``zmat[:, :n_nr]``,
    called NR here) and a second block (the remaining columns, DR). Rows are
    sorted first by a categorical key and then by where their signal sits
    within the first block.

    Parameters
    ----------
    zmat : numpy.ndarray, shape (n_rows, n_cols)
        Row-wise z-scored values.
    metric0 : array-like of length n_rows, optional
        Primary (categorical) sort key. If omitted, it is a boolean that is
        True where the DR-block mean exceeds the NR-block mean.
    n_nr : int, optional
        Number of leading columns that form the NR block (default 10, the
        value previously hard-coded).

    Returns
    -------
    dforder : pandas.DataFrame
        Columns 'm0' (primary key) and 'm2' (centroid position), one row per
        row of `zmat`, in the original order.
    gene_order : numpy.ndarray
        Row indices sorted by ('m0', 'm2'), both ascending.
    """
    zmat_nr = zmat[:,:n_nr]
    zmat_dr = zmat[:,n_nr:]

    # metric0: categorical (provided)
    if metric0 is None:
        metric0 = (np.mean(zmat_dr, axis=1) - np.mean(zmat_nr, axis=1)) > 0

    # metric2: continuous (where in NR)
    # softmax over the NR block, then the expected column index = centroid location
    pmat_nr = np.exp(zmat_nr)
    pmat_nr = pmat_nr/np.sum(pmat_nr, axis=1).reshape(-1,1)
    # use the actual block width so matrices with fewer than n_nr columns still work
    metric2 = pmat_nr.dot(np.arange(zmat_nr.shape[1]))

    # first by metric 0 then by metric 2
    dforder = pd.DataFrame()
    dforder['m0'] = metric0
    dforder['m2'] = metric2

    gene_order = dforder.sort_values(['m0', 'm2']).index.values
    return dforder, gene_order


def mark_ticklabels(highlights, color='red', ax=None):
    """Recolor the y-axis tick labels whose text appears in `highlights`.

    Parameters
    ----------
    highlights : container of str
        Label texts to recolor (tested with ``in``).
    color : optional
        Color applied to the matching labels (default 'red').
    ax : optional
        Axes whose y tick labels are modified. Defaults to the current
        axes (``plt.gca()``), which was the only behavior before.

    Returns
    -------
    None
    """
    if ax is None:
        ax = plt.gca()

    # Recolor every y tick label whose text is one of the highlighted names.
    for label in ax.get_yticklabels():
        tick_val = label.get_text()
        if tick_val in highlights:
            label.set_color(color)
    return 

In [None]:
# Conditions to show, as positions along the condition axis after transform_bigredmat
# reordered it (7 NR conditions first, then 4 DR): 6 is P21 (NR) and 10 is P21DR.
selected_conditions = [6,10]
# Column indices of the n_type bins of every selected condition, derived from
# selected_conditions so the list and the columns cannot drift apart.
columns = np.hstack([c*n_type+np.arange(n_type) for c in selected_conditions])

In [None]:
# Quick look at the matrix shape and the vision-dependent CSM gene list before plotting.
zmat_nfd_abcg.shape, vision_abc_csm

In [None]:
# Heatmap of a hand-picked, hand-ordered set of CSM genes at P21NR vs P21DR.
# (A dead `genes_this = vision_abc_csm` assignment was removed: it was overwritten
#  immediately. Uncomment the next line and drop the list to plot every vision-dependent CSM.)
# genes_this = vision_abc_csm
genes_this = [
    # A down
    'Cdh4', 'Ptprg', 
    # A up
    'Epha6', 'Slit3',
    # B down
    'Nrp1', 'Ctnna3', 'Megf11', 
    # B up
    'Cdh18',
    # C down
    'Igsf9b', 'Epha10', 'Ntng1', 
    # C up
    'Cdh20', 'Cntnap4',  'Pcdh10', 'Cdh7', 
    ] 
# NOTE(review): presumably the positions of genes_this within genes_abc — confirm how
# basicu.get_index_from_array handles genes that are not found.
genes_idx = basicu.get_index_from_array(genes_abc, genes_this)

fig, ax = plt.subplots(1,1,figsize=(8,5))
zmat = zmat_nfd_abcg[genes_idx, :][:, columns]
# re-z-score each row over the selected columns only
zmat = stats.zscore(zmat, axis=1)
sns.heatmap(zmat,#[:,:7*10], 
            cmap='coolwarm', cbar_kws=dict(shrink=0.5), 
            xticklabels=False,
            # vmax=2.5, vmin=-2.5,
            vmax=2, vmin=-2,
            rasterized=True,
            ax=ax)
ax.set_yticks(0.5+np.arange(len(zmat)))
ax.set_yticklabels(genes_this, fontsize=12, rotation=0)

# white separators: between the two conditions (x=10) and between the A / B / C gene groups (y=4, 8)
ax.vlines(10, 0, len(zmat), color='white', linewidth=1)
ax.hlines([4,8], 0, 20, color='white', linewidth=1)
# ax.hlines([2,4,7,8,11], 0, 20, color='white', linewidth=1)
for i, cond in enumerate(['P21NR', 'P21DR']):
    ax.text(i*10, -0.5, f'{cond}', fontsize=12, va='bottom')
plt.show()


In [None]:
# Heatmap of all vision-dependent genes across the bins of P21NR and P21DR,
# with CSM and synaptic genes highlighted on the y axis.
genes_this = vision_abc_all
# NOTE(review): presumably the positions of genes_this within genes_abc — confirm.
genes_idx = basicu.get_index_from_array(genes_abc, genes_this)
genes_highlights1 = vision_abc_csm
genes_highlights2 = vision_abc_synaptic

fig, ax = plt.subplots(1,1,figsize=(7,14))
zmat = zmat_nfd_abcg[genes_idx, :][:, columns]
# re-z-score each row over the selected columns only
zmat = stats.zscore(zmat, axis=1)
# order rows (default key: DR mean above NR mean, then centroid position in NR);
# the gene labels are permuted together with the rows
dforder, gene_order = order_zmat(zmat)
zmat = zmat[gene_order]
genes_this = genes_this[gene_order]
sns.heatmap(zmat, 
            cmap='coolwarm', cbar_kws=dict(shrink=0.3), 
            xticklabels=False,
            # vmax=2.5, vmin=-2.5,
            vmax=2, vmin=-2,
            rasterized=True,
            ax=ax)
ax.set_yticks(0.5+np.arange(len(zmat)))
ax.set_yticklabels(genes_this, fontsize=12, rotation=0)
# applied in sequence: a gene present in both lists ends up green
mark_ticklabels(genes_highlights1, color='magenta')
mark_ticklabels(genes_highlights2, color='green')

# white separator between the two conditions
ax.vlines(10, 0, len(zmat), color='white', linewidth=1)
# ax.hlines([4,8], 0, 20, color='white', linewidth=1)
# ax.hlines([2,4,7,8,11], 0, 20, color='white', linewidth=1)
for i, cond in enumerate(['P21NR', 'P21DR']):
    ax.text(i*10, -0.5, f'{cond}', fontsize=12, va='bottom')
    
output = os.path.join(outfigdir, 'vision_abc_heatmap.pdf')
powerplots.savefig_autodate(fig, output)

plt.show()

In [None]:
# Heatmap of all archetype-enriched CSM genes at P21NR vs P21DR,
# with the vision-dependent ones grouped and highlighted.
genes_this = abc_csm
# NOTE(review): presumably the positions of genes_this within genes_abc — confirm.
genes_idx = basicu.get_index_from_array(genes_abc, genes_this)
genes_highlights1 = vision_abc_csm

fig, ax = plt.subplots(1,1,figsize=(7,14))
zmat = zmat_nfd_abcg[genes_idx, :][:, columns]
# re-z-score each row over the selected columns only
zmat = stats.zscore(zmat, axis=1)

# Primary sort key, the product of:
#   a: 1 if the gene is in vision_abc_csm, else 0
#   b: 2 if the mean of (DR - NR) is positive, else 1
# giving 0 = not in vision_abc_csm, 1 = in it with lower DR mean, 2 = in it with higher DR mean.
genes_this_metric_a = np.array([g in vision_abc_csm for g in genes_this]).astype(int)
genes_this_metric_b = (np.mean(zmat[:,10:]-zmat[:,:10], axis=1) > 0).astype(int)+1
genes_this_metric0 = genes_this_metric_a * genes_this_metric_b

# rows and their labels are permuted together
dforder, gene_order = order_zmat(zmat, genes_this_metric0)
zmat = zmat[gene_order]
genes_this = genes_this[gene_order]

sns.heatmap(zmat, 
            cmap='coolwarm', cbar_kws=dict(shrink=0.3), 
            xticklabels=False,
            # vmax=2.5, vmin=-2.5,
            vmax=2, vmin=-2,
            rasterized=True,
            ax=ax)
ax.set_yticks(0.5+np.arange(len(zmat)))
ax.set_yticklabels(genes_this, fontsize=12, rotation=0)
mark_ticklabels(genes_highlights1, color='magenta')

# white separator between the two conditions
ax.vlines(10, 0, len(zmat), color='white', linewidth=1)
# ax.hlines([4,8], 0, 20, color='white', linewidth=1)
# ax.hlines([2,4,7,8,11], 0, 20, color='white', linewidth=1)
for i, cond in enumerate(['P21NR', 'P21DR']):
    ax.text(i*10, -0.5, f'{cond}', fontsize=12, va='bottom')
    
output = os.path.join(outfigdir, 'csm_abc_heatmap.pdf')
powerplots.savefig_autodate(fig, output)

plt.show()

# Line plots

In [None]:
# Subset cells to the two P21 conditions.
adatasub_nr = adata[adata.obs['cond']=='P21NR']
adatasub_dr = adata[adata.obs['cond']=='P21DR']

# Per-cell scores_c-a, min-max rescaled to [0, 1] within each condition.
# The binning further down uses pd.qcut (quantiles), so this monotonic
# rescaling does not change which bin a cell falls into.
x1 = adatasub_nr.obs['scores_c-a'].values
x2 = adatasub_dr.obs['scores_c-a'].values
x1min, x1max = np.min(x1), np.max(x1)
x2min, x2max = np.min(x2), np.max(x2)

x1 = (x1-x1min)/(x1max-x1min)
x2 = (x2-x2min)/(x2max-x2min)

# Midpoints of nbins equal-width intervals on [0, 1]; used as plotting positions for the bins.
nbins = 10 
xmids = np.arange(1/(2*nbins),1,1/nbins)

In [None]:
# genes_plot = vision_abc_csms[:10] # genes_abc_csm[cond_sig]

# Scratch version of the per-gene line plot: binned mean expression along the
# scores_c-a axis at P21NR (black) and P21DR (gray). Edit the list to choose genes.
genes_plot = [
#     ### synaptic B
#     'Homer1', 'Plcl1',
#     'Bdnf', 'Nptx2', 
#     'Ppm1h', 
    
#     ### CSMs
#     'Pcdh19', 'Pcdh15', 'Pcdh10', 
#     'Cdh4',   'Cdh18',  'Cdh20',
#     'Ptprg',  'Megf11', 'Igsf9b',
#     'Epha6',  'Nrp1',   'Epha10',
#     'Slit3',  'Ctnna3', 'Cntnap4',
    
    # 'Cdh7', 'Cdh20'
    'Slit3',
    # 'Rph3a', 'Syt10', 'C1ql3'
    
    ### others
    # 'Mapk4',                      # B synaptic vision-dependent - but less well known than ERK1/2 MAPKs - synapic strength
    # 'C1ql3',                      # B synaptic vision-dependent - but looks like A-enriched
    # 'Amph', 'Dtna',               # B synaptic less vision-dependent
    # 'Rph3a',                      # synaptic C
    # 'Syt10', 'Sparcl1', 'Fnbp1l', # synaptic A
]


# Grid with nx columns and enough rows for all genes (ceil(n/nx));
# panels beyond len(genes_plot) are left empty.
n = len(genes_plot)
nx = 3
ny = int((n+nx-1)/nx)

fig, axs = plt.subplots(ny, nx, figsize=(3*nx,4*ny), sharey=True)
for i, gene in enumerate(genes_plot):
    ax = axs.flat[i]
    
    # per-cell CP10k values of this gene in each condition
    y1 = np.array(adatasub_nr[:,gene].layers['norm']).reshape(-1,)
    y2 = np.array(adatasub_dr[:,gene].layers['norm']).reshape(-1,)

    # column 0 = score, column 1 = expression; bin cells into nbins quantile bins by score
    df1 = pd.DataFrame(np.vstack([x1,y1]).T)
    df1['bin_idx'] = pd.qcut(df1[0], nbins, labels=False)
    df1_mean = df1.groupby('bin_idx').mean()
    # both conditions are divided by the NR maximum, so NR peaks at 1
    scale = df1_mean[1].max()
    df1_mean = df1_mean/scale
    
    df2 = pd.DataFrame(np.vstack([x2,y2]).T)
    df2['bin_idx'] = pd.qcut(df2[0], nbins, labels=False)
    df2_mean = df2.groupby('bin_idx').mean()
    df2_mean = df2_mean/scale

    # ax.scatter(x1, y1, s=10, edgecolors='none', color='C1', alpha=0.1)
    # ax.scatter(x2, y2, s=10, edgecolors='none', color='k',  alpha=0.1)
    # expression on x, bin position on y (y axis inverted so the first bin is at the top)
    ax.plot(df1_mean[1], xmids, '-o', markersize=5, color='k')
    ax.plot(df2_mean[1], xmids, '-o', markersize=5, color='gray')
    ax.set_ylim([1,0])
    # ax.set_yticks(np.arange(0.1,1,1/5)[::-1])
    ax.set_yticklabels([])
    ax.set_xlabel('norm expr')
    
    ax.grid(axis='x')
    
    # ax.plot(xmids, np.log2(df2_mean[1]/df1_mean[1]), '-')
    
    ax.set_title(gene)
    sns.despine(ax=ax)
    # break
    
fig.tight_layout()
plt.show()

In [None]:
# genes_plot = vision_abc_csms[:10] # genes_abc_csm[cond_sig]

# Figure version of the per-gene line plot: binned mean expression along the
# scores_c-a axis at P21NR (black) and P21DR (gray), one panel per gene, saved to PDF.
genes_plot = [
    
    ### CSMs A
    'Pcdh19', 
    'Cdh4',   
    'Ptprg',  
    'Epha6',  
    'Slit3',  
    
    ### CSMs C
    'Pcdh10', 
    'Cdh20',
    'Igsf9b',
    'Epha10',
    'Cntnap4',
    
    ### CSMs B
    'Pcdh15', 
    'Cdh18',  
    'Megf11', 
    'Nrp1',   
    'Ctnna3', 
    
    ### synaptic B
    'Homer1', 'Plcl1', 'Bdnf', 'Nptx2', 'Ppm1h', 
]


# Grid with nx columns and ceil(n/nx) rows; with 5 genes per group each row is one group.
n = len(genes_plot)
nx = 5
ny = int((n+nx-1)/nx)

fig, axs = plt.subplots(ny, nx, figsize=(4*nx,4*ny), sharey=True)
for i, gene in enumerate(genes_plot):
    ax = axs.flat[i]
    
    # per-cell CP10k values of this gene in each condition
    y1 = np.array(adatasub_nr[:,gene].layers['norm']).reshape(-1,)
    y2 = np.array(adatasub_dr[:,gene].layers['norm']).reshape(-1,)

    # column 0 = score, column 1 = expression; bin cells into nbins quantile bins by score
    df1 = pd.DataFrame(np.vstack([x1,y1]).T)
    df1['bin_idx'] = pd.qcut(df1[0], nbins, labels=False)
    df1_mean = df1.groupby('bin_idx').mean()
    # both conditions are divided by the NR maximum, so NR peaks at 1
    scale = df1_mean[1].max()
    df1_mean = df1_mean/scale
    
    df2 = pd.DataFrame(np.vstack([x2,y2]).T)
    df2['bin_idx'] = pd.qcut(df2[0], nbins, labels=False)
    df2_mean = df2.groupby('bin_idx').mean()
    df2_mean = df2_mean/scale

    # ax.scatter(x1, y1, s=10, edgecolors='none', color='C1', alpha=0.1)
    # ax.scatter(x2, y2, s=10, edgecolors='none', color='k',  alpha=0.1)
    # expression on x, bin position on y (y axis inverted so the first bin is at the top)
    ax.plot(df1_mean[1], xmids, '-o', markersize=5, color='k')
    ax.plot(df2_mean[1], xmids, '-o', markersize=5, color='gray')
    ax.set_ylim([1,0])
    # ax.set_yticks(np.arange(0.1,1,1/5)[::-1])
    ax.set_yticklabels([])
    ax.set_xlabel('normalized expression')
    
    ax.grid(axis='x')
    
    # ax.plot(xmids, np.log2(df2_mean[1]/df1_mean[1]), '-')
    
    ax.set_title(gene)
    sns.despine(ax=ax)
    # break
    
fig.tight_layout()
output = os.path.join(outfigdir, 'vision_abc_lineplots.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

# quantification

In [None]:
# Five hex colors; in the cells visible here they are referenced only from commented-out plotting code.
# NOTE(review): presumably one color per bin of a 5-bin split of L2/3 cells — confirm.
l23_5fold_colors = np.array(['#2775B3', '#5D737C', '#F47C20', '#BD842B', '#2B9A47'])

In [None]:
def get_mean_patterns(y1, y2, nbins_this, scores1=None, scores2=None):
    """Per-bin log2 fold change of mean expression between two groups of cells.

    The cells of each group are split into `nbins_this` quantile bins by their
    score, expression is averaged per bin and gene, and log2(group2 / group1)
    is returned bin by bin.

    Parameters
    ----------
    y1, y2 : array-like, shape (n_cells_1, n_genes) and (n_cells_2, n_genes)
        Expression of the same genes in group 1 and group 2.
    nbins_this : int
        Number of quantile bins.
    scores1, scores2 : array-like, optional
        Per-cell scores used for binning, aligned with the rows of `y1` / `y2`.
        When omitted, the notebook-level arrays `x1` / `x2` are used, which was
        the only behavior before; passing them explicitly removes the
        dependence on hidden global state.

    Returns
    -------
    xmids : numpy.ndarray, shape (nbins_this,)
        Midpoints of `nbins_this` equal-width intervals on [0, 1].
    logfc : pandas.DataFrame, shape (nbins_this, n_genes)
        log2 of the group-2 bin mean over the group-1 bin mean. A bin mean of
        zero produces +/-inf or NaN entries; they are not filtered here.
    """
    if scores1 is None:
        scores1 = x1
    if scores2 is None:
        scores2 = x2

    df1 = pd.DataFrame(y1)
    df1['bin_idx'] = pd.qcut(scores1, nbins_this, labels=False)
    df1_mean = df1.groupby('bin_idx').mean() # average over cells of the same bin

    df2 = pd.DataFrame(y2)
    df2['bin_idx'] = pd.qcut(scores2, nbins_this, labels=False)
    df2_mean = df2.groupby('bin_idx').mean() # average over cells of the same bin
    logfc = np.log2(df2_mean/df1_mean)

    xmids = np.arange(1/(2*nbins_this),1,1/nbins_this)

    return xmids, logfc



In [None]:
# CP10k 
# Mean |log2 fold change| (P21 DR over NR) per bin along scores_c-a, across all archetype-enriched CSM genes.
genes_plot = abc_csm
nbins_this = 5
y1 = np.array(adatasub_nr[:,genes_plot].layers['norm'])#.reshape(-1,)
y2 = np.array(adatasub_dr[:,genes_plot].layers['norm'])#.reshape(-1,)
xmids, logfc = get_mean_patterns(y1, y2, nbins_this)

# mean and std over genes (axis 1) of the absolute fold change, per bin
meanfc = np.mean(np.abs(logfc), axis=1)
stdfc = np.std(np.abs(logfc), axis=1)

fig, ax = plt.subplots(figsize=(4,4))
ax.plot(meanfc, xmids,'-o', color='magenta') # , colors=np.repeat(l23_5fold_colors, 2))
# shaded band: mean +/- 1.96 * std / sqrt(number of genes)
ax.fill_betweenx(xmids, 
                 meanfc-stdfc/np.sqrt(logfc.shape[1])*1.96, 
                 meanfc+stdfc/np.sqrt(logfc.shape[1])*1.96,
                 color='magenta',
                 edgecolors='none',
                 alpha=0.2,
               ) 
ax.invert_yaxis()
sns.despine(ax=ax)
ax.set_yticks(xmids)
ax.set_ylabel('L2/3 cells')
ax.set_xlabel('|log2(fold change)|')
ax.set_title('P21 NR vs DR \n (All archetype-enriched CSMs)')
output = os.path.join(outfigdir, 'ac_line_csm.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

# ax.grid(axis='x')
# sns.boxplot(np.abs(logfc).T, 
#                color='white', 
#                ax=ax)
# sns.violinplot(np.abs(logfc).T, 
#                color='white', 
#                cut=0, ax=ax)
# sns.stripplot(np.abs(logfc).T, s=3, palette={i:j for i, j in enumerate(l23_5fold_colors)}, 
#               edgecolors='none', alpha=1, ax=ax)

In [None]:
# Same plot as for the CSM genes, restricted to the curated synaptic gene list.
genes_plot = vision_abc_synaptic
nbins_this = 5
y1 = np.array(adatasub_nr[:,genes_plot].layers['norm'])#.reshape(-1,)
y2 = np.array(adatasub_dr[:,genes_plot].layers['norm'])#.reshape(-1,)
xmids, logfc = get_mean_patterns(y1, y2, nbins_this)

# mean and std over genes (axis 1) of the absolute fold change, per bin
meanfc = np.mean(np.abs(logfc), axis=1)
stdfc = np.std(np.abs(logfc), axis=1)

fig, ax = plt.subplots(figsize=(4,4))
ax.plot(meanfc, xmids,'-o', color='green') # , colors=np.repeat(l23_5fold_colors, 2))
# shaded band: mean +/- 1.96 * std / sqrt(number of genes)
ax.fill_betweenx(xmids, 
                 meanfc-stdfc/np.sqrt(logfc.shape[1])*1.96, 
                 meanfc+stdfc/np.sqrt(logfc.shape[1])*1.96,
                 color='green',
                 edgecolors='none',
                 alpha=0.2,
               ) 
ax.invert_yaxis()
sns.despine(ax=ax)
ax.set_yticks(xmids)
ax.set_ylabel('L2/3 cells')
ax.set_xlabel('|log2(fold change)|')
ax.set_title('P21 NR vs DR \n (All archetype-enriched vision-dependent synaptic)')
output = os.path.join(outfigdir, 'ac_line_synapse.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

# ax.grid(axis='x')
# sns.boxplot(np.abs(logfc).T, 
#                color='white', 
#                ax=ax)
# sns.violinplot(np.abs(logfc).T, 
#                color='white', 
#                cut=0, ax=ax)
# sns.stripplot(np.abs(logfc).T, s=3, palette={i:j for i, j in enumerate(l23_5fold_colors)}, 
#               edgecolors='none', alpha=1, ax=ax)

In [None]:
# CP10k 
# Same plot again, for every gene listed in df_nrdr (vision_abc_all).
genes_plot = vision_abc_all
nbins_this = 5
y1 = np.array(adatasub_nr[:,genes_plot].layers['norm'])#.reshape(-1,)
y2 = np.array(adatasub_dr[:,genes_plot].layers['norm'])#.reshape(-1,)
xmids, logfc = get_mean_patterns(y1, y2, nbins_this)

# mean and std over genes (axis 1) of the absolute fold change, per bin
meanfc = np.mean(np.abs(logfc), axis=1)
stdfc = np.std(np.abs(logfc), axis=1)

fig, ax = plt.subplots(figsize=(4,4))
ax.plot(meanfc, xmids,'-o', color='k') # , colors=np.repeat(l23_5fold_colors, 2))
# shaded band: mean +/- 1.96 * std / sqrt(number of genes)
ax.fill_betweenx(xmids, 
                 meanfc-stdfc/np.sqrt(logfc.shape[1])*1.96, 
                 meanfc+stdfc/np.sqrt(logfc.shape[1])*1.96,
                 color='k',
                 edgecolors='none',
                 alpha=0.2,
               ) 
ax.invert_yaxis()

sns.despine(ax=ax)
ax.set_yticks(xmids)
ax.set_ylabel('L2/3 cells')
ax.set_xlabel('|log2(fold change)|')
ax.set_title('P21 NR vs DR \n (all vision-dependent archetype-enriched genes)')
output = os.path.join(outfigdir, 'ac_line_vision_abc.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

In [None]:
# CP10k 
# Four-panel summary of the per-bin |log2 fold change| for four gene groups.
# colors[i] and titles[i] correspond to the i-th gene group in the loop below.
colors = ['k', 'green', 'magenta', 'magenta', ]
titles = ['vision-dep.', 'vision-dep. synaptic', 'vision-dep. CSMs', 'vision-indep. CSMs']
# archetype-enriched CSM genes that are not in the vision-dependent CSM list
nonvision_abc_csm = np.array([g for g in abc_csm if g not in vision_abc_csm])

fig, axs = plt.subplots(1,4, figsize=(4*4,4), sharey=True)
for i, genes_plot in enumerate([vision_abc_all,  vision_abc_synaptic, vision_abc_csm, nonvision_abc_csm,]):
    ax = axs[i]
    nbins_this = 5
    y1 = np.array(adatasub_nr[:,genes_plot].layers['norm'])#.reshape(-1,)
    y2 = np.array(adatasub_dr[:,genes_plot].layers['norm'])#.reshape(-1,)
    xmids, logfc = get_mean_patterns(y1, y2, nbins_this)

    # mean and std over genes (axis 1) of the absolute fold change, per bin
    meanfc = np.mean(np.abs(logfc), axis=1)
    stdfc = np.std(np.abs(logfc), axis=1)
    ax.plot(meanfc, xmids,'-o', color=colors[i]) # , colors=np.repeat(l23_5fold_colors, 2))
    # shaded band: mean +/- 1.96 * std / sqrt(number of genes)
    ax.fill_betweenx(xmids, 
                     meanfc-stdfc/np.sqrt(logfc.shape[1])*1.96, 
                     meanfc+stdfc/np.sqrt(logfc.shape[1])*1.96,
                     color=colors[i],
                     edgecolors='none',
                     alpha=0.2,
                   ) 
    sns.despine(ax=ax)
    ax.set_title(titles[i])
    ax.set_yticks(xmids)
    ax.set_ylabel('L2/3 cells')
    ax.set_xlabel('|log2(fold change)|')
    # ax.set_title('P21 NR vs DR \n (all vision-dependent archetype-enriched genes)')
# the y axis is shared, so inverting it once (after the loop) flips every panel
axs[0].invert_yaxis()

output = os.path.join(outfigdir, 'vision_on_archetype_gene_groups.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

In [None]:
# Gene-indexed copy of df_nrdr with a three-way annotation: 'csm', 'synapse' or 'other'.
df_nrdr_anno = df_nrdr.copy().set_index('gene')
df_nrdr_anno['anno'] = 'other' 
# 'synapse' is assigned last, so a gene present in both lists ends up as 'synapse'
df_nrdr_anno.loc[vision_abc_csm, 'anno'] = 'csm'  #[df_nrdr_anno['is_csm']] = df_nrdr['is_csm']
df_nrdr_anno.loc[vision_abc_synaptic, 'anno'] = 'synapse'  #[df_nrdr_anno['is_csm']] = df_nrdr['is_csm']
# drop a trailing ' ALL' token from the test_sig strings (a bare 'ALL' is left unchanged)
df_nrdr_anno['sig_format'] = df_nrdr_anno['test_sig'].apply(lambda x: x.replace(' ALL', ''))
df_nrdr_anno

In [None]:
# Stacked horizontal bars: number of genes per 'archetype' value, split by annotation.
fig, ax = plt.subplots(figsize=(5,4))
# count (archetype, anno) pairs, pivot anno into columns, then fix the row and column order;
# reindex introduces NaN rows for archetype values absent from the table
df_plot1 = (df_nrdr_anno.groupby(['archetype', 'anno'])
                   .size().unstack().fillna(0) # .to_frame().reset_index()
                   .reindex(['A', 'B', 'C', 'AB', 'BC'])
                   [['csm', 'synapse', 'other']]
           )
df_plot1.plot.barh(edgecolor='none', ax=ax, width=0.8, stacked=True, color=['magenta', 'green', 'lightgray'])
ax.legend(bbox_to_anchor=(1,1))

ax.set_xlabel('number of vision-dependent genes')
ax.set_ylabel('L2/3 archetypes')
ax.invert_yaxis() # first category at the top
sns.despine(ax=ax)
ax.grid(axis='y')

output = os.path.join(outfigdir, 'ac_bar_archetype_origin.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

In [None]:
# Stacked horizontal bars: number of genes per 'sig_format' value, split by annotation.
fig, ax = plt.subplots(figsize=(5,4))
# count (sig_format, anno) pairs, pivot anno into columns, then fix the row and column order;
# reindex introduces NaN rows for sig_format values absent from the table
df_plot1 = (df_nrdr_anno.groupby(['sig_format', 'anno'])
                   .size().unstack().fillna(0) # .to_frame().reset_index()
                   .reindex(['A', 'B', 'C', 'ALL', 'A C', 'B C',])
                   [['csm', 'synapse', 'other']]
           )
df_plot1.plot.barh(edgecolor='none', ax=ax, width=0.8, stacked=True, color=['magenta', 'green', 'lightgray'])
ax.legend(bbox_to_anchor=(1,1))

ax.set_xlabel('number of vision-dependent genes')
ax.set_ylabel('L2/3 archetypes')
ax.invert_yaxis() # first category at the top
sns.despine(ax=ax)
ax.grid(axis='y')

output = os.path.join(outfigdir, 'ac_bar_archetype_affected.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

# P6 gradient

In [None]:
# Subset cells to the P6NR condition.
adatasub_p6 = adata[adata.obs['cond']=='P6NR']

# Per-cell scores_c-a, min-max rescaled to [0, 1] within this condition.
x3 = adatasub_p6.obs['scores_c-a'].values
x3min, x3max = np.min(x3), np.max(x3)
x3 = (x3-x3min)/(x3max-x3min)

# Midpoints of nbins equal-width intervals on [0, 1]
# (this rebinds the notebook-level nbins / xmids used by earlier plotting cells).
nbins = 10 
xmids = np.arange(1/(2*nbins),1,1/nbins)

In [None]:
# List the Robo* and Slit* genes present after gene filtering.
# A cell only echoes its last expression, so the two separate list expressions used
# before showed the Slit genes only; one expression displays both families.
[g for g in adatasub_p6.var.index.values if g.startswith(('Robo', 'Slit'))]

In [None]:
# genes_plot = vision_abc_csms[:10] # genes_abc_csm[cond_sig]

# Binned mean expression (with a confidence band) along the scores_c-a axis at P6NR.
genes_plot = [
    'Robo1', 'Robo3', 
    # 'Slit1', 'Slit2', 'Slit3',
]


# Grid with nx columns and ceil(n/nx) rows; panels beyond len(genes_plot) are left empty.
n = len(genes_plot)
nx = 3
ny = int((n+nx-1)/nx)

fig, axs = plt.subplots(ny, nx, figsize=(3*nx,4*ny), sharey=True) #, sharex=True)
for i, gene in enumerate(genes_plot):
    ax = axs.flat[i]
    
    # per-cell CP10k values of this gene
    y3 = np.array(adatasub_p6[:,gene].layers['norm']).reshape(-1,)

    # column 0 = score, column 1 = expression; bin cells into nbins quantile bins by score
    df3 = pd.DataFrame(np.vstack([x3,y3]).T)
    df3['bin_idx'] = pd.qcut(df3[0], nbins, labels=False)
    df3_mean = df3.groupby('bin_idx').mean()
    df3_std = df3.groupby('bin_idx').std()
    df3_n = df3.groupby('bin_idx').size()
    # normalize by the largest bin mean so the curve peaks at 1
    scale = df3_mean[1].max()
    df3_mean = df3_mean/scale
    df3_std  = df3_std/scale
    
    ax.plot(df3_mean[1], xmids, '-o', markersize=5, color='k')
    # shaded band: mean +/- 1.96 * std / sqrt(number of cells in the bin)
    ax.fill_betweenx(xmids, 
                     df3_mean[1]-df3_std[1]/np.sqrt(df3_n)*1.96, 
                     df3_mean[1]+df3_std[1]/np.sqrt(df3_n)*1.96, 
                     color='k', 
                     edgecolors='none',
                     alpha=0.2)
    ax.set_ylim([1,0])
    # ax.set_yticks(np.arange(0.1,1,1/5)[::-1])
    ax.set_yticklabels([])
    ax.set_xlabel('norm expr')
    ax.set_xticks([0, 0.5, 1])
    
    ax.grid(axis='x')
    
    
    ax.set_title(gene)
    sns.despine(ax=ax)
    # break
    
fig.tight_layout()
output = os.path.join(outfigdir, 'Robo1-3.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()