In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import anndata 
import seaborn as sns
from scipy.stats import zscore
import matplotlib.pyplot as plt
import collections
from natsort import natsorted

from scipy import stats
from scipy import sparse
from sklearn.decomposition import PCA
from umap import UMAP
from statsmodels.stats.multitest import multipletests

from matplotlib.colors import LinearSegmentedColormap

from scroutines.config_plots import *
from scroutines import powerplots # .config_plots import *
from scroutines import pnmf
from scroutines import basicu
from scroutines.gene_modules import GeneModules  


In [None]:
# Directory that receives the figures saved by this notebook.
outfigdir = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/figures/250409"
# Pure-Python equivalent of `mkdir -p`: creates missing parent directories and
# does nothing if the directory already exists, so the cell is safe to re-run
# and does not depend on an IPython shell escape.
os.makedirs(outfigdir, exist_ok=True)

# load gene annotation and data

In [None]:
# Instantiate GeneModules and query it for a single gene. check_genes returns
# three sequences of strings, unpacked into g, gs and ms.
gene_modules = GeneModules()
g, gs, ms = gene_modules.check_genes('Cdh13')

# Show each returned sequence on its own line, entries separated by tabs.
for fields in (g, gs, ms):
    print("\t".join(fields))

In [None]:
# Plain-text gene list, read in as an array of strings
# (presumably one highly variable gene per entry -- confirm against the file).
hvg_list_path = '/u/home/f/f7xiesnm/v1_multiome/l23_alltime_hvgs_n4940.txt'
genes_alltime_hvgs = np.loadtxt(hvg_list_path, dtype='str')
genes_alltime_hvgs

In [None]:
# Open the .h5ad file in backed, read-only mode (backed='r').
# `anndata.read` is a deprecated alias; `anndata.read_h5ad` is the supported
# reader for this format and accepts the same `backed` argument.
adata = anndata.read_h5ad(
    "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/superdupermegaRNA_hasraw.h5ad",
    backed='r',
)
adata

In [None]:
# Per-cell metadata restricted to the '2023 Multiome' study.
# The copy is taken AFTER the boolean filter: the result is then an independent
# frame, so later cells can add columns to `meta` without pandas raising
# SettingWithCopyWarning and without ever writing back into adata.obs.
meta = adata.obs[adata.obs['Study'] == '2023 Multiome'].copy()
meta

In [None]:
# Condition labels in plotting order; the second list carries the 'DR' tag
# (presumably dark-reared vs. normally reared -- confirm).
nr_conds = ['P6', 'P8', 'P10', 'P12', 'P14', 'P17', 'P21']
dr_conds = ['P12DR', 'P14DR', 'P17DR', 'P21DR']

# Sample ID of every cell, and the matching time label obtained by dropping the
# last character of the ID (presumably a replicate suffix -- confirm) and then
# removing any 'DR' tag.
sample_labels = meta['Sample'].values
time_labels = [label[:-1].replace('DR', '') for label in sample_labels]

meta['sample'] = sample_labels
meta['time'] = time_labels

# Naturally sorted unique sample IDs, split by whether the name contains 'DR'.
uniq_samples = natsorted(np.unique(sample_labels))
nr_samples = [name for name in uniq_samples if "DR" not in name]
dr_samples = [name for name in uniq_samples if "DR" in name]

In [None]:
# Age x Subclass table of cell counts, with the listed age rows removed.
cell_counts = (
    meta.groupby(['Age', 'Subclass'])
    .size()
    .unstack()
    .drop(['P28', 'P28_dl', 'P28_dr', 'P38', 'P38_dr'])
)
# Total cells per age, then each subclass as a fraction of that total.
n_cells = cell_counts.sum(axis=1)
cell_freq = cell_counts.div(n_cells, axis=0)

In [None]:
# Cells whose Class is 'Excitatory', copied so the subset is an independent frame.
meta_exc = meta.loc[meta['Class'] == 'Excitatory'].copy()

# Age x Subclass counts, rows selected and ordered as nr_conds followed by dr_conds.
cell_counts_exc = (
    meta_exc.groupby(['Age', 'Subclass'])
    .size()
    .unstack()
    .loc[nr_conds + dr_conds]
)

# Row-normalise: each subclass as a fraction of that age's excitatory cells.
n_exc = cell_counts_exc.sum(axis=1)
cell_freq_exc = cell_counts_exc.div(n_exc, axis=0)

# Extra column: summed fraction of the L2/3, L4 and L2/3/4 subclasses.
cell_freq_exc['L2-4'] = cell_freq_exc[['L2/3', 'L4', 'L2/3/4']].sum(axis=1)
cell_freq_exc

In [None]:
# Same composition table as the per-age one, but with one row per sample,
# ordered as nr_samples followed by dr_samples.
cell_counts_exc_samp = (
    meta_exc.groupby(['sample', 'Subclass'])
    .size()
    .unstack()
    .loc[nr_samples + dr_samples]
)
n_exc_samp = cell_counts_exc_samp.sum(axis=1)
cell_freq_exc_samp = cell_counts_exc_samp.div(n_exc_samp, axis=0)

# Condition label of each sample: the sample ID without its last character
# (presumably a replicate suffix -- confirm).
cell_freq_exc_samp['cond'] = [sample_id[:-1] for sample_id in cell_freq_exc_samp.index]
# Extra column: summed fraction of the L2/3, L4 and L2/3/4 subclasses.
cell_freq_exc_samp['L2-4'] = cell_freq_exc_samp[['L2/3', 'L4', 'L2/3/4']].sum(axis=1)
cell_freq_exc_samp

In [None]:
# Bar plot of the L2/3 fraction of excitatory neurons per condition.
# Rows of cell_freq_exc_samp are samples, so each bar aggregates the samples
# that share a `cond` label (seaborn's default estimator and error bars).
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(data=cell_freq_exc_samp, x='cond', y='L2/3', edgecolor='none', ax=ax, facecolor='gray', width=0.7)
sns.despine(ax=ax)
ax.set_title("Abundance of L2/3 neurons")
ax.set_ylabel("Proportion of \n excitatory neurons")
# Rotate the existing tick labels in place. This avoids the
# set_xticklabels(get_xticklabels()) round-trip, which re-pins the label texts
# just to change their rotation.
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Subclass abundance across conditions: per-sample points (cell_freq_exc_samp)
# overlaid with the per-condition line (cell_freq_exc), one colour per subclass.
subclasses = ['L2/3', 'L4', 'L5IT', 'L6IT', 'L5PT', 'L6CT', 'L5NP', 'L6b']

# A single condition order drives both layers: it fixes the categorical x-axis of
# the strip plots and supplies the x positions of the lines. This replaces a
# hard-coded np.arange(11), so the layers stay aligned if the condition list changes.
cond_order = list(cell_freq_exc.index)
x_pos = np.arange(len(cond_order))

fig, ax = plt.subplots(figsize=(6, 4))
for i, col in enumerate(subclasses):
    # Same explicit colour for points and line so both match the legend entry.
    color = 'C{}'.format(i)
    sns.stripplot(data=cell_freq_exc_samp, x='cond', y=col, order=cond_order, color=color, s=7, ax=ax)
    ax.plot(x_pos, cell_freq_exc[col], '-', color=color, label=col)

sns.despine(ax=ax)
ax.set_ylim(bottom=0)
ax.legend(bbox_to_anchor=(1,1))
ax.set_title("Abundance of subclass")
ax.set_ylabel("Proportion of exc neurons")
# Rotate the existing tick labels in place rather than re-setting them.
ax.tick_params(axis='x', labelrotation=90)
output = os.path.join(outfigdir, 'cell_subclass_abundance.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

In [None]:
# --- Per-age class composition ---
# Age x Class counts, rows selected and ordered as nr_conds followed by dr_conds,
# then row-normalised to fractions of all cells at that age.
cell_counts_class = (
    meta.groupby(['Age', 'Class'])
    .size()
    .unstack()
    .loc[nr_conds + dr_conds]
)
n_class = cell_counts_class.sum(axis=1)
cell_freq_class = cell_counts_class.div(n_class, axis=0)
# Here the row label already is the condition.
cell_freq_class['cond'] = list(cell_freq_class.index)

# --- Per-sample class composition ---
cell_counts_class_samp = (
    meta.groupby(['sample', 'Class'])
    .size()
    .unstack()
    .loc[nr_samples + dr_samples]
)
n_class_samp = cell_counts_class_samp.sum(axis=1)
cell_freq_class_samp = cell_counts_class_samp.div(n_class_samp, axis=0)
# Condition label = sample ID without its last character
# (presumably a replicate suffix -- confirm).
cell_freq_class_samp['cond'] = [sample_id[:-1] for sample_id in cell_freq_class_samp.index]

In [None]:
# Class abundance across conditions: per-sample points (cell_freq_class_samp)
# overlaid with the per-condition line (cell_freq_class), one colour per class.
colors = ['k', 'C1', 'C2']
classes = ['Excitatory', 'Inhibitory', 'Non-neurons']

# A single condition order drives both layers: it fixes the categorical x-axis of
# the strip plots and supplies the x positions of the lines. This replaces a
# hard-coded np.arange(11), so the layers stay aligned if the condition list changes.
cond_order = list(cell_freq_class.index)
x_pos = np.arange(len(cond_order))

fig, ax = plt.subplots(figsize=(6, 4))
for i, col in enumerate(classes):
    color = colors[i]
    sns.stripplot(data=cell_freq_class_samp, x='cond', y=col, order=cond_order, color=color, s=7, ax=ax)
    ax.plot(x_pos, cell_freq_class[col], '-', color=color, label=col)

sns.despine(ax=ax)
ax.legend(bbox_to_anchor=(1,1))
ax.set_ylim(bottom=0)
ax.set_title("Abundance of cell class")
ax.set_ylabel("Proportion of cells")
# Rotate the existing tick labels in place rather than re-setting them.
ax.tick_params(axis='x', labelrotation=90)
output = os.path.join(outfigdir, 'cell_class_abundance.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()