In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import anndata 
import seaborn as sns
from scipy.stats import zscore
import matplotlib.pyplot as plt
import collections
from natsort import natsorted

from scipy import stats
from scipy import sparse
from sklearn.decomposition import PCA
from umap import UMAP

from matplotlib.colors import LinearSegmentedColormap

from scroutines.config_plots import *
from scroutines import powerplots # .config_plots import *
from scroutines import pnmf
from scroutines import basicu
from scroutines.gene_modules import GeneModules  


In [None]:
# Figure output directory, going by the variable name (absolute, machine-specific path).
outdir_fig = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/figures"

# load gene annotation and data

In [None]:
%%time
adata = anndata.read("../../data/v1_multiome/L23_allmultiome_proc_P6toP21.h5ad")
adata

In [None]:
# Two plain-text numeric tables read with np.loadtxt
# (by file name: q-values and log2 fold changes -- TODO confirm).
f_rna1 = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/rna_qs_avc_p6to21.txt'
f_rna2 = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/rna_l2fc_avc_p6to21.txt'

# Load both files in the same order as before.
rna_qs_avc, rna_l2fc_avc = (np.loadtxt(path) for path in (f_rna1, f_rna2))

rna_qs_avc.shape, rna_l2fc_avc.shape

In [None]:
# # define
# Gene names, taken from the var index of the AnnData object.
genes = adata.var.index.values
# conds = adata.obs['cond'].values
# types = adata.obs['Type'].values
# samps = adata.obs['sample'].values

# switch back to float64
# NOTE: this overwrites the 'norm' layer in place and (re)creates 'lognorm' and 'zlognorm'.
adata.layers[    'norm'] = adata.layers['norm'][...].astype(np.float64)
# log10(1 + norm)
adata.layers[ 'lognorm'] = np.log10(1+adata.layers['norm'][...]) # np.array(xln.todense())
# adata.layers[ 'lognorm'] = np.log2(1+adata.layers['norm'][...]) # np.array(xln.todense())
# z-score along axis 0, i.e. each column is standardized across rows
# (AnnData layers are obs x var, so presumably per gene across cells).
# NOTE(review): a column with zero variance would yield NaN here -- confirm this is acceptable downstream.
adata.layers['zlognorm'] = zscore(adata.layers['lognorm'][...], axis=0)

In [None]:
# Two embeddings stored in adata.obsm
# (by key name, PCA coordinates fit at P8 and at P17 onward -- TODO confirm).
pcs_p8 = adata.obsm['pca_p8']
pcs_p17on = adata.obsm['pca_p17on']

In [None]:
# 20 colors from the 'tab20c' palette; indexed further down to pick one color per time point.
allcolors = sns.color_palette('tab20c', 20)
allcolors

In [None]:
# 20 entries requested from 'tab10'.
# NOTE(review): tab10 defines 10 colors, so the list presumably repeats them -- confirm if more than 10 are needed.
allcolors2 = sns.color_palette('tab10', 20)
allcolors2

In [None]:
# Color for each time point, listed from P6 to P21.
palette = collections.OrderedDict({
     "P6": allcolors[2],
     "P8": allcolors[1],
    "P10": allcolors[0],
    "P12": allcolors[4+2],
    "P14": allcolors[4+0],
    
    "P17": allcolors[8+2],
    "P21": allcolors[8+0],
    
})
cases = np.array(list(palette.keys()))

# Integer rank of each condition label; used to order groups along the time axis.
cond_order_dict = {
    'P6':  0,
    'P8':  1,
    'P10': 2,
    'P12': 3,
    'P14': 4,
    'P17': 5,
    'P21': 6,
    
    # 'P12DR': 7,
    # 'P14DR': 8,
    # 'P17DR': 9,
    # 'P21DR': 10,
}
unq_conds = np.array(list(cond_order_dict.keys()))
# Raises KeyError if obs['cond'] contains a label that is not in cond_order_dict.
adata.obs['cond_order'] = adata.obs['cond'].apply(lambda x: cond_order_dict[x]).astype(int)

# Color per cluster label. An earlier 'L2/3_A'/'L2/3_B'/'L2/3_C' palette built from
# allcolors2 was assigned to this same name and overwritten here before any use,
# so that dead assignment has been dropped.
palette_types = {
    'c14': 'C0', 
    'c18': 'C1',
    'c16': 'C2', 
    
    'c13': 'C0', 
    'c15': 'C1', 
    'c17': 'C2',
}
# Keys in insertion order.
type_order = list(palette_types)
type_order

In [None]:
# Display the distinct condition labels present in obs['cond'].
adata.obs['cond'].unique()

# Big tensor: summed expression by time point, quantile bin, replicate, and gene

In [None]:
# by condition
# Build a (time, bin, replicate, gene) tensor: for every time point, cells are split
# into nc quantile bins along the first column of obsm['pca_p17on'], and adata.X is
# summed over the cells of each (bin, replicate) pair.
nt, nc, nr, ng = len(unq_conds), 5, 2, len(adata.var)  # time/condition, cluster, rep, gene 
tensor = np.zeros((nt, nc, nr, ng))

# The time axis is indexed by the cond_order value itself, not by the position in
# the groupby iteration, so a time point with no cells cannot silently shift the
# later time points into the wrong slot.
for cond_order, obssub in adata.obs.groupby('cond_order'):
    print(cond_order)
    adatasub = adata[obssub.index]
    
    # by type -- cut into nc=5 equal bins
    x = adatasub.obsm['pca_p17on'][...,0]
    type_lbls = pd.qcut(x, nc, labels=np.arange(nc)).astype(int)
    unq_types = np.unique(type_lbls)
    
    # by replicates -- first two
    sample_lbls = adatasub.obs['sample'].values
    unq_samples = np.unique(sample_lbls)[:nr] # first nr samples if n > nr
    if len(unq_samples) < nr:
        # Unfilled replicate slots stay all-zero and turn into NaN (0/0) once the
        # tensor is divided by its per-slot totals.
        print(f"warning: cond_order={cond_order} has only {len(unq_samples)} sample(s); "
              f"expected {nr}, remaining replicate slots stay zero")
    
    # Bin labels are the integers 0..nc-1, so they index the bin axis directly.
    for qtype in unq_types:
        for k, samp in enumerate(unq_samples):
            selector = ((sample_lbls==samp) & (type_lbls==qtype))
            # tensor[i,j,k] = np.mean(adatasub[selector].layers['lognorm'][...], axis=0)
            # tensor[i,j,k] = np.sum(np.array(adatasub[selector].layers['norm']), axis=0) # CP10k - sum over all cells (or simply raw reads?)
            # .todense() requires adata.X to be a scipy sparse matrix.
            tensor[cond_order,qtype,k] = np.sum(np.array(adatasub[selector].X.todense()), axis=0) # CP10k - sum over all cells (or simply raw reads?)

In [None]:
# Scale every gene vector so it sums to one million, then take log2(1 + CPM).
# NOTE: `tensor` is overwritten, so running this cell a second time applies the
# transform again on already-normalized values.
slot_totals = tensor.sum(axis=-1, keepdims=True)
tensor = np.log2(1 + (tensor / slot_totals) * 1e6)

In [None]:
def rename_gene(g):
    """Map the gene symbol 'March1' to 'Marchf1'; return any other value unchanged."""
    return 'Marchf1' if g == 'March1' else g
    
# Apply the symbol update to every gene name, then show both shapes side by side.
genes = np.array(list(map(rename_gene, genes)))
tensor.shape, genes.shape

In [None]:
# use this new order
# Tab-separated file read without a header row, so columns get integer labels (0, 1, 2, ...).
f = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac/all_ac_genes_unique.bed'
df_ac_genes = pd.read_csv(f, sep='\t', header=None)
df_ac_genes

In [None]:
# Look up each entry of column 3 of df_ac_genes in `genes`.
# NOTE(review): the assert implies the helper returns -1 for names it cannot find --
# confirm against scroutines.basicu.get_index_from_array.
ac_genes_idx = basicu.get_index_from_array(genes, df_ac_genes[3])
# Fail if any requested gene is missing from `genes`.
assert np.all(ac_genes_idx != -1)

In [None]:
# Keep only the selected genes along the last (gene) axis, in the order given by ac_genes_idx.
tensor_sub = tensor[..., ac_genes_idx]
tensor_sub.shape

In [None]:
# Write the gene-subset tensor to disk in NumPy .npy format (overwrites any existing file at this path).
f_out = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac/all_ac_genes_tensor.npy"
np.save(f_out, tensor_sub)