In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import anndata 
import seaborn as sns
from scipy.stats import zscore
import matplotlib.pyplot as plt
import collections
from natsort import natsorted

from scipy import stats
from scipy import sparse
from sklearn.decomposition import PCA
from umap import UMAP

from matplotlib.colors import LinearSegmentedColormap

from scroutines.config_plots import *
from scroutines import powerplots # .config_plots import *
from scroutines import pnmf
from scroutines import basicu
from scroutines.gene_modules import GeneModules  

from atac_utils import merge_peaks

In [None]:
# Time points of the series; plot_heatmaps draws each one as a "P<t>" label
# above the corresponding block of columns.
times = [6, 8, 10, 12, 14, 17, 21]

# prep the matrices
- peaks and genes - over time and type

show these matrices side-by-side
- gene by (type & time) (zscore)
- peak (average over the same gene) by (type & time) (zscore)

In [None]:
# Presumably the directory figures are saved to (absolute, machine-specific
# path). NOTE(review): not referenced by any of the cells visible here.
outdir_fig = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/figures"

In [None]:
# Load the gene table (tab-separated, no header row, so columns are 0, 1, ...).
# Column 4 holds a label string: a gene is flagged isA / isC when that label
# contains the letter 'A' / 'C', and isAC when it contains both.
f = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac/all_ac_genes_unique.bed'
df_genes_ordered = pd.read_csv(f, sep='\t', header=None)
for flag in ('A', 'C'):
    df_genes_ordered[f'is{flag}'] = df_genes_ordered[4].str.contains(flag)
df_genes_ordered['isAC'] = np.logical_and(
    df_genes_ordered['isA'],
    df_genes_ordered['isC'],
)
df_genes_ordered

# select C peaks

In [None]:
# Boolean mask over the rows of the gene table: genes flagged 'C'.
cond_gene = df_genes_ordered['isC'].values

# Column 3 is the gene identifier that peaks are matched against further down.
# Keep both the identifiers and their row positions in the original table.
selected_genes = df_genes_ordered.loc[cond_gene, 3]
genes_order = selected_genes.to_numpy()
genes_order_idx = selected_genes.index.to_numpy()
genes_order, genes_order_idx

In [None]:
wkdir = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac/"
f_out = f"{wkdir}all_ac_peaks_to_ac_genes.bed"

# Peak-to-gene table (tab-separated, no header row).
df_res = pd.read_csv(f_out, sep='\t', header=None)

# Keep a peak when both conditions hold:
#   * column 9 is below 1e6 -- presumably the peak-to-gene distance in bp
#     (NOTE(review): if that distance is signed, only one side is bounded);
#   * column 7 (the gene the peak is assigned to) is one of the selected genes.
within_range = (df_res[9] < 1e6).values
assigned_to_selected = df_res[7].isin(genes_order)
cond_peak = np.logical_and(within_range, assigned_to_selected)

df_res2 = df_res[cond_peak].copy()
df_res2

In [None]:
# Load the peak tensor. Later cells index it with four axes: positions 0 and 1
# of the third axis are averaged, and the last axis is masked per peak.
# NOTE(review): the first two axes are presumably time and type -- confirm.
f = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac/all_AvsC_peak_tensor.npy'
tensor_peak = np.load(f)
tensor_peak.shape

In [None]:
# Load the RNA tensor. Later cells index it with four axes: positions 0 and 1
# of the third axis are averaged, and the last axis is masked per gene.
# NOTE(review): the first two axes are presumably time and type -- confirm.
f = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac/all_ac_genes_tensor.npy'
tensor_rna = np.load(f)
tensor_rna.shape

In [None]:
# Keep only the selected peaks: cond_peak is applied as a mask on the last axis.
# NOTE(review): assumes the last axis follows the row order of df_res -- confirm.
tensor_peak_sub = tensor_peak[:,:,:,cond_peak]
tensor_peak_sub.shape

In [None]:
# Keep only the selected genes: cond_gene is applied as a mask on the last axis.
# NOTE(review): assumes the last axis follows the row order of df_genes_ordered -- confirm.
tensor_rna_sub = tensor_rna[:,:,:,cond_gene]
tensor_rna_sub.shape

In [None]:
# get A
# Average entries 0 and 1 of the third axis (presumably two replicates -- TODO
# confirm), flatten the two leading axes into one "sample" axis, z-score every
# gene across those samples, and transpose so that rows are genes.
rna_avg = 0.5 * (tensor_rna_sub[:, :, 0, :] + tensor_rna_sub[:, :, 1, :])
rna_flat = rna_avg.reshape(-1, rna_avg.shape[-1])
A = ((rna_flat - rna_flat.mean(axis=0)) / rna_flat.std(axis=0)).T
A.shape

In [None]:
# get a gene by peak association matrix
n_peaks, n_genes = len(df_res2), len(genes_order)
closest_gene_to_peaks = df_res2[7].values

# Row p of the matrix is peak p; its single non-zero column is the position of
# the peak's assigned gene within genes_order.
peak_idx = np.arange(n_peaks)
gene_idx = basicu.get_index_from_array(genes_order, closest_gene_to_peaks)
print(peak_idx, peak_idx.shape)
print(gene_idx, gene_idx.shape)
membership = sparse.coo_matrix(
    ([1] * n_peaks, (peak_idx, gene_idx)),
    shape=(n_peaks, n_genes),
)
I_pg = membership.toarray()
# Normalise each gene column to sum to one, so the product below is a per-gene
# mean over its peaks; the epsilon keeps genes without any peak at zero.
I_pg = I_pg / (I_pg.sum(axis=0) + 1e-10)

# get B
# Same recipe as for A: average entries 0 and 1 of the third axis, flatten the
# two leading axes, z-score every peak, then average peaks per gene.
# NOTE(review): a peak with zero variance becomes NaN here, and because
# NaN * 0 is NaN the dot product would spread it to every gene.
peak_avg = 0.5 * (tensor_peak_sub[:, :, 0, :] + tensor_peak_sub[:, :, 1, :])
peak_flat = peak_avg.reshape(-1, peak_avg.shape[-1])
peak_z = (peak_flat - peak_flat.mean(axis=0)) / peak_flat.std(axis=0)
B = peak_z.dot(I_pg).T
B.shape

# same order

In [None]:
from scipy.cluster.hierarchy import linkage, dendrogram

In [None]:
def plot_heatmaps(configs, tree=None, size_x=6, size_y=12):
    """Draw one or more heatmaps side by side, optionally next to a row dendrogram.

    Parameters
    ----------
    configs : list of dict
        One entry per heatmap panel. Each dict must provide the keys
        'mat' (2D matrix to draw), 'title', 'ylabel', 'vmin', 'vmax'
        and 'cbar_label'.
    tree : dict, optional
        When given, ``tree['Z']`` is a linkage matrix drawn as a left-oriented
        dendrogram in an extra narrow panel to the left of the heatmaps. The
        rows of the matrices are expected to already be in the leaf order of
        that linkage.
    size_x, size_y : float
        Width of a single heatmap panel and height of the whole figure (inches).

    Notes
    -----
    Reads the module-level names ``times`` (time points, drawn as "P<t>"
    labels) and ``n_type`` (number of columns per time point); both must be
    defined before the function is called. The x tick labels are hard-coded
    for ``n_type == 5``.
    """
    n_width = 3  # each heatmap is this many times wider than the dendrogram panel
    n_plots = len(configs)
    fullsize_x = n_plots*size_x

    # One-row mosaic given as a list of labels rather than a string, so that
    # panel labels with more than one character ("10", "11", ...) still work.
    mosaic_row = [str(i_plot) for i_plot in range(n_plots) for _ in range(n_width)]

    if tree is not None:
        mosaic_row = ['A'] + mosaic_row
        fullsize_x += 1/n_width*size_x

    fig, axs = plt.subplot_mosaic([mosaic_row], figsize=(fullsize_x, size_y))
    if tree is not None:
        ax = axs['A']
        Z = tree['Z']
        dendrogram(Z, ax=ax, orientation='left', color_threshold=False, above_threshold_color='k')
        # scipy places the first leaf at the bottom of a left-oriented
        # dendrogram, whereas sns.heatmap draws row 0 at the top. Flip the
        # axis so every leaf lines up with its heatmap row.
        ax.invert_yaxis()
        ax.grid(False)
        ax.axis('off')

    for i_plot, config in enumerate(configs):
        ax = axs[f'{i_plot}']
        mat = config['mat']
        title = config['title']
        ylabel = config['ylabel']
        vmin = config['vmin']
        vmax = config['vmax']
        cbar_label = config['cbar_label']

        # Horizontal colorbar below the heatmap by default; with a dendrogram a
        # small vertical one is used instead, which leaves the heatmap at full
        # height next to the tree.
        cbar_kws = dict(shrink=0.5, orientation='horizontal', pad=0.05, label=cbar_label)
        if tree is not None:
            cbar_kws = dict(shrink=0.2, pad=0.05, label=cbar_label)

        sns.heatmap(mat, cmap='coolwarm', vmax=vmax, vmin=vmin,
                    cbar_kws=cbar_kws,
                    ax=ax)
        # Mark the start of each time point's block of n_type columns.
        for i, time_point in enumerate(times):
            ax.axvline(i*n_type, color='k', linestyle='--', linewidth=1)
            ax.text(i*n_type, 0, f'P{time_point}', fontsize=15)
        ax.set_title(title, pad=15)
        ax.set_ylabel(ylabel)
        # Only the first block of columns gets type labels.
        ax.set_xticks(0.5+np.arange(n_type))
        ax.set_xticklabels(['A', '', 'B', '', 'C'], fontsize=10)
        row_ticks = np.arange(0, len(mat), 50)
        ax.set_yticks(row_ticks)
        ax.set_yticklabels(row_ticks, fontsize=10)
    fig.tight_layout()
    plt.show()

In [None]:
# Number of columns per time point in the heatmaps (read by plot_heatmaps).
n_type = 5

# Ward linkage on the rows of A alone, of B alone, and of A and B side by side.
Z_a, Z_b, Z_ab = (
    linkage(mat, method='ward')
    for mat in (A, B, np.hstack([A, B]))
)

# Leaf order of each tree, computed without drawing anything.
leaves_a, leaves_b, leaves_ab = (
    dendrogram(Z, no_plot=True)['leaves']
    for Z in (Z_a, Z_b, Z_ab)
)

# Every matrix re-ordered by every leaf order: <matrix>_<ordering>.
A_a, A_b, A_ab = (A[order] for order in (leaves_a, leaves_b, leaves_ab))
B_a, B_b, B_ab = (B[order] for order in (leaves_a, leaves_b, leaves_ab))

In [None]:
# Display settings shared by all panels of one modality; the panels differ
# only in which row ordering of the matrix they show.
rna_style = dict(title='RNA', ylabel='genes', cbar_label='zscore', vmin=-3, vmax=3)
atac_style = dict(title='ATAC', ylabel='peaks', cbar_label='zscore', vmin=-2, vmax=2)

configs = [
    dict(mat=A_a, **rna_style),    # 0: RNA, ordered by the RNA tree
    dict(mat=A_ab, **rna_style),   # 1: RNA, ordered by the joint tree
    dict(mat=A_b, **rna_style),    # 2: RNA, ordered by the ATAC tree
    dict(mat=B_b, **atac_style),   # 3: ATAC, ordered by the ATAC tree
    dict(mat=B_ab, **atac_style),  # 4: ATAC, ordered by the joint tree
    dict(mat=B_a, **atac_style),   # 5: ATAC, ordered by the RNA tree
]

In [None]:
# RNA under the three row orderings (own tree, joint tree, ATAC tree).
plot_heatmaps(configs[:3])

In [None]:
# ATAC under the three row orderings (own tree, joint tree, RNA tree).
plot_heatmaps(configs[3:6])

In [None]:
# Each modality next to its own tree, then both next to the joint tree.
plot_heatmaps([configs[0]], tree={'Z': Z_a})
plot_heatmaps([configs[3]], tree={'Z': Z_b})
plot_heatmaps([configs[1], configs[4]], tree={'Z': Z_ab})