In [None]:
import numpy as np
import pandas as pd
import os

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
from sklearn.decomposition import PCA

import scanpy as sc
from umap import UMAP
from scroutines import powerplots
from scroutines import basicu

In [None]:
def get_index_from_array(a, b):
    """Return the integer positions in `a` of the elements listed in `b`.

    Parameters
    ----------
    a : array-like, 1-D
        Values to search in. A pandas Series is accepted as well; only its
        values are used, so the result is positional even when the Series
        carries a name or a non-default index.
    b : list-like
        Values to look up; every one of them must occur in `a`.

    Returns
    -------
    numpy.ndarray
        Positions in `a`, ordered like `b`. A value that occurs several
        times in `a` contributes all of its positions.

    Raises
    ------
    KeyError
        If any element of `b` is not present in `a`.
    """
    # Build a value -> position lookup. Going through pd.Index discards any
    # index/name attached to `a`, which the previous
    # reset_index()/set_index(0) chain silently depended on.
    labels = pd.Index(a)
    positions = pd.Series(np.arange(len(labels), dtype=np.int64), index=labels)
    return positions.loc[b].values
    

In [None]:
# Directory that the figure-saving cells below write their PDFs into.
# NOTE(review): hard-coded absolute path -- adjust when running in another environment.
outdirfig = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/figures/250409"

In [None]:
# Barplot

In [None]:
# Bar heights, one entry per bar (the last three are zero).
nums = [8486, 11249, 11430, 0, 0, 0]
xpos = np.arange(len(nums))

fig, ax = plt.subplots(figsize=(3, 3))
# three colors are supplied for the six bars
ax.bar(xpos, nums, color=['C0', 'C1', 'C2'], edgecolor='none')
ax.set_xticks(xpos)
ax.grid(axis='x')
sns.despine(ax=ax)

# save under the shared figure directory, then display inline
output = os.path.join(outdirfig, 'FigS1_ATAC_DAR_data_vs_shuff.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

# UMAP

In [None]:
# Table holding the 2-D embedding coordinates that are plotted below.
# Later cells read the columns 'label', 'r1', 'r2' (RNA panel) and 'a1', 'a2' (ATAC panel).
umap_csv = '/u/home/f/f7xiesnm/v1_multiome/rna_atac_umap_p21_l23.csv'
dfplot = pd.read_csv(umap_csv)
dfplot

In [None]:
# Per-label centroids: mean of every numeric column within each 'label' group.
# numeric_only=True keeps this working on pandas >= 2.0 when the table also
# holds non-numeric columns (e.g. an ID column), which would otherwise raise
# a TypeError inside mean().
ctrds = dfplot.groupby('label').mean(numeric_only=True)

In [None]:
# Sorted array of the distinct values found in the 'label' column.
unq_types = np.sort(pd.unique(dfplot['label']))
unq_types

In [None]:
# One panel per modality: (title, x column, y column) in dfplot.
panels = [('RNA', 'r1', 'r2'), ('ATAC', 'a1', 'a2')]
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

for ax, (title, x, y) in zip(axs, panels):
    # points are colored by 'label'; no legend, names are written at the centroids instead
    sns.scatterplot(
        data=dfplot, x=x, y=y, hue='label',
        s=2, edgecolor='none', rasterized=True,
        legend=False, ax=ax,
    )
    # bare panel: title only, no ticks and no axis labels
    ax.set_title(title)
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xticks([])
    ax.set_yticks([])

# write every label at its group-mean position (from ctrds) in both panels
for t in unq_types:
    ctr = ctrds.loc[t]
    axs[0].text(ctr['r1'], ctr['r2'], t)
    axs[1].text(ctr['a1'], ctr['a2'], t)

# flip the ATAC panel horizontally
axs[1].invert_xaxis()
fig.tight_layout()

output = os.path.join(outdirfig, 'Fig1_RNA_ATAC.pdf')
powerplots.savefig_autodate(fig, output)
plt.show()

# Confusion matrix

In [None]:
confmat = pd.read_csv(
    '/u/home/f/f7xiesnm/v1_multiome/rna_atac_confmat_p21_l23.csv',
    index_col=0,
)
# Row-normalize: every row is divided by its own total.
row_totals = confmat.sum(axis=1)
confmat_norm = confmat.div(row_totals, axis='index')

# Column order used for display.
unq_types_ordered = [
    'L2/3', 'L4', 'L5IT', 'L6IT',
    'L5PT', 'L6CT', 'L5NP', 'L6b',
    'Pvalb', 'Sst', 'Vip', 'Lamp5', 'Frem1', 'Stac',
    'Astro', 'OPC', 'OD', 'Micro',
    'Endo', 'VLMC',
]
# Visual check: number of ordered names vs. number of columns in the matrix.
len(unq_types_ordered), len(confmat_norm.columns)

In [None]:
# Keep all rows; take the columns in the order given by unq_types_ordered.
confmat_norm_reordered = confmat_norm.loc[:, unq_types_ordered]

In [None]:
rows = confmat_norm_reordered.index.values
cols = confmat_norm_reordered.columns.values
# ridx / cidx are used below to index the row and column names so the tick
# labels follow the returned matrix.
# NOTE(review): presumably diag_matrix_rows reorders rows/columns to bring the
# large entries onto the diagonal -- confirm in scroutines.basicu.
mat, ridx, cidx = basicu.diag_matrix_rows(confmat_norm_reordered)

fig, ax = plt.subplots()
# The matrix is drawn transposed: original rows run along x, original columns along y.
# Values are row-normalized fractions shown on a fixed 0-1 color scale, so the
# colorbar is labelled as a fraction (it previously read '% overlap').
sns.heatmap(mat.T, 
            xticklabels=rows[ridx],
            yticklabels=cols[cidx],
            cmap='rocket_r', 
            rasterized=True,
            cbar_kws={'shrink':0.5, 'label':'Fraction overlap'},
            vmax=1, vmin=0,
            ax=ax)
ax.set_aspect('equal')
output = os.path.join(outdirfig, 'FigS1_RNA_ATAC_confmat.pdf')
powerplots.savefig_autodate(fig, output)