# Questions
- Which TFs make it into a regulon, and which do not? 

- TFBS database
- level of expression 
- additional criteria? 

In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import anndata 
import seaborn as sns
from scipy.stats import zscore
import matplotlib.pyplot as plt
import collections
from natsort import natsorted

from scipy import stats
from scipy import sparse
from sklearn.decomposition import PCA
from umap import UMAP
from statsmodels.stats.multitest import multipletests

from matplotlib.colors import LinearSegmentedColormap

from scroutines.config_plots import *
from scroutines import powerplots # .config_plots import *
from scroutines import pnmf
from scroutines import basicu
from scroutines.gene_modules import GeneModules  

import atac_utils

In [None]:
# Output directory for figures; created via the shell (with parents) if it does not exist.
outdirfig = "/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/figures/250521"
!mkdir -p $outdirfig

In [None]:
# Input directory holding the regulon table read below; its contents are listed for reference.
ddir = '/u/home/f/f7xiesnm/v1_multiome/juyoun/' 
!ls $ddir

In [None]:
# Path of the regulon "big table" CSV inside `ddir`.
f = ddir+'regulons_l23alltime_trimmed_cleaned_bigtable.csv'
# scenic metadata
# The first CSV column is used as the row index. Columns read later in this
# notebook: TF, signs, Consensus_name, Gene, Region, Gene_signature_name,
# Region_signature_name.
df_scenic = pd.read_csv(f, index_col=0)
df_scenic


In [None]:
# One row per (TF, signs, Consensus_name) combination, i.e. per regulon: take the
# first table row of each group, keep the two signature-name columns, sort by TF.
df_reg = df_scenic.groupby(['TF', 'signs', 'Consensus_name']).first()[['Gene_signature_name', 'Region_signature_name', ]].sort_values('TF')

# Sorted unique TFs, target genes and regions appearing anywhere in the table.
scenic_tfs = np.sort(df_scenic['TF'].unique())
scenic_genes = np.sort(df_scenic['Gene'].unique()) # .shape
scenic_regions = np.sort(df_scenic['Region'].unique()) # .shape

# Summary counts: regulons, TFs, genes, regions.
num_reg    = len(df_reg)
num_tf     = len(scenic_tfs)
num_gene = len(scenic_genes)
num_region = len(scenic_regions)
print(num_reg, num_tf, num_gene, num_region)
df_reg

In [None]:
# Number of regulons for each distinct value of the `signs` index level.
df_reg.reset_index()['signs'].value_counts()

# prep RNA data

In [None]:
# RNA data
# Per-cell score table (columns scores_a / scores_b / scores_c), indexed by the
# first CSV column, which is used below to look up rows by `adata.obs.index`.
scores_abc = pd.read_csv("/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/scores_l23abc.csv", 
                         index_col=0,
                        )
# Derived contrast between the C and A scores; used later to rank cells.
scores_abc['scores_c-a'] = scores_abc['scores_c'] - scores_abc['scores_a']

# `anndata.read` is a deprecated alias of `read_h5ad` (dropped from recent
# anndata releases); call the supported reader directly.
adata = anndata.read_h5ad("/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/v1_multiome/superdupermegaRNA_hasraw_multiome_l23.h5ad")
# Work from the matrix stored in `.raw` (presumably raw counts -- TODO confirm).
adata.X = adata.raw.X

# Attach each score column to the cells, matched on the cell index.
for score_col in ['scores_a', 'scores_b', 'scores_c', 'scores_c-a']:
    adata.obs[score_col] = scores_abc.loc[adata.obs.index, score_col].copy()

adata

In [None]:
# Per-cell sample names taken from the 'Sample' obs column.
sample_labels = adata.obs['Sample'].values
# Time label = sample name without its last character and without 'DR'
# (e.g. 'P12DRa' -> 'P12' for names shaped like those in `todo_samps`).
time_labels = [s[:-1].replace('DR', '') for s in sample_labels]

adata.obs['sample'] = sample_labels #
adata.obs['time']   = time_labels

# Naturally sorted unique sample names, split by whether the name contains "DR".
uniq_samples = natsorted(np.unique(sample_labels))
nr_samples = [s for s in uniq_samples if "DR" not in s]
dr_samples = [s for s in uniq_samples if "DR" in s]

# Unique values of the existing 'cond' obs column (expected to be present in the loaded file).
uniq_conds = np.array(natsorted(np.unique(adata.obs['cond'].values)))
print(uniq_conds)

In [None]:
# remove mitochondrial genes (names starting with 'mt-')
adata = adata[:,~adata.var.index.str.contains(r'^mt-')]
# remove the sex-linked gene Xist (only this exact gene name is matched)
adata = adata[:,~adata.var.index.str.contains(r'^Xist$')]

# filter genes
# NOTE: `cond` is a notebook-level name that is reused as a loop variable in later cells.
cond = np.ravel((adata.X>0).sum(axis=0)) > 10 # expressed in more than 10 cells
# .copy() turns the filtered view into an independent AnnData object.
adata = adata[:,cond].copy()

In [None]:
# counts
x = adata.X
# total counts per cell (row sums)
cov = np.ravel(np.sum(x, axis=1))
# gene names, in column order of the filtered matrix; used for index lookups later
genes = adata.var.index.values

# CP10k
# Row-scale by 1/total via a diagonal matrix, then multiply by 1e4.
# NOTE(review): a cell with zero total counts would give a division by zero here.
xn = (sparse.diags(1/cov).dot(x))*1e4

# log2(CP10k+1)
# Only the stored entries (`.data`) are transformed, so `xn` is assumed to be a
# scipy sparse matrix; implicit zeros stay zero, which matches log2(0+1) = 0.
xln = xn.copy()
xln.data = np.log2(xln.data+1)

# Dense copies stored as layers for the per-sample averaging below.
adata.layers[    'norm'] = np.array(xn.todense())
adata.layers[ 'lognorm'] = np.array(xln.todense())

In [None]:
# adata.obs['sample'].unique()
import re

# Conditions in processing order: the four DR conditions first, then the others.
todo_conds = [
    'P12DR', 'P14DR', 'P17DR', 'P21DR',
    'P6', 'P8', 'P10', 'P12', 'P14', 'P17', 'P21', 
]
# The 25 samples, grouped by condition in the same order as `todo_conds`
# (sample name = condition name + one replicate letter).
# `mean_over_samples` hard-codes these group boundaries.
todo_samps = [
    'P12DRa', 'P12DRb',
    'P14DRa', 'P14DRb',
    'P17DRa', 'P17DRb',
    'P21DRa', 'P21DRb',
    'P6a', 'P6b', 'P6c', 
    'P8a', 'P8b', 'P8c', 
    'P10a', 'P10b', 
    'P12a', 'P12b', 'P12c', 
    'P14a', 'P14b',
    'P17a', 'P17b', 
    'P21a', 'P21b', 
]

# Condition name -> position of that condition in `todo_conds`.
cond2condcode = {
    'P12DR': 0, 
    'P14DR': 1,
    'P17DR': 2,
    'P21DR': 3,
    'P6':    4,
    'P8':    5,
    'P10':   6,
    'P12':   7,
    'P14':   8,
    'P17':   9,
    'P21':  10,
}
# Numeric part of each name (all ASCII letters stripped), e.g. 'P12DR' -> 12;
# used as x coordinates when plotting.
todo_conds_t = np.array([int(re.sub(r'[a-zA-Z]', '', a)) for a in todo_conds])
todo_samps_t = np.array([int(re.sub(r'[a-zA-Z]', '', a)) for a in todo_samps])
print(todo_conds_t)
print(todo_samps_t)

def mean_over_samples(mmat_res_samp):
    """Average 25 per-sample rows into 11 per-condition rows.

    Rows along axis 0 are taken in consecutive groups of sizes
    2, 2, 2, 2, 3, 3, 2, 3, 2, 2, 2 and each group is replaced by its mean.
    These sizes match the replicate counts per condition in `todo_samps`.
    Trailing axes are left untouched.

    Returns a float array of shape (11, *mmat_res_samp.shape[1:]).
    """
    assert mmat_res_samp.shape[0] == 25

    # number of consecutive sample rows that belong to each condition
    group_sizes = (2, 2, 2, 2, 3, 3, 2, 3, 2, 2, 2)

    cond_means = np.zeros((len(group_sizes),) + tuple(mmat_res_samp.shape[1:]))
    start = 0
    for k, size in enumerate(group_sizes):
        stop = start + size
        cond_means[k] = np.mean(mmat_res_samp[start:stop], axis=0)
        start = stop

    return cond_means

def transform_bigredmat(bigmat):
    """Flatten a bigmat/redmat into a feature-by-column matrix and its z-score.

    (samp, type, gene) -> (gene, samp*type)

    The column order is also changed: the first 20 (= 4*5) flattened columns
    are moved to the end.

    Returns (fmat, zmat), where zmat is fmat z-scored along each row.
    """
    n_features = bigmat.shape[-1]
    by_feature = bigmat.reshape(-1, n_features).T

    # CHANGED COLUMN ORDER: rotate the leading 4*5 columns to the back
    n_leading = 4 * 5
    tail = by_feature[:, n_leading:]
    head = by_feature[:, :n_leading]
    fmat = np.concatenate((tail, head), axis=1)

    zmat = zscore(fmat, axis=1)
    return fmat, zmat

In [None]:
%%time


# Pseudocount added before taking log2.
offset = 1
# Number of equal-sized rank bins along the scores_c-a axis.
n_type = 5
# Fraction of cells per sample taken from each extreme to define the A / B / C groups.
frac_archetypal_cells_viz = 0.2

# Baseline: log2 of the mean over all cells, after scaling CP10k by 1e2.
mat = adata.layers['norm'][...]
gexp_l23baseline = np.log2(np.mean(mat, axis=0)*1e2+offset) # CP10k -> CPM

# Output arrays, (sample, group, gene): log2 fold change vs. the baseline.
bigmat_nfd = np.zeros((len(todo_samps), n_type, adata.shape[1]))
bigmat_abc = np.zeros((len(todo_samps),      3, adata.shape[1]))

for i, samp in enumerate(todo_samps):
    print(samp)
    
    # get sub
    adatasub = adata[adata.obs['sample']==samp]
    n_cells = adatasub.shape[0]
    
    # get A vs C 
    # within-sample ranks (1 = lowest score)
    ranks_ac = adatasub.obs['scores_c-a'].rank()
    ranks_b  = adatasub.obs['scores_b'].rank()
    
    # per type
    # split the cells of this sample into n_type quantile bins of the c-a rank
    cells_type_nfd = pd.qcut(ranks_ac, n_type, labels=False)
    for j in range(n_type):
        mat_j = adatasub[cells_type_nfd==j].layers['norm'][...]
        mmat_j = np.log2(np.mean(mat_j, axis=0)*1e2+offset) - gexp_l23baseline # CP10k -> CPM
        bigmat_nfd[i,j] = mmat_j
    
    # A, B, C
    num_archetypal_cells_viz = int(n_cells*frac_archetypal_cells_viz)
    
    # candidates: lowest c-a ranks (A), highest c-a ranks (C), highest b ranks (B)
    precond_a = ranks_ac <= num_archetypal_cells_viz
    precond_c = ranks_ac > adatasub.shape[0] - num_archetypal_cells_viz
    precond_b = ranks_b  > adatasub.shape[0] - num_archetypal_cells_viz
    
    # keep only cells that are a candidate for exactly one of the three groups
    cond_a = np.all([ precond_a, ~precond_b, ~precond_c], axis=0)
    cond_b = np.all([~precond_a,  precond_b, ~precond_c], axis=0)
    cond_c = np.all([~precond_a, ~precond_b,  precond_c], axis=0)
    
    # NOTE: the loop variable `cond` overwrites the notebook-level gene-filter mask of the same name.
    for j, cond in enumerate([cond_a, cond_b, cond_c]):
        mat_j = adatasub[cond].layers['norm'][...]
        mmat_j = np.log2(np.mean(mat_j, axis=0)*1e2+offset) - gexp_l23baseline # CP10k -> CPM
        bigmat_abc[i,j] = mmat_j


In [None]:
# Average replicate samples: (25 samples, 3, gene) -> (11 conditions, 3, gene).
redmat_abc = mean_over_samples(bigmat_abc)

In [None]:
# Sanity check: per-sample array, per-condition array, and the gene-name vector.
bigmat_abc.shape, redmat_abc.shape, genes.shape

# prep region data

In [None]:
%%time
# One matrix object per condition, appended in `todo_conds` order so that
# `cond2condcode[cond]` can be used as the list index later.
adatas_pk = []
# for cond in sample_conditions:
# NOTE: the loop variable `cond` overwrites any earlier notebook-level `cond`.
for cond in todo_conds:
    print(cond)
    f = f'/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac/pmat_l23scenic_v2_{cond}.h5ad'
    adata_pk = sc.read(f)
    adatas_pk.append(adata_pk)
    print(adata_pk)
    # break

In [None]:
# Region names, taken from the LAST matrix loaded by the loop above (`adata_pk`
# is the leftover loop variable).
# NOTE(review): assumes every per-condition file shares the same regions in the same order -- confirm.
regionset = adata_pk.var.index.values
regionset

In [None]:
%%time
# Stack the dense matrices of all conditions (cells from every condition as rows).
# NOTE: `mat` and `adata_pk` reuse names from earlier cells.
mat = []
for adata_pk in adatas_pk:
    mat.append(np.array(adata_pk.X.todense()))
mat = np.vstack(mat)
# Per-cell normalisation to counts per million.
mat = mat/np.sum(mat, axis=1).reshape(-1,1)*1e6
# Baseline: log2 of the mean CPM over all cells.
# `offset` must already be defined (it is set in the RNA and ATAC profiling cells).
atac_l23baseline = np.log2(np.mean(mat, axis=0)+offset)

In [None]:
%%time

# Same parameters as in the RNA profiling cell.
offset = 1
n_type = 5
frac_archetypal_cells_viz = 0.2
# Output arrays, (sample, group, region): log2 fold change vs. `atac_l23baseline`.
bigmatatac_nfd = np.zeros((len(todo_samps), n_type, len(regionset)))
bigmatatac_abc = np.zeros((len(todo_samps),      3, len(regionset)))

for i, samp in enumerate(todo_samps):
    # condition name = sample name without the trailing replicate letter
    cond_name = samp[:-1]
    cond_code = cond2condcode[cond_name]
    print(samp, cond_name, cond_code)
    
    # get ATAC
    adata_pk = adatas_pk[cond_code]
    cells_atac = adata_pk.obs.index.values
    
    # get sub
    adatasub = adata[adata.obs['sample']==samp]
    cells_rna = adatasub.obs.index.values
    n_cells = adatasub.shape[0]
    
    # get A vs C 
    # groups are defined from the RNA-side scores, ranked within this sample
    ranks_ac = adatasub.obs['scores_c-a'].rank()
    ranks_b  = adatasub.obs['scores_b'].rank()
    
    # per type
    cells_type_nfd = pd.qcut(ranks_ac, n_type, labels=False)
    for j in range(n_type):
        cond_j = cells_type_nfd==j
        # keep only cells of this bin that are also present in the ATAC matrix
        cells_j = np.intersect1d(cells_rna[cond_j], cells_atac)
        mat_j = np.array(adata_pk[cells_j].X.todense()) 
        # per-cell CPM, then log2 mean relative to the baseline
        mat_j = mat_j/np.sum(mat_j, axis=1).reshape(-1,1)*1e6
        mmat_j = np.log2(np.mean(mat_j, axis=0)+offset) - atac_l23baseline
        bigmatatac_nfd[i,j] = mmat_j
    
    # A, B, C
    num_archetypal_cells_viz = int(n_cells*frac_archetypal_cells_viz)
    
    precond_a = ranks_ac <= num_archetypal_cells_viz
    precond_c = ranks_ac > adatasub.shape[0] - num_archetypal_cells_viz
    precond_b = ranks_b  > adatasub.shape[0] - num_archetypal_cells_viz
    
    # keep only cells that are a candidate for exactly one of the three groups
    cond_a = np.all([ precond_a, ~precond_b, ~precond_c], axis=0)
    cond_b = np.all([~precond_a,  precond_b, ~precond_c], axis=0)
    cond_c = np.all([~precond_a, ~precond_b,  precond_c], axis=0)
    
        
    # get A, B, C 
    # NOTE(review): cells_a / cells_b / cells_c are not used below; the loop
    # recomputes the same intersections as `cells_j`.
    cells_a = np.intersect1d(cells_rna[cond_a], cells_atac)
    cells_b = np.intersect1d(cells_rna[cond_b], cells_atac)
    cells_c = np.intersect1d(cells_rna[cond_c], cells_atac)
    
    for j, cond_j in enumerate([cond_a, cond_b, cond_c]):
        cells_j = np.intersect1d(cells_rna[cond_j], cells_atac)
        mat_j = np.array(adata_pk[cells_j].X.todense()) 
        mat_j = mat_j/np.sum(mat_j, axis=1).reshape(-1,1)*1e6
        mmat_j = np.log2(np.mean(mat_j, axis=0)+offset) - atac_l23baseline
        bigmatatac_abc[i,j] = mmat_j
        

In [None]:
# Axis 0 is the sample (len(todo_samps)), not the condition.
print(bigmat_nfd.shape) # sample, type (n_type rank bins), gene
print(bigmat_abc.shape) # sample, type (A/B/C), gene

print(bigmatatac_nfd.shape) # sample, type (n_type rank bins), region
print(bigmatatac_abc.shape) # sample, type (A/B/C), region

# profile regulons

In [None]:
def prep_regulon(reg_tf, reg_name):
    """Collect the A/B/C profiles of one regulon.

    Example arguments:
        reg_tf = 'Meis2'
        reg_name = 'Meis2_+_+'

    Reads the notebook-level objects `df_scenic`, `genes`, `regionset`,
    `bigmat_abc` and `bigmatatac_abc`.

    Parameters
    ----------
    reg_tf : gene name of the transcription factor, looked up in `genes`.
    reg_name : value of the 'Consensus_name' column that selects the regulon.

    Returns
    -------
    reg_genes : unique target genes of the regulon.
    reg_regions : unique regions of the regulon.
    bigmat_abc_ig_list : three (sample, 3) arrays -- TF expression, mean
        target-gene expression, mean region accessibility.
    redmat_abc_ig_list : the same three arrays averaged over replicate
        samples with `mean_over_samples`.

    Raises
    ------
    AssertionError if the regulon has no rows in `df_scenic`, or if the TF,
    any target gene or any region is reported as missing (index -1).
    """

    df_this_reg = df_scenic[df_scenic['Consensus_name']==reg_name]
    # An unknown name would otherwise give empty selections and all-NaN means.
    assert len(df_this_reg) > 0, f"no rows in df_scenic for regulon {reg_name!r}"
    reg_genes = df_this_reg['Gene'].unique()
    reg_regions = df_this_reg['Region'].unique()
    print(reg_genes.shape, reg_regions.shape)

    # positions of the TF, target genes and regions in the profile arrays
    tf_idx   = basicu.get_index_from_array(genes, [reg_tf])[0]
    gene_idx  = basicu.get_index_from_array(genes, reg_genes)
    region_idx = basicu.get_index_from_array(regionset, reg_regions)
    # -1 is treated as the "not found" marker, as in the gene and region checks
    # below. Without this check a missing TF would silently index the last gene.
    assert tf_idx != -1, f"TF {reg_tf!r} not found in genes"
    assert np.all(region_idx != -1)
    assert np.all(gene_idx != -1)

    ftrs_x = bigmat_abc[:,:,tf_idx]                               # TF expression
    ftrs_y = np.mean(bigmat_abc[:,:,gene_idx], axis=-1)           # mean over target genes
    ftrs_z = np.mean(bigmatatac_abc[:,:,region_idx], axis=-1)     # mean over regions

    bigmat_abc_ig_list = [ftrs_x, ftrs_y, ftrs_z]
    redmat_abc_ig_list = [mean_over_samples(x) for x in bigmat_abc_ig_list]

    return reg_genes, reg_regions, bigmat_abc_ig_list, redmat_abc_ig_list


class Regulon:
    """Profiles of one regulon across samples and conditions.

    Attributes
    ----------
    reg_tf, reg_name : the arguments passed to the constructor.
    reg_genes, reg_regions : unique target genes / regions of the regulon.
    bigmat_abc : list of three per-sample arrays (TF expression, mean target
        expression, mean region accessibility), as returned by `prep_regulon`.
    redmat_abc : the same three arrays averaged over replicate samples.
    """

    def __init__(self, reg_tf, reg_name):
        """Build the profiles with `prep_regulon(reg_tf, reg_name)`."""
        reg_genes, reg_regions, bigmat_abc, redmat_abc = prep_regulon(reg_tf, reg_name)

        self.reg_tf      = reg_tf
        self.reg_name    = reg_name
        self.reg_genes   = reg_genes
        self.reg_regions = reg_regions

        self.bigmat_abc = bigmat_abc
        self.redmat_abc = redmat_abc

    def plot(self, output=None):
        """Draw three panels: TF, target genes, regions.

        In each panel the three columns of the profile (groups A, B, C) are
        drawn in colours C0, C1, C2: open circles for individual samples and a
        line for the per-condition mean. Only the non-DR rows are drawn
        (samples from index 8 on, conditions from index 4 on); the DR rows
        that come first in `todo_samps` / `todo_conds` are intentionally
        left out.

        Parameters
        ----------
        output : optional path passed to `powerplots.savefig_autodate`; when
            None the figure is only shown.
        """
        colors = ('C0', 'C1', 'C2')

        fig, axs = plt.subplots(1, 3, figsize=(3*3,1*4), sharex=True) #, sharey=True)
        for ax, bigmat_mean_ig, redmat_mean_ig in zip(axs, self.bigmat_abc, self.redmat_abc):
            # individual samples
            for k, color in enumerate(colors):
                ax.plot(todo_samps_t[8:], bigmat_mean_ig[8:,k], 'o', markersize=5, fillstyle='none', color=color)

            # per-condition means
            for k, color in enumerate(colors):
                ax.plot(todo_conds_t[4:], redmat_mean_ig[4:,k], '-', color=color)

            ax.grid(False)
            sns.despine(ax=ax)
            ax.set_xticks([6,10,14,17,21])
            ax.set_xticklabels(["P6","10","14","17","21"])

        axs[0].set_ylabel('log2(FC in CPM)')
        axs[0].set_title(f'{self.reg_name}')
        axs[1].set_title(f'Genes n={len(self.reg_genes)}')
        axs[2].set_title(f'Regions n={len(self.reg_regions)}')

        # Apply the layout BEFORE saving so the saved file matches what is shown.
        fig.tight_layout()
        if output is not None:
            powerplots.savefig_autodate(fig, output)
        plt.show()
    

In [None]:
# Names of all regulons (one per row of df_reg).
all_regulons = df_reg.reset_index()['Consensus_name'].values
# Boolean masks selecting regulons by the last three characters of the name
# (the two-sign suffix, e.g. 'Meis2_+_+' ends with '+_+').
cond_pp = np.array([x[-3:]=='+_+' for x in all_regulons])
cond_mp = np.array([x[-3:]=='-_+' for x in all_regulons])
cond_pm = np.array([x[-3:]=='+_-' for x in all_regulons])
cond_mm = np.array([x[-3:]=='-_-' for x in all_regulons])

# Total number of regulons, then the count in each sign class.
print(all_regulons.shape)
print(cond_pp.sum(), 
      cond_mp.sum(),
      cond_pm.sum(),
      cond_mm.sum(),
     )

In [None]:
# Quick check of the Regulon class. The unconditional `break` ends the loop
# after the first iteration, so only all_regulons[0] is built and plotted.
for reg_name in all_regulons[:4]:
    # TF name = text before the first underscore. NOTE(review): other cells use
    # reg_name[:-4] instead; the two differ if a TF name contains an underscore.
    tf = reg_name.split('_')[0]
    reg = Regulon(tf, reg_name)
    reg.plot()
    break

# single cell regulons

In [None]:
selected_regulons = all_regulons[cond_pp]
n = len(selected_regulons)
nx = 12
ny = int((n+nx-1)/nx)

fig, axs = plt.subplots(ny,nx,figsize=(nx*3.5,ny*3.5))
for i, (ax, reg_name) in enumerate(zip(axs.flat, selected_regulons)):
    tf = reg_name[:-4]

    df_this_reg = df_scenic[df_scenic['Consensus_name']==reg_name]
    reg_genes = df_this_reg['Gene'].unique()
    reg_regions = df_this_reg['Region'].unique()

    tf_idx = basicu.get_index_from_array(genes, [tf])[0]
    reg_genes_idx = basicu.get_index_from_array(genes, reg_genes)
    
    x = bigmat_nfd[:,:,tf_idx].reshape(-1,)
    xb = np.linspace(np.min(x), np.max(x), 50)
    y = np.mean(bigmat_nfd[:,:,reg_genes_idx], axis=2).reshape(-1,)
    
    slope = np.polyfit(x, y, deg=1)[0]
    f1 = np.poly1d(np.polyfit(x, y, deg=1))
    f2 = np.poly1d(np.polyfit(x, y, deg=2))
    f3 = np.poly1d(np.polyfit(x, y, deg=3))
    
    ax.scatter(x,y, color='gray', s=10)
    ax.plot(xb,f1(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f2(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f3(xb), linewidth=1.5, zorder=0)
    sns.despine(ax=ax)
    
    ax.set_title(f'{reg_name}\n slope={slope:.2f}')
# axs.flat[0].set_xlabel('TF log2')
# axs.flat[0].set_ylabel('Target gene log2')
fig.tight_layout()
plt.show()

In [None]:
selected_regulons = all_regulons[cond_pm]
n = len(selected_regulons)
nx = 12
ny = int((n+nx-1)/nx)

fig, axs = plt.subplots(ny,nx,figsize=(nx*3.5,ny*3.5))
for i, (ax, reg_name) in enumerate(zip(axs.flat, selected_regulons)):
    tf = reg_name[:-4]

    df_this_reg = df_scenic[df_scenic['Consensus_name']==reg_name]
    reg_genes = df_this_reg['Gene'].unique()
    reg_regions = df_this_reg['Region'].unique()

    tf_idx = basicu.get_index_from_array(genes, [tf])[0]
    reg_genes_idx = basicu.get_index_from_array(genes, reg_genes)
    
    x = bigmat_nfd[:,:,tf_idx].reshape(-1,)
    xb = np.linspace(np.min(x), np.max(x), 50)
    y = np.mean(bigmat_nfd[:,:,reg_genes_idx], axis=2).reshape(-1,)
    
    slope = np.polyfit(x, y, deg=1)[0]
    f1 = np.poly1d(np.polyfit(x, y, deg=1))
    f2 = np.poly1d(np.polyfit(x, y, deg=2))
    f3 = np.poly1d(np.polyfit(x, y, deg=3))
    
    ax.scatter(x,y, color='gray', s=10)
    ax.plot(xb,f1(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f2(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f3(xb), linewidth=1.5, zorder=0)
    sns.despine(ax=ax)
    
    ax.set_title(f'{reg_name}\n slope={slope:.2f}')
# axs.flat[0].set_xlabel('TF log2')
# axs.flat[0].set_ylabel('Target gene log2')
fig.tight_layout()
plt.show()

In [None]:
selected_regulons = all_regulons[cond_mp]
n = len(selected_regulons)
nx = 12
ny = int((n+nx-1)/nx)

fig, axs = plt.subplots(ny,nx,figsize=(nx*3.5,ny*3.5))
for i, (ax, reg_name) in enumerate(zip(axs.flat, selected_regulons)):
    tf = reg_name[:-4]

    df_this_reg = df_scenic[df_scenic['Consensus_name']==reg_name]
    reg_genes = df_this_reg['Gene'].unique()
    reg_regions = df_this_reg['Region'].unique()

    tf_idx = basicu.get_index_from_array(genes, [tf])[0]
    reg_genes_idx = basicu.get_index_from_array(genes, reg_genes)
    
    x = bigmat_nfd[:,:,tf_idx].reshape(-1,)
    xb = np.linspace(np.min(x), np.max(x), 50)
    y = np.mean(bigmat_nfd[:,:,reg_genes_idx], axis=2).reshape(-1,)
    
    slope = np.polyfit(x, y, deg=1)[0]
    f1 = np.poly1d(np.polyfit(x, y, deg=1))
    f2 = np.poly1d(np.polyfit(x, y, deg=2))
    f3 = np.poly1d(np.polyfit(x, y, deg=3))
    
    ax.scatter(x,y, color='gray', s=10)
    ax.plot(xb,f1(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f2(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f3(xb), linewidth=1.5, zorder=0)
    sns.despine(ax=ax)
    
    ax.set_title(f'{reg_name}\n slope={slope:.2f}')
# axs.flat[0].set_xlabel('TF log2')
# axs.flat[0].set_ylabel('Target gene log2')
fig.tight_layout()
plt.show()

In [None]:
selected_regulons = all_regulons[cond_mm]
n = len(selected_regulons)
nx = 12
ny = int((n+nx-1)/nx)

fig, axs = plt.subplots(ny,nx,figsize=(nx*3.5,ny*3.5))
for i, (ax, reg_name) in enumerate(zip(axs.flat, selected_regulons)):
    tf = reg_name[:-4]

    df_this_reg = df_scenic[df_scenic['Consensus_name']==reg_name]
    reg_genes = df_this_reg['Gene'].unique()
    reg_regions = df_this_reg['Region'].unique()

    tf_idx = basicu.get_index_from_array(genes, [tf])[0]
    reg_genes_idx = basicu.get_index_from_array(genes, reg_genes)
    
    x = bigmat_nfd[:,:,tf_idx].reshape(-1,)
    xb = np.linspace(np.min(x), np.max(x), 50)
    y = np.mean(bigmat_nfd[:,:,reg_genes_idx], axis=2).reshape(-1,)
    
    slope = np.polyfit(x, y, deg=1)[0]
    f1 = np.poly1d(np.polyfit(x, y, deg=1))
    f2 = np.poly1d(np.polyfit(x, y, deg=2))
    f3 = np.poly1d(np.polyfit(x, y, deg=3))
    
    ax.scatter(x,y, color='gray', s=10)
    ax.plot(xb,f1(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f2(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f3(xb), linewidth=1.5, zorder=0)
    sns.despine(ax=ax)
    
    ax.set_title(f'{reg_name}\n slope={slope:.2f}')
# axs.flat[0].set_xlabel('TF log2')
# axs.flat[0].set_ylabel('Target gene log2')
fig.tight_layout()
plt.show()

In [None]:
# One panel per target gene of a single regulon: TF expression (x) against the
# expression of that gene (y), over all (sample, rank-bin) entries of bigmat_nfd.
reg_name = 'Fos_+_+'
tf = reg_name[:-4]  # name without the 4-character sign suffix -> 'Fos'

df_this_reg = df_scenic[df_scenic['Consensus_name']==reg_name]
reg_genes = df_this_reg['Gene'].unique()
reg_regions = df_this_reg['Region'].unique()

tf_idx = basicu.get_index_from_array(genes, [tf])[0]
reg_genes_idx = basicu.get_index_from_array(genes, reg_genes)
# -1 is treated as the "not found" marker (same convention as the checks in
# prep_regulon); without these checks a missing name would silently index the
# last gene.
assert tf_idx != -1, f"TF {tf!r} not found in genes"
assert np.all(reg_genes_idx != -1), f"regulon {reg_name!r} has target genes not found in genes"

n = len(reg_genes)
# plt.subplots cannot create a grid with zero rows
assert n > 0, f"no rows in df_scenic for regulon {reg_name!r}"
nx = 12
ny = (n+nx-1)//nx  # rows needed for n panels

x = bigmat_nfd[:,:,tf_idx].reshape(-1,)
xb = np.linspace(np.min(x), np.max(x), 50)

# squeeze=False keeps `axs` two-dimensional for any grid size
fig, axs = plt.subplots(ny,nx,figsize=(nx*3.5,ny*3.5), squeeze=False)
for ax, rgi in zip(axs.flat, reg_genes_idx):
    
    y = bigmat_nfd[:,:,rgi].reshape(-1,)
    
    # fit the line once and reuse its coefficients for both slope and curve
    coef1 = np.polyfit(x, y, deg=1)
    slope = coef1[0]
    f1 = np.poly1d(coef1)
    f2 = np.poly1d(np.polyfit(x, y, deg=2))
    f3 = np.poly1d(np.polyfit(x, y, deg=3))
    
    ax.scatter(x,y, color='gray', s=10)
    ax.plot(xb,f1(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f2(xb), linewidth=1.5, zorder=0)
    ax.plot(xb,f3(xb), linewidth=1.5, zorder=0)
    sns.despine(ax=ax)
    
    ax.set_title(f'{tf}->{genes[rgi]}\n slope={slope:.2f}')

# hide the unused panels of the last row
for ax in axs.flat[n:]:
    ax.axis('off')
    
# axs.flat[0].set_xlabel('TF log2')
# axs.flat[0].set_ylabel('Target gene log2')
fig.tight_layout()
plt.show()