In [None]:
import numpy as np
import pandas as pd
import os

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
from sklearn.decomposition import PCA

import scanpy as sc
from umap import UMAP
from scroutines import powerplots
from scroutines import basicu

In [None]:
ddir = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac'

# Build `res`: the first `ntop` rows of every AME result table
# (groups A-C x modules 1-5), each tagged with its origin in `anno` (e.g. 'A1').
ntop = 10
top_tables = []
for group in ['A', 'B', 'C']:
    for i in range(5):
        f = os.path.join(ddir, f'ame_v2_L23{group}_M{i+1}', 'ame.tsv')
        df = pd.read_csv(f, sep='\t')

        # Odds ratio from the TP / FP counts; n is read from the first row of
        # 'FASTA_max' and used as the total for both counts.
        n = df['FASTA_max'].iloc[0]
        x = df['TP']
        y = df['FP']
        oddsratios = (x*(n-y))/(y*(n-x))
        df['OR'] = oddsratios
        df['TP/FP'] = x/y

        # Rows are kept in file order (no re-sorting), so "top" means the first
        # `ntop` rows as written in ame.tsv -- presumably AME's own ranking;
        # TODO confirm.
        # .assign() returns a new frame, so the label is not written into a
        # slice of `df` (the original column assignment on the .head() result
        # raised SettingWithCopyWarning).
        dfres = (
            df[['motif_alt_ID', 'motif_ID', 'OR']]
            .head(ntop)
            .assign(anno=f'{group}{i+1}')
        )
        top_tables.append(dfres)

res = pd.concat(top_tables)

In [None]:
# Inspect `df` as left behind by the loading loop: the last AME table read
# (group 'C', module 5), including the derived 'OR' and 'TP/FP' columns.
df

In [None]:
# Inspect the concatenated top-`ntop` motif table; the `anno` column records
# which group/module each row came from.
res

In [None]:
# One horizontal bar chart per group (A, B, C) showing the odds ratio of the
# top motifs of each module, modules stacked top-to-bottom within a panel.

# Split by the `anno` label instead of by row position, so a module that
# contributed fewer than `ntop` rows cannot shift motifs into the wrong panel.
groups = ['A', 'B', 'C']
ressubs = [res[res['anno'].str.startswith(group)] for group in groups]

fig, axs = plt.subplots(1, 3, figsize=(10*3, 16))
for i, (ax, group, ressub) in enumerate(zip(axs, groups, ressubs)):
    # Bar positions follow the actual number of rows (the original assumed
    # exactly 5*ntop, which makes barh fail on a shape mismatch otherwise).
    ypos = np.arange(len(ressub))
    ax.barh(ypos, ressub['OR'], color=f'C{i}', edgecolor='none')
    ax.set_yticks(ypos)
    ax.set_yticklabels(ressub['motif_alt_ID'])

    # Dashed separator just above the first bar of every module. axhline spans
    # the full axis width, whatever the OR range is (previously fixed to 0-10).
    module_sizes = ressub.groupby('anno', sort=False).size().to_numpy()
    module_starts = np.concatenate([[0], np.cumsum(module_sizes)[:-1]])
    for ystart in module_starts:
        ax.axhline(ystart - 0.5, color='k', linestyle='--')

    ax.invert_yaxis()  # first row at the top
    ax.set_xlabel('Odds ratio')
    ax.set_title(group)
    sns.despine(ax=ax)
    ax.grid(axis='y')

fig.tight_layout()

# remove redundancy

In [None]:
ddir = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/results_atac'

# Rebuild `res`, this time keeping every row of every AME table
# (groups A-C x modules 1-5) and tagging each row with its origin in `anno`.
query = []
res = []
for group in ('A', 'B', 'C'):
    for i in range(5):
        tsv_path = os.path.join(ddir, f'ame_v2_L23{group}_M{i+1}', 'ame.tsv')
        # dropna() discards any row with a missing value in any column.
        df = pd.read_csv(tsv_path, sep='\t').dropna()

        n_total = df['FASTA_max'].iloc[0]
        tp, fp = df['TP'], df['FP']

        # Derived columns are appended in the same order as before:
        # 'OR', then 'TP/FP', then the group/module label.
        df = df.assign(**{
            'OR': (tp*(n_total-fp))/(fp*(n_total-tp)),
            'TP/FP': tp/fp,
            'anno': f'{group}{i+1}',
        })
        res.append(df)

res = pd.concat(res)
res

In [None]:
# Lookup table indexed by the 15 group/module labels ('A1'..'A5', 'B1'..'B5',
# 'C1'..'C5'), with one matplotlib cycle colour per group.
labels = [f'{letter}{number}' for letter in 'ABC' for number in range(1, 6)]
colors = [f'C{k}' for k in range(3) for _ in range(5)]

idx = pd.DataFrame(index=labels)
idx['color'] = colors
idx

In [None]:
# Show every row whose motif alternate name contains 'MEIS'.
is_meis = res['motif_alt_ID'].str.contains('MEIS')
res[is_meis]

In [None]:
# Motif to plot. Swap in one of the alternatives below to look at another one:
#   q_id = 'MA0798.3'; q_name = 'RFX3'
#   q_id = 'MA0774.1'; q_name = 'MEIS2'
#   q_id = 'MA1995.1'; q_name = 'Npas4'
#   q_id = 'MA0162.4'; q_name = 'EGR1'
#   q_id = 'MA1869.1'; q_name = 'FoxK'
#   q_id = 'MA1929.1'; q_name = 'CTCF'
q_id = 'MA0099.3'
q_name = 'FOS::JUN'

# Sanity check: list which motif IDs are filed under the chosen name.
q1_pre = res[res['motif_alt_ID']==q_name]
print(q1_pre['motif_ID'].value_counts())

# Rows for the chosen motif ID, aligned to the fixed A1..C5 order of `idx`;
# labels without a matching row come out as NaN from the left join.
q1 = res[res['motif_ID']==q_id]
r1 = idx.join(q1.set_index('anno'))

positions = np.arange(len(r1))

fig, ax = plt.subplots()
# %TP in the group colour first, then %FP in light gray drawn on top of it.
for column, bar_color in (('%TP', r1['color']), ('%FP', 'lightgray')):
    ax.bar(positions, r1[column], linewidth=0, color=bar_color)
ax.set_xticks(positions)
ax.set_xticklabels(r1.index.values)
ax.set(ylabel='%TP', title=f'{q_name} ({q_id})')
sns.despine(ax=ax)
ax.grid(axis='x')

# NOTE(review): the output file name does not include the motif, so figures for
# different motifs may end up at the same path -- confirm how
# powerplots.savefig_autodate derives the final name.
output = '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/figures/250407/motif_enrichment.pdf'
powerplots.savefig_autodate(fig, output)
plt.show()