In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from functools import partial
from multiprocessing import shared_memory
from multiprocessing.dummy import Pool
from sklearn.ensemble import RandomForestRegressor
import multiprocessing as mp
from itertools import chain, combinations
import sys
import os
from scipy import stats

from sklearn.preprocessing import Normalizer
from sklearn.ensemble import RandomForestRegressor

import mp_run

import concurrent.futures
os.environ["OMP_NUM_THREADS"] = "1" # export OMP_NUM_THREADS=1
os.environ["OPENBLAS_NUM_THREADS"] = "1" # export OPENBLAS_NUM_THREADS=1
os.environ["MKL_NUM_THREADS"] = "1" # export MKL_NUM_THREADS=1
os.environ["VECLIB_MAXIMUM_THREADS"] = "1" # export VECLIB_MAXIMUM_THREADS=1
os.environ["NUMEXPR_NUM_THREADS"] = "1" # export NUMEXPR_NUM_THREADS=1

In [3]:
perturbation_factor = 3
num_rf_predictors = 500

target_tf = 'AT2G46680'

induction_flag = -1
mp_threads = 20
# if (len(sys.argv)>=3):
#     induction_flag = bool(sys.argv[1])
#     mp_threads = int(sys.argv[2])

In [4]:
tf_df = pd.read_csv('data/wrky_regulators.csv')
tf_list = tf_df['Gene']

In [5]:
def choose_2_3(iterable):
    "powerset([1,2,3]) -->  (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(2,4))

In [6]:
target_df = pd.read_csv('data/wrky_targets_neg.csv')
if (induction_flag > 0):
    target_df = pd.read_csv('data/wrky_targets_pos.csv')
deg_genes = target_df['Gene']

In [7]:
ts_df = pd.read_csv('data/GSE97500/expression.tsv', sep='\t', index_col=0)
c = pd.Series(list(set(deg_genes).intersection(set(ts_df.index))))
tf_list = pd.Series(list(set(tf_list).intersection(set(ts_df.index))))
meta_df = pd.read_csv('data/GSE97500/meta_data.tsv', sep='\t')
ts_exp_index = meta_df[meta_df['isTs']]
ts_exp_index_target =  ts_exp_index[ts_exp_index['is1stLast'] != 'f'].condName
ts_exp_index_source =  ts_exp_index[ts_exp_index['is1stLast'] != 'f'].prevCol

In [8]:
target_genes = set(deg_genes).intersection(set(ts_df.index))

In [9]:
non_trivial_targets = []
for target in target_genes:
    if ts_df.loc[target].mean() != 0.0:
        non_trivial_targets.append(target)


In [10]:
target_genes = pd.Series(non_trivial_targets)

In [11]:

ts_train_y_list = ts_df[ts_exp_index_target]

result_list = []
result_measure_list = []

p_val_res_list = []

for target_gene in tqdm(target_genes):
    # train_gene_index = tf_list[tf_list != target_gene]
    train_gene_index = tf_list
    
    ts_train_X = ts_df[ts_exp_index_source].T[train_gene_index]

    ts_train_y = ts_train_y_list.loc[target_gene]

    input_mean = ts_train_X.mean()
    input_std = ts_train_X.std()
    perturbation_input = input_mean.copy()
    perturbation_input[target_tf] += input_std[target_tf]*perturbation_factor
    perturbation_input = np.tile(perturbation_input.values, (len(input_mean),1))
    for i, tf_name in enumerate(tf_list):
        if tf_name == target_tf: continue
        perturbation_input[i][i] += input_std[tf_name]*perturbation_factor

    func = partial(mp_run.rf_feature_importance, ts_train_X, ts_train_y, perturbation_input)

    with mp.Pool() as pool:
        results = pool.map(func, range(num_rf_predictors))
    results = np.array(results)
    target_tf_index = np.where(tf_list == target_tf)[0][0]
    p_val_list = []
    for i, tf_name in enumerate(tf_list):
        if i == target_tf_index: 
            t_val, p_val = stats.ttest_rel(results[:, target_tf_index], np.zeros(num_rf_predictors))
        else:
            t_val, p_val = stats.ttest_rel(results[:,i], results[:, target_tf_index])
        if (induction_flag*t_val > 0): p_val_list.append(p_val)
        else: p_val_list.append(1)
    p_val_res_list.append(p_val_list)

100%|██████████| 639/639 [33:53<00:00,  3.18s/it]


In [12]:
tf_df.index = tf_df['Gene']
out_df = pd.DataFrame(index=target_genes, columns=tf_df.loc[tf_list]['Symbol'], data=np.array(p_val_res_list))

In [13]:
out_df.to_csv('./output/xb_wrky_inf_pval_neg.csv')

In [39]:
tf_df.index = tf_df['Gene']
tf_df.loc[tf_list]['Symbol']

'HB7'

In [27]:
out_df

Unnamed: 0,AT2G03340,AT1G66600,AT4G11070,AT3G58710,AT1G64000,AT2G24570,AT3G56400,AT4G39410,AT4G26440,AT5G26170,...,AT4G01250,AT5G22570,AT2G34830,AT5G07100,AT1G62300,AT3G62340,AT2G46130,AT1G80590,AT1G18860,AT2G30250
AT5G45110,1.000000e+00,0.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,3.267931e-02,1.000000e+00,3.537085e-105,2.614901e-259,1.000000e+00,...,1.512885e-06,2.154471e-204,4.273833e-46,1.000000e+00,1.511681e-05,1.387390e-68,2.658043e-132,6.632532e-198,1.000000e+00,1.000000e+00
AT5G02290,1.000000e+00,0.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.079207e-97,1.000000e+00,1.000000e+00,...,1.000000e+00,1.158070e-66,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.555881e-49,1.753223e-236,1.000000e+00,6.676295e-23
AT1G66160,5.337728e-34,0.000000e+00,1.000000e+00,5.540835e-02,1.399327e-166,5.824431e-95,1.002381e-54,0.000000e+00,1.517303e-143,1.000000e+00,...,2.676314e-216,1.448137e-52,1.000000e+00,1.000000e+00,3.836657e-29,2.658087e-139,6.356803e-98,1.421447e-11,6.604319e-07,2.312458e-278
AT5G19290,1.000000e+00,0.000000e+00,3.549600e-177,3.431683e-206,3.435450e-25,3.939136e-183,1.000000e+00,1.292511e-221,1.000000e+00,1.743100e-144,...,1.000000e+00,3.503521e-19,4.695632e-67,7.140652e-65,1.286384e-112,1.134998e-08,1.000000e+00,1.341869e-02,1.141598e-88,1.000000e+00
AT1G72330,1.000000e+00,0.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,4.320344e-208,1.000000e+00,1.221802e-25,...,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.737255e-26,1.000000e+00,2.193657e-114,1.000000e+00,1.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AT4G25350,1.976514e-01,5.458548e-296,1.000000e+00,1.099608e-02,2.223263e-104,8.940451e-142,1.000000e+00,5.625402e-01,1.492510e-299,1.000000e+00,...,1.000000e+00,1.351716e-199,1.254795e-05,1.172346e-112,1.000000e+00,3.482509e-29,3.075282e-89,1.630368e-187,1.000000e+00,5.198037e-77
AT5G48385,1.000000e+00,6.968207e-256,1.000000e+00,1.000000e+00,1.000000e+00,3.333306e-70,1.000000e+00,6.748484e-105,1.000000e+00,2.206356e-82,...,1.000000e+00,1.000000e+00,1.000000e+00,3.000922e-121,1.000000e+00,1.263129e-23,1.000000e+00,1.000000e+00,8.986524e-304,1.000000e+00
AT2G28940,1.000000e+00,0.000000e+00,1.000000e+00,1.000000e+00,1.440799e-79,1.323964e-44,1.000000e+00,1.000000e+00,1.724430e-18,1.000000e+00,...,1.000000e+00,7.861058e-72,1.638035e-24,1.135634e-43,1.604899e-01,1.000000e+00,9.774409e-132,2.917848e-120,1.104004e-03,3.846091e-05
AT1G47890,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.230039e-02,1.000000e+00,1.000000e+00,4.961471e-161,1.000000e+00,1.000000e+00,...,5.159830e-16,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,5.142829e-09,1.000000e+00,1.000000e+00,8.891648e-77


In [77]:

regr = RandomForestRegressor(random_state=0)

ts_train_y_list = ts_df[ts_exp_index_target]

result_list = []
result_measure_list = []

for target_gene in tqdm(target_genes):
    train_gene_index = tf_list[tf_list != target_gene]
    ts_train_X = ts_df[ts_exp_index_source].T[train_gene_index]

    ts_train_y = ts_train_y_list.loc[target_gene]

    input_mean = ts_train_X.mean()
    input_std = ts_train_X.std()
    perturbation_input = input_mean.copy()
    perturbation_input = np.repeat(np.array(input_mean), repeats=len(input_mean), axis=0)
    perturbation_input = perturbation_input.reshape(len(input_mean),len(input_mean)) + np.diagflat(np.array([ts_train_X.values.std()]*len(input_mean)) * perturbation_factor)

    func = partial(mp_run.rf_feature_importance, ts_train_X, ts_train_y)

    with mp.Pool() as pool:
        results = pool.map(func, range(num_rf_predictors))


    base_predictions = [result.predict(np.array(input_mean).reshape(1,-1))[0] for result in results]
    y_std = ts_train_y.std()

    perturbation_input_list = np.array_split(perturbation_input, 10)
    perturbation_input_shapes = [p.shape for p in perturbation_input_list]
    shared_memory_list = []
    for i, p in enumerate(perturbation_input_list):
        shm = shared_memory.SharedMemory(create=True, size=p.nbytes, name="perturbation_input_"+str(i))
        buffer = np.ndarray(p.shape, dtype=p.dtype, buffer=shm.buf)
        buffer[:] = p[:]
        shared_memory_list.append(shm)


    def f(perturbation_input_shapes, x):
        pred_list = []
        for i, shape in enumerate(perturbation_input_shapes):

            # Attach to the existing shared memory
            existing_shm = shared_memory.SharedMemory(name='perturbation_input_'+str(i))
            # Read from the shared memory (we know the size is 1)
            c = np.ndarray(shape, dtype=np.float64, buffer=existing_shm.buf)
            pred_list.append(x.predict(c))
            existing_shm.close()
        return np.concatenate(pred_list)

    func = partial(f, perturbation_input_shapes)

    with mp.Pool(processes=mp_threads) as pool:
        perturbation_predictions = pool.map(func, results)

    for shm in shared_memory_list:
        shm.close()
        shm.unlink()


    perturbation_measures = [(perturbation_prediction - base_prediction)/y_std for perturbation_prediction, base_prediction in zip(perturbation_predictions, base_predictions)]
    importance_matrix = np.array(perturbation_measures).T
    importance_df = pd.DataFrame(index=train_gene_index, data=importance_matrix, columns=range(num_rf_predictors))
    importance_df_list = []
    mean_importance = importance_df.mean(axis=1)
    if (induction_flag):
        top_influence_genes = train_gene_index[np.argsort(mean_importance)[::-1][:5]]
    else:
        top_influence_genes = train_gene_index[np.argsort(mean_importance)[:5]]
    importance_df_list.append(mean_importance)
    data_mean = ts_df.T[top_influence_genes].mean()
    data_std = ts_df.T[top_influence_genes].std()
    regr = RandomForestRegressor(random_state=42, warm_start=True, n_estimators=100, n_jobs=20)
    ts_train_X = ts_df[ts_exp_index_source].T[top_influence_genes]
    regr = regr.fit(ts_train_X, ts_train_y)

    base_prediction = regr.predict(np.array(data_mean).reshape(1,-1))[0]
    y_std = ts_df.T.std()[target_gene]
    perturbation_list = list(choose_2_3(top_influence_genes))

    perturbation_result_list = []
    perturbation_list_names = ['; '.join(perturbation_genes) for perturbation_genes in perturbation_list]
    for perturbation_genes in perturbation_list:
        perturbation_input = data_mean.copy()
        for gene in perturbation_genes:
            perturbation_input[gene] += data_std[gene] * perturbation_factor
        perturbation_prediction = regr.predict(np.array(perturbation_input).reshape(1,-1))[0]
        perturbation_measure = (perturbation_prediction - base_prediction)/y_std
        perturbation_result_list.append(perturbation_measure)
    if (induction_flag):
        result_list.append(np.array(perturbation_list_names)[np.argsort(perturbation_result_list)[::-1][:5]])
        result_measure_list.append(np.array(perturbation_result_list)[np.argsort(perturbation_result_list)[::-1][:5]])
    else:
        result_list.append(np.array(perturbation_list_names)[np.argsort(perturbation_result_list)[:5]])
        result_measure_list.append(np.array(perturbation_result_list)[np.argsort(perturbation_result_list)[:5]])
        
    del importance_df_list
    del importance_df
    del shared_memory_list
    

  0%|          | 0/775 [00:00<?, ?it/s]


In [18]:
for shm in shared_memory_list:
        shm.close()
        shm.unlink()

In [69]:
result_measure_list = np.array(result_measure_list)
result_list = np.array(result_list)
out_df = pd.DataFrame()
out_df.index = target_genes[:50]
for i in range(5):
    comb_name = 'top_{}_combination'.format(i+1)
    score_name = 'top_{}_score'.format(i+1)
    out_df[comb_name] = result_list[:,i]
    out_df[score_name] = result_measure_list[:,i]

In [70]:
if (induction_flag):
    out_df.to_csv('output/wrky_presentation_comb_pos.csv')
else:
    out_df.to_csv('output/wrky_presentation_comb_neg.csv')

In [25]:
res_df = pd.read_csv('./output/rf_wrky_inf_pval_neg.csv', index_col=0)
# res_df

In [26]:
pymc_res_df = pd.read_csv('./output/neg_res_new.csv', index_col=0)

In [27]:
tf_df.index = tf_df['Gene']
tf_df

Unnamed: 0_level_0,Gene,Symbol
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1
AT2G04880,AT2G04880,WRKY1
AT1G55600,AT1G55600,WRKY10
AT4G31550,AT4G31550,WRKY11
AT2G44745,AT2G44745,WRKY12
AT4G39410,AT4G39410,WRKY13
...,...,...
AT5G28650,AT5G28650,WRKY74
AT5G13080,AT5G13080,WRKY75
AT5G46350,AT5G46350,WRKY8
AT1G68150,AT1G68150,WRKY9


In [28]:
pymc_res_df.index = tf_df.loc[pymc_res_df.index]['Symbol']

In [30]:
pymc_res_df = pymc_res_df.loc[res_df.columns]

In [32]:
pymc_res_df = pymc_res_df.T

In [34]:
common_index = pd.Index(set(res_df.index).intersection(set(pymc_res_df.index)))

In [35]:
pymc_res_df = pymc_res_df.loc[common_index]
pymc_res_df

Unnamed: 0,WRKY3,WRKY63,WRKY41,WRKY69,WRKY56,WRKY17,WRKY70,WRKY13,WRKY34,WRKY50,...,WRKY22,WRKY38,WRKY35,WRKY26,WRKY6,WRKY68,WRKY43,WRKY66,WRKY61,WRKY25
AT1G16640,0.5655,0.4535,0.4545,0.4775,0.4520,0.5435,0.5665,0.4455,0.4440,0.4515,...,0.4775,0.5140,0.4570,0.5390,0.5405,0.4295,0.4545,0.4475,0.5140,0.5850
AT1G20780,0.4685,0.5095,0.5295,0.5260,0.5490,0.4685,0.4590,0.5570,0.5640,0.4935,...,0.5335,0.4515,0.5355,0.4965,0.4575,0.5400,0.5295,0.5425,0.4795,0.4385
AT1G69900,0.6015,0.5250,0.4230,0.5110,0.5035,0.4945,0.4805,0.4980,0.4755,0.4665,...,0.4550,0.4715,0.5095,0.4685,0.4600,0.4940,0.5110,0.4860,0.5150,0.5570
AT5G01100,0.5945,0.4560,0.3925,0.5155,0.4600,0.4965,0.5275,0.4865,0.4785,0.4585,...,0.4890,0.4475,0.4870,0.5065,0.4635,0.4595,0.4815,0.4775,0.4775,0.5580
AT1G30755,0.5965,0.5395,0.4945,0.5720,0.5220,0.4920,0.5150,0.5205,0.5460,0.4850,...,0.5370,0.4640,0.5190,0.5390,0.5000,0.5205,0.5340,0.5330,0.5235,0.5330
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AT1G02170,0.5075,0.5070,0.5195,0.5395,0.5335,0.4440,0.4480,0.5745,0.5410,0.5070,...,0.5290,0.4440,0.5250,0.5160,0.4945,0.5385,0.5230,0.5580,0.4805,0.4495
AT4G34450,0.4760,0.4895,0.5060,0.5135,0.5510,0.4540,0.4730,0.5255,0.5485,0.5175,...,0.5395,0.4540,0.5315,0.4770,0.4960,0.5315,0.5335,0.5460,0.4855,0.4110
AT4G40020,0.5810,0.4640,0.4560,0.5415,0.4820,0.4845,0.4675,0.5070,0.5055,0.4495,...,0.4915,0.4545,0.4920,0.5195,0.4870,0.4960,0.4850,0.4740,0.5100,0.5590
AT4G15120,0.5200,0.4705,0.4290,0.4795,0.4580,0.5295,0.5235,0.4675,0.4410,0.4795,...,0.4805,0.5100,0.4820,0.5130,0.4795,0.4595,0.4375,0.4625,0.4780,0.5385


In [36]:
(1-pymc_res_df).to_csv('./output/sample_pymc_res_neg.csv')

In [37]:
res_df.loc[common_index].to_csv('./output/sample_rf_res_neg.csv')

In [39]:
comb_df = (pd.concat([1-pymc_res_df,res_df.loc[common_index]])
   .stack()
   .groupby(level=[0,1])
   .apply(tuple)
   .unstack()
 ).loc[common_index]

In [41]:
comb_df

Unnamed: 0,WRKY3,WRKY63,WRKY41,WRKY69,WRKY56,WRKY17,WRKY70,WRKY13,WRKY34,WRKY50,...,WRKY22,WRKY38,WRKY35,WRKY26,WRKY6,WRKY68,WRKY43,WRKY66,WRKY61,WRKY25
AT1G16640,"(0.4345, 1.0)","(0.5465, 0.0)","(0.5455, 1.0)","(0.5225, 1.0)","(0.548, 1.0)","(0.4565, 1.968422992040913e-24)","(0.4335, 1.0)","(0.5545, 1.2066171288648885e-194)","(0.556, 0.0316863484772732)","(0.5485, 1.0)",...,"(0.5225, 0.0100157988203473)","(0.486, 0.0)","(0.5429999999999999, 1.0)","(0.46099999999999997, 4.353796113976852e-188)","(0.4595, 1.0)","(0.5705, 1.0)","(0.5455, 0.0133868611916385)","(0.5525, 4.835034971585914e-297)","(0.486, 3.0959145041624355e-31)","(0.41500000000000004, 1.0)"
AT1G20780,"(0.5315, 1.0)","(0.49050000000000005, 0.0)","(0.47050000000000003, 1.0)","(0.474, 1.4232622709374354e-31)","(0.45099999999999996, 1.0)","(0.5315, 1.1514923547674261e-120)","(0.5409999999999999, 1.0)","(0.44299999999999995, 8.617123111489592e-119)","(0.43600000000000005, 1.0)","(0.5065, 4.967361026616519e-188)",...,"(0.4665, 0.0019752725495346)","(0.5485, 1.0)","(0.4645, 0.004390093526085)","(0.5035000000000001, 3.6319061154555894e-111)","(0.5425, 1.0)","(0.45999999999999996, 0.0002408260645258)","(0.47050000000000003, 1.7254622172952151e-99)","(0.4575, 1.0)","(0.5205, 2.339995051052668e-218)","(0.5615, 1.0)"
AT1G69900,"(0.39849999999999997, 5.666476020127022e-75)","(0.475, 0.0)","(0.577, 1.0)","(0.489, 1.7017973822994658e-60)","(0.49650000000000005, 1.0)","(0.5055000000000001, 1.3607271643569615e-125)","(0.5195000000000001, 0.0112809261902779)","(0.502, 8.619824428606513e-295)","(0.5245, 0.3877377587570911)","(0.5335, 5.003739669245092e-78)",...,"(0.5449999999999999, 2.5991230422009358e-180)","(0.5285, 1.0)","(0.49050000000000005, 1.0)","(0.5315, 1.640737456350532e-38)","(0.54, 1.0)","(0.506, 1.0)","(0.489, 1.0)","(0.514, 1.0)","(0.485, 3.6525376403568327e-172)","(0.44299999999999995, 1.0)"
AT5G01100,"(0.40549999999999997, 1.0)","(0.544, 1.0)","(0.6074999999999999, 1.0)","(0.48450000000000004, 1.0)","(0.54, 3.19119127126836e-34)","(0.5035000000000001, 1.0)","(0.47250000000000003, 3.203009442302055e-52)","(0.5135000000000001, 1.0)","(0.5215000000000001, 1.0)","(0.5415, 6.0307835500155086e-33)",...,"(0.511, 7.1076407744191115e-217)","(0.5525, 1.0)","(0.513, 1.0)","(0.49350000000000005, 3.0083071359968744e-63)","(0.5365, 9.054293873593482e-53)","(0.5405, 1.0)","(0.5185, 4.399698820809598e-49)","(0.5225, 1.0)","(0.5225, 0.0928323758564177)","(0.44199999999999995, 7.417112129500498e-233)"
AT1G30755,"(0.40349999999999997, 1.0)","(0.4605, 0.0)","(0.5055000000000001, 1.0)","(0.42800000000000005, 1.0)","(0.478, 1.0)","(0.508, 5.041322067459514e-93)","(0.485, 1.0)","(0.47950000000000004, 1.7002270916175226e-192)","(0.45399999999999996, 1.0)","(0.515, 6.528227965260684e-18)",...,"(0.46299999999999997, 1.0)","(0.536, 1.0)","(0.481, 1.0)","(0.46099999999999997, 1.0)","(0.5, 2.3044879540297103e-57)","(0.47950000000000004, 7.405647115107859e-65)","(0.46599999999999997, 9.359840629019238e-21)","(0.46699999999999997, 2.667824734026501e-10)","(0.47650000000000003, 1.0)","(0.46699999999999997, 0.0376632860320095)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AT1G02170,"(0.49250000000000005, 1.0)","(0.493, 6.45969858037125e-196)","(0.48050000000000004, 1.0301536367868753e-61)","(0.4605, 4.118494340173989e-05)","(0.4665, 1.0)","(0.556, 1.760594829639661e-192)","(0.552, 1.0)","(0.4255, 3.7252314462684806e-11)","(0.45899999999999996, 1.0)","(0.493, 3.92338875713081e-118)",...,"(0.471, 1.0)","(0.556, 1.0)","(0.475, 2.80558555100988e-67)","(0.484, 5.4621115482439365e-133)","(0.5055000000000001, 6.596491215411753e-105)","(0.4615, 1.0)","(0.477, 1.0)","(0.44199999999999995, 0.3828944354881017)","(0.5195000000000001, 4.558515094505175e-264)","(0.5505, 1.0)"
AT4G34450,"(0.524, 1.0)","(0.5105, 0.0)","(0.494, 1.0)","(0.48650000000000004, 1.0)","(0.44899999999999995, 1.0)","(0.546, 1.0)","(0.527, 1.0)","(0.47450000000000003, 5.898826861044408e-37)","(0.4515, 1.0)","(0.48250000000000004, 2.4989496895628688e-59)",...,"(0.4605, 7.821549037479034e-11)","(0.546, 1.0)","(0.4685, 8.032262145598629e-33)","(0.523, 1.2618594499545455e-27)","(0.504, 1.0)","(0.4685, 1.1965484709176402e-27)","(0.4665, 1.0)","(0.45399999999999996, 1.0)","(0.5145, 7.829467862715642e-262)","(0.589, 1.0)"
AT4G40020,"(0.41900000000000004, 1.0)","(0.536, 1.0365248045699916e-197)","(0.544, 3.8231396205789857e-60)","(0.4585, 1.3798927945461091e-77)","(0.518, 2.062849492495344e-25)","(0.5155000000000001, 1.0443953917515559e-192)","(0.5325, 1.0)","(0.493, 2.385710734314881e-117)","(0.49450000000000005, 1.1181072046865712e-94)","(0.5505, 2.2020140507878618e-42)",...,"(0.5085, 1.0)","(0.5455, 1.0)","(0.508, 1.0)","(0.48050000000000004, 3.5946211249562587e-125)","(0.513, 2.784104562573559e-75)","(0.504, 0.0039058482031807)","(0.515, 1.0)","(0.526, 8.784909782292935e-33)","(0.49, 1.0)","(0.44099999999999995, 3.0232397730761566e-41)"
AT4G15120,"(0.48, 1.0)","(0.5295000000000001, 6.700753082274445e-118)","(0.571, 1.0)","(0.5205, 1.0)","(0.542, 1.0)","(0.47050000000000003, 3.284419854087836e-204)","(0.47650000000000003, 1.0)","(0.5325, 1.1209499816213765e-29)","(0.5589999999999999, 1.0)","(0.5205, 4.647101851532603e-58)",...,"(0.5195000000000001, 0.5809340909350249)","(0.49, 1.0)","(0.518, 1.0)","(0.487, 2.662290187158467e-118)","(0.5205, 5.188803743066909e-26)","(0.5405, 1.0)","(0.5625, 1.0)","(0.5375, 1.0)","(0.522, 1.4286215588627946e-280)","(0.4615, 1.0)"


In [42]:
comb_df.to_csv('./output/sample_comb_res_neg.csv')

In [16]:
rf_out_df = pd.read_csv('./output/rf_wrky_inf_pval_neg.csv', index_col=0)

In [18]:
comb_df = (pd.concat([out_df,rf_out_df.loc[out_df.index]])
   .stack()
   .groupby(level=[0,1])
   .apply(tuple)
   .unstack()
 )
comb_df.to_csv('./output/sample_xb_rf_comb_neg.csv')

In [26]:
agree_count = 0
disagree_count = 0
for row in comb_df.values:
    for xb_p, rf_p in row:
        if (xb_p < 0.05 and rf_p < 0.05):
            agree_count += 1
        if (xb_p - 0.05) * (rf_p - 0.05) < 0:
            disagree_count += 1

In [27]:
agree_count

10924

In [28]:
disagree_count

18144

In [30]:
comb_df.values.shape

(639, 72)

In [31]:
639*72

46008