In [48]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from functools import partial
from multiprocessing import shared_memory
from multiprocessing.dummy import Pool
from sklearn.ensemble import RandomForestRegressor
import multiprocessing as mp
from itertools import chain, combinations
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from scipy.stats import wilcoxon
from scipy.stats import ttest_rel
import sys
import os

import mp_run

import concurrent.futures
os.environ["OMP_NUM_THREADS"] = "1" # export OMP_NUM_THREADS=1
os.environ["OPENBLAS_NUM_THREADS"] = "1" # export OPENBLAS_NUM_THREADS=1
os.environ["MKL_NUM_THREADS"] = "1" # export MKL_NUM_THREADS=1
os.environ["VECLIB_MAXIMUM_THREADS"] = "1" # export VECLIB_MAXIMUM_THREADS=1
os.environ["NUMEXPR_NUM_THREADS"] = "1" # export NUMEXPR_NUM_THREADS=1

In [49]:
perturbation_factor = 3
num_rf_predictors = 500

target_tf = 'AT2G46680'

induction_flag = 0
mp_threads = 20
# if (len(sys.argv)>=3):
#     induction_flag = bool(sys.argv[1])
#     mp_threads = int(sys.argv[2])

In [50]:
tf_df = pd.read_csv('data/wrky_regulators.csv')
tf_list = tf_df['Gene']
scaler = StandardScaler()

In [51]:
target_df = pd.read_csv('data/wrky_targets_neg.csv')
if (induction_flag):
    target_df = pd.read_csv('data/wrky_targets_pos.csv')
deg_genes = target_df['Gene']

In [52]:
ts_df_raw = pd.read_csv('data/GSE97500/expression.tsv', sep='\t', index_col=0)
ts_df = pd.DataFrame(scaler.fit_transform(ts_df_raw), columns=ts_df_raw.columns)
ts_df.index = ts_df_raw.index

c = pd.Series(list(set(deg_genes).intersection(set(ts_df.index))))
tf_list = pd.Series(list(set(tf_list).intersection(set(ts_df.index))))
meta_df = pd.read_csv('data/GSE97500/meta_data.tsv', sep='\t')
ts_exp_index = meta_df[meta_df['isTs']]
ts_exp_index_target =  ts_exp_index[ts_exp_index['is1stLast'] == 'm'].condName
ts_exp_index_source =  ts_exp_index[ts_exp_index['is1stLast'] == 'm'].prevCol

In [53]:
ts_exp_index_target_val = ts_exp_index[ts_exp_index['is1stLast'] == 'l'].condName
ts_exp_index_source_val =  ts_exp_index[ts_exp_index['is1stLast'] == 'l'].prevCol

In [54]:
target_genes = set(deg_genes).intersection(set(ts_df.index))

In [55]:
non_trivial_targets = []
for target in target_genes:
    if ts_df.loc[target].mean() != 0.0:
        non_trivial_targets.append(target)


In [56]:
target_genes = pd.Series(non_trivial_targets)

In [57]:
ts_train_y_list = ts_df[ts_exp_index_target]
# set training target to expression level change
ts_train_y_list.iloc[:, :] = ts_df[ts_exp_index_target].values - ts_df[ts_exp_index_source].values
ts_val_y_list = ts_df[ts_exp_index_target_val]
# set training target to expression level change
ts_val_y_list.iloc[:, :] = ts_df[ts_exp_index_target_val].values - ts_df[ts_exp_index_source_val].values

In [58]:
ts_y_all = ts_train_y_list.join(ts_val_y_list)

In [59]:
ts_y_all.loc[target_genes].to_csv('./data/GSE97500/exp_neg_target.csv')

In [60]:
ts_X_all = ts_df[ts_exp_index_source].join(ts_df[ts_exp_index_source_val])

In [61]:
ts_X_all.loc[tf_list].to_csv('./data/GSE97500/exp_tf.csv')

In [64]:
ts_X_all.loc[tf_list]

Unnamed: 0,R5C-1,R10C-1,R15C-1,R20C-1,R30C-1,R45C-1,R60C-1,R5N-1,R10N-1,R15N-1,...,S90C-1,S90N-1,R90C-2,R90N-2,S90C-2,S90N-2,R90C-3,R90N-3,S90C-3,S90N-3
AT1G62300,0.041495,0.140289,0.198817,0.108456,0.103839,0.107883,0.090138,0.048606,0.073338,0.102298,...,0.249494,0.218433,0.101548,0.112832,-0.043584,0.242764,0.078629,0.120241,0.149937,0.108030
AT5G64810,0.037443,-0.014327,0.011878,0.062392,0.062263,0.043908,0.023219,0.059613,-0.007364,0.006123,...,-0.067390,-0.080437,0.065583,0.059805,-0.043584,-0.057019,0.051128,0.063191,-0.082242,-0.071314
AT2G40750,0.003219,-0.001598,0.054874,0.092461,0.066242,0.046205,0.021562,0.042414,0.021449,0.093206,...,0.026662,0.005530,0.073512,0.222483,-0.043584,0.089503,0.088137,0.198908,0.027290,0.042931
AT4G31550,0.022627,0.066294,0.103925,0.050893,0.046266,0.056082,0.059062,0.045646,0.090995,0.073657,...,0.396826,0.056474,0.050712,0.045586,-0.043584,0.191163,0.074038,0.072464,0.355887,0.132989
AT4G11070,-0.023822,-0.024585,-0.031026,-0.038798,-0.038266,-0.038249,-0.031084,-0.028372,-0.017912,-0.021650,...,-0.136634,-0.088928,-0.040944,-0.048491,-0.043584,-0.097837,-0.050020,-0.053787,-0.125472,-0.088553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AT4G23810,-0.024092,-0.017023,-0.003320,-0.029050,-0.028034,-0.031799,-0.027101,-0.024259,-0.006261,0.005999,...,0.069804,0.073031,-0.032712,-0.017555,-0.043584,0.150923,-0.034323,-0.030635,0.039676,0.053223
AT4G01720,-0.033346,-0.011164,-0.015369,-0.030595,-0.028643,-0.025026,-0.022014,-0.031740,-0.017373,-0.020080,...,0.010483,-0.064305,-0.034798,-0.040197,-0.043584,-0.079353,-0.039078,-0.035495,0.050605,-0.053817
AT1G68150,-0.011957,-0.019849,-0.028671,-0.013981,-0.021579,-0.014102,-0.010096,-0.011376,-0.018655,-0.021940,...,-0.140732,-0.106970,-0.017309,-0.009007,-0.043584,-0.127103,-0.008503,-0.003517,-0.136887,-0.105793
AT4G23550,-0.009322,0.004635,0.016435,0.014646,-0.006881,0.005369,0.023335,-0.000325,-0.002888,0.003148,...,-0.099315,-0.065154,0.018086,-0.015820,-0.043584,-0.047585,0.009530,0.012792,-0.097543,-0.055361


In [65]:
ts_y_all.loc[target_genes]

Unnamed: 0,R10C-1,R15C-1,R20C-1,R30C-1,R45C-1,R60C-1,R90C-1,R10N-1,R15N-1,R20N-1,...,S120C-1,S120N-1,R120C-2,R120N-2,S120C-2,S120N-2,R120C-3,R120N-3,S120C-3,S120N-3
AT5G35735,0.101583,0.124044,-0.126464,0.044389,-0.029002,-0.040702,0.155547,0.057403,0.018137,-0.065303,...,0.412851,0.318974,0.272290,-0.007449,0.696040,0.382889,0.017595,-0.048461,0.096246,0.590367
AT4G35985,0.004671,0.009205,-0.008343,-0.010361,0.011093,-0.006042,0.003490,0.006530,-0.004214,0.001947,...,-0.002378,-0.004434,0.001470,-0.000508,-0.085347,-0.000109,-0.011375,-0.015390,-0.004485,-0.033204
AT5G63020,0.000492,-0.013490,0.000173,-0.006058,0.009068,0.006627,-0.018862,0.007605,-0.005841,-0.005930,...,0.016726,0.002256,-0.001732,-0.001603,0.033298,-0.008216,-0.001139,0.004900,-0.001550,-0.016911
AT3G19580,0.031846,0.023270,-0.022770,-0.017714,0.010041,-0.006045,0.031538,-0.003632,0.015827,-0.017780,...,-0.163391,0.009629,0.041810,-0.001259,0.033922,0.009777,0.010532,-0.008807,0.012442,0.018263
AT1G80520,0.007680,-0.012993,-0.010333,0.002785,0.001147,0.012328,-0.018985,0.013249,-0.003782,-0.013335,...,0.004094,0.003123,-0.004077,-0.000724,-0.077229,0.011128,-0.004046,0.006943,-0.003660,-0.036059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AT5G64300,-0.027648,0.003498,0.031473,-0.000624,-0.000835,-0.012472,0.014663,-0.017283,-0.000904,0.025107,...,0.035757,0.107544,0.018589,-0.011659,0.207933,0.055115,-0.002639,-0.018355,0.020925,0.237776
AT5G12480,-0.068601,0.001562,0.101984,0.003752,0.002063,-0.050144,0.045955,-0.087801,-0.003325,0.093718,...,0.027775,-0.011956,-0.000245,-0.029493,0.246441,-0.058529,0.002543,-0.005176,0.021038,0.081227
AT3G26115,0.002184,-0.014399,-0.006229,0.001453,0.000559,0.013131,-0.020351,0.011400,-0.005333,-0.010157,...,0.003657,-0.009945,-0.003939,0.000406,-0.081392,0.004300,-0.000031,0.007401,-0.001855,-0.035209
AT1G23700,0.004896,-0.014839,-0.008903,-0.000343,0.002011,0.013426,-0.021858,0.013395,-0.005230,-0.013400,...,0.010795,-0.006156,-0.003001,-0.002247,-0.103664,0.003674,-0.002060,0.008976,-0.001155,-0.042578
