In [11]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from functools import partial
from multiprocessing import shared_memory
from multiprocessing.dummy import Pool
from sklearn.ensemble import RandomForestRegressor
import multiprocessing as mp
from itertools import chain, combinations
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from scipy.stats import wilcoxon
from scipy.stats import ttest_rel

from sklearn.preprocessing import Normalizer
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb

import sys
import os

import mp_run

import concurrent.futures
os.environ["OMP_NUM_THREADS"] = "1" # export OMP_NUM_THREADS=1
os.environ["OPENBLAS_NUM_THREADS"] = "1" # export OPENBLAS_NUM_THREADS=1
os.environ["MKL_NUM_THREADS"] = "1" # export MKL_NUM_THREADS=1
os.environ["VECLIB_MAXIMUM_THREADS"] = "1" # export VECLIB_MAXIMUM_THREADS=1
os.environ["NUMEXPR_NUM_THREADS"] = "1" # export NUMEXPR_NUM_THREADS=1

In [27]:
nof_iters = 10000

def pairedtest(n1, n2, nof_iters):
    import random
   
    ndiff = sum([ n1[i]-n2[i] for i in range(len(n1)) ])
    if ndiff < 0:
         nbig = n2
         nsmall = n1
         ndiff = -ndiff
    else:
         nbig = n1
         nsmall = n2
    
    bcount = 0
    for niter in range(nof_iters):
        tdiff = 0
        for i in range(len(nbig)):
            a = random.random()
            if a <0.5:
                tdiff += nsmall[i]-nbig[i]
            else:
                tdiff += nbig[i]-nsmall[i]
        if tdiff > ndiff:
            bcount += 1
    return bcount / nof_iters

In [12]:
perturbation_factor = 3
num_rf_predictors = 500

target_tf = 'AT2G46680'

induction_flag = 0
mp_threads = 20
# if (len(sys.argv)>=3):
#     induction_flag = bool(sys.argv[1])
#     mp_threads = int(sys.argv[2])

In [13]:
tf_df = pd.read_csv('data/wrky_regulators.csv')
tf_list = tf_df['Gene']
scaler = StandardScaler()

In [14]:
def choose_2_3(iterable):
    "powerset([1,2,3]) -->  (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(2,4))

In [15]:
target_df = pd.read_csv('data/wrky_targets_neg.csv')
if (induction_flag):
    target_df = pd.read_csv('data/wrky_targets_pos.csv')
deg_genes = target_df['Gene']

In [16]:
ts_df_raw = pd.read_csv('data/GSE97500/expression.tsv', sep='\t', index_col=0)
ts_df = pd.DataFrame(scaler.fit_transform(ts_df_raw), columns=ts_df_raw.columns)
ts_df.index = ts_df_raw.index

c = pd.Series(list(set(deg_genes).intersection(set(ts_df.index))))
tf_list = pd.Series(list(set(tf_list).intersection(set(ts_df.index))))
meta_df = pd.read_csv('data/GSE97500/meta_data.tsv', sep='\t')
ts_exp_index = meta_df[meta_df['isTs']]
ts_exp_index_target =  ts_exp_index[ts_exp_index['is1stLast'] == 'm'].condName
ts_exp_index_source =  ts_exp_index[ts_exp_index['is1stLast'] == 'm'].prevCol

In [17]:
ts_exp_index_target_val = ts_exp_index[ts_exp_index['is1stLast'] == 'l'].condName
ts_exp_index_source_val =  ts_exp_index[ts_exp_index['is1stLast'] == 'l'].prevCol

In [18]:
target_genes = set(deg_genes).intersection(set(ts_df.index))

In [19]:
non_trivial_targets = []
for target in target_genes:
    if ts_df.loc[target].mean() != 0.0:
        non_trivial_targets.append(target)
target_genes = pd.Series(non_trivial_targets)


In [20]:
# efron trick #1
regr = RandomForestRegressor(random_state=0)
# regr = xgb.XGBRegressor()

ts_train_y_list = ts_df[ts_exp_index_target]
# set training target to expression level change
ts_train_y_list.iloc[:, :] = ts_df[ts_exp_index_target].values - ts_df[ts_exp_index_source].values
ts_val_y_list = ts_df[ts_exp_index_target_val]
# set training target to expression level change
ts_val_y_list.iloc[:, :] = ts_df[ts_exp_index_target_val].values - ts_df[ts_exp_index_source_val].values

ts_val_naive_y_list = ts_df[ts_exp_index_source_val]
ts_val_naive_y_list.loc[:,:] = 0


result_list = []
result_measure_list = []
mse_list = []
naive_mse_list = []
bottom_mse_list = []
top_feature_size_list = []


for target_gene in tqdm(target_genes):
    train_gene_index = tf_list[tf_list != target_gene]
    ts_val_y = ts_val_y_list.loc[target_gene]
    old_mse = sys.maxsize
    old_mse = np.random.randn(len(ts_val_y)) + sys.maxsize
    top_influence_genes = train_gene_index
    data_mean = ts_df.T[top_influence_genes].mean()
    data_std = ts_df.T[top_influence_genes].std()
    ts_train_X = ts_df[ts_exp_index_source].T[top_influence_genes]
    ts_val_X = ts_df[ts_exp_index_source_val].T[top_influence_genes]
    ts_train_y = ts_train_y_list.loc[target_gene]
    ts_val_y = ts_val_y_list.loc[target_gene]
    ts_val_naive_y = ts_val_naive_y_list.loc[target_gene]
    regr = RandomForestRegressor(random_state=42, warm_start=True, n_estimators=300, n_jobs=20)
    regr = regr.fit(ts_train_X, ts_train_y)
    # cur_mse = mean_squared_error(ts_val_y,regr.predict(ts_val_X))
    cur_mse = np.square(ts_val_y - regr.predict(ts_val_X))
    old_mse = cur_mse
    first_mse = cur_mse
    top_half = np.argsort(regr.feature_importances_)[int(top_influence_genes.size/2):]
    old_top_influence_genes = top_influence_genes
    top_influence_genes = top_influence_genes.iloc[top_half]
    if (target_tf not in top_influence_genes.values):
        top_influence_genes = top_influence_genes.append(tf_list[tf_list == target_tf])
    old_data_mean = data_mean
    old_data_std = data_std
    old_regr = regr
    old_ts_val_X = ts_val_X


    while(True):
        data_mean = ts_df.T[top_influence_genes].mean()
        data_std = ts_df.T[top_influence_genes].std()
        regr = RandomForestRegressor(random_state=42, warm_start=True, n_estimators=300, n_jobs=20)
        ts_train_X = ts_df[ts_exp_index_source].T[top_influence_genes]
        ts_val_X = ts_df[ts_exp_index_source_val].T[top_influence_genes]
        ts_train_y = ts_train_y_list.loc[target_gene]
        ts_val_y = ts_val_y_list.loc[target_gene]
        ts_val_naive_y = ts_val_naive_y_list.loc[target_gene]
        regr = regr.fit(ts_train_X, ts_train_y)
        # cur_mse = mean_squared_error(ts_val_y,regr.predict(ts_val_X))
        cur_mse = np.square(ts_val_y - regr.predict(ts_val_X))
        p = pairedtest(cur_mse, first_mse, 10000)
        if (p < 0.05 and np.mean(cur_mse) > np.mean(first_mse)) or len(top_influence_genes) < 3:
            data_mean = old_data_mean
            data_std = old_data_std
            regr = old_regr
            ts_val_X = old_ts_val_X
            top_influence_genes = old_top_influence_genes
            break
        else:
            old_mse = cur_mse
            top_half = np.argsort(regr.feature_importances_)[int(top_influence_genes.size/2):]
            old_top_influence_genes = top_influence_genes
            top_influence_genes = top_influence_genes.iloc[top_half]
            if (target_tf not in top_influence_genes.values):
                top_influence_genes = top_influence_genes.append(tf_list[tf_list == target_tf])
            # if (len(old_top_influence_genes) == len(top_influence_genes)):
            if (len(old_top_influence_genes) == len(top_influence_genes) or len(top_influence_genes) < 3):
                data_mean = old_data_mean
                data_std = old_data_std
                regr = old_regr
                ts_val_X = old_ts_val_X
                top_influence_genes = old_top_influence_genes
                break

            old_data_mean = data_mean
            old_data_std = data_std
            old_regr = regr
            old_ts_val_X = ts_val_X

            
    top_feature_size_list.append(top_influence_genes.size)
    if (top_influence_genes.size < tf_list.size):
        bottom_influence_genes = list(set(tf_list) - (set(top_influence_genes)))
        bottom_regr = RandomForestRegressor(random_state=42, warm_start=True, n_estimators=300, n_jobs=20)
        ts_train_X_comp = ts_df[ts_exp_index_source].T[bottom_influence_genes]
        ts_val_X_comp = ts_df[ts_exp_index_source_val].T[bottom_influence_genes]
        ts_train_y = ts_train_y_list.loc[target_gene]
        ts_val_y = ts_val_y_list.loc[target_gene]
        bottom_regr = bottom_regr.fit(ts_train_X_comp, ts_train_y)
        bottom_mse_list.append(mean_squared_error(ts_val_y,bottom_regr.predict(ts_val_X_comp)))
    else:
        bottom_mse_list.append(0)

    

    input_mean = data_mean
    # input_std = ts_train_X.std()
    naive_mse_list.append(mean_squared_error(ts_val_y,ts_val_naive_y))
    mse_list.append(mean_squared_error(ts_val_y,regr.predict(ts_val_X)))

    base_prediction = regr.predict(np.array(input_mean).reshape(1,-1))[0]
    y_std = ts_df.T.std()[target_gene]
    perturbation_list = list(choose_2_3(top_influence_genes))
    # perturbation_list = [set(perturbation).union(set([target_tf])) for perturbation in perturbation_list]
    # perturbation_list.insert(0, set([target_tff]))
    
    perturbation_input_list = []
    for perturbation_genes in perturbation_list:
        perturbation_input = data_mean.copy()
        for gene in perturbation_genes:
            perturbation_input[gene] += data_std[gene] * perturbation_factor
        perturbation_input_list.append(perturbation_input)
    
    perturbation_list_names = ['; '.join(perturbation_genes) for perturbation_genes in perturbation_list]
    perturbation_input = np.array(perturbation_input_list)
    perturbation_result_list = (regr.predict(perturbation_input) - base_prediction)/y_std
    if (perturbation_result_list[0] > 0):
        result_list.append(np.array(perturbation_list_names)[np.argsort(perturbation_result_list)[::-1][:5]])
        result_measure_list.append(np.array(perturbation_result_list)[np.argsort(perturbation_result_list)[::-1][:5]])
    else:
        result_list.append(np.array(perturbation_list_names)[np.argsort(perturbation_result_list)[:5]])
        result_measure_list.append(np.array(perturbation_result_list)[np.argsort(perturbation_result_list)[:5]])
   
    result_list.append([target_tf])
    result_measure_list.append(perturbation_result_list[0])


100%|██████████| 642/642 [57:42<00:00,  5.39s/it] 


In [71]:
# efron trick #2
regr = RandomForestRegressor(random_state=0)
# regr = xgb.XGBRegressor()

ts_train_y_list = ts_df[ts_exp_index_target]
# set training target to expression level change
ts_train_y_list.iloc[:, :] = ts_df[ts_exp_index_target].values - ts_df[ts_exp_index_source].values
ts_val_y_list = ts_df[ts_exp_index_target_val]
# set training target to expression level change
ts_val_y_list.iloc[:, :] = ts_df[ts_exp_index_target_val].values - ts_df[ts_exp_index_source_val].values

ts_val_naive_y_list = ts_df[ts_exp_index_source_val]
ts_val_naive_y_list.loc[:,:] = 0


result_list = []
result_measure_list = []
mse_list = []
naive_mse_list = []
bottom_mse_list = []
top_feature_size_list = []


for target_gene in tqdm(target_genes):
    old_mse = cur_mse
    first_mse = cur_mse
    outer_loop_flag = True
    feature_pool = train_gene_index
    selected_features = pd.Series([])
    while (outer_loop_flag):
        top_influence_genes = feature_pool
        ts_train_X = ts_df[ts_exp_index_source].T[top_influence_genes]
        ts_val_X = ts_df[ts_exp_index_source_val].T[top_influence_genes]
        regr = regr.fit(ts_train_X, ts_train_y)
        cur_mse = np.square(ts_val_y - regr.predict(ts_val_X))
        p = pairedtest(cur_mse, first_mse, 10000)
        if (p < 0.05 and np.mean(cur_mse) > np.mean(first_mse)) or len(top_influence_genes) < 3:
            outer_loop_flag = False
        top_half = np.argsort(regr.feature_importances_)[int(top_influence_genes.size/2):]
        old_top_influence_genes = top_influence_genes
        top_influence_genes = top_influence_genes.iloc[top_half]
        old_data_mean = data_mean
        old_data_std = data_std
        old_regr = regr
        old_ts_val_X = ts_val_X

        inner_loop_flag = True

        while(inner_loop_flag):
            data_mean = ts_df.T[top_influence_genes].mean()
            data_std = ts_df.T[top_influence_genes].std()
            regr = RandomForestRegressor(random_state=42, warm_start=False, n_estimators=300, n_jobs=20)
            ts_train_X = ts_df[ts_exp_index_source].T[top_influence_genes]
            ts_val_X = ts_df[ts_exp_index_source_val].T[top_influence_genes]
            ts_train_y = ts_train_y_list.loc[target_gene]
            ts_val_y = ts_val_y_list.loc[target_gene]
            ts_val_naive_y = ts_val_naive_y_list.loc[target_gene]
            regr = regr.fit(ts_train_X, ts_train_y)
            # cur_mse = mean_squared_error(ts_val_y,regr.predict(ts_val_X))
            cur_mse = np.square(ts_val_y - regr.predict(ts_val_X))
            p = pairedtest(cur_mse, first_mse, 10000)
            if (p < 0.05 and np.mean(cur_mse) > np.mean(first_mse)) or len(top_influence_genes) < 3:
                data_mean = old_data_mean
                data_std = old_data_std
                regr = old_regr
                ts_val_X = old_ts_val_X
                top_influence_genes = old_top_influence_genes
                inner_loop_flag = False
            else:
                old_mse = cur_mse
                top_half = np.argsort(regr.feature_importances_)[int(top_influence_genes.size/2):]
                old_top_influence_genes = top_influence_genes
                top_influence_genes = top_influence_genes.iloc[top_half]
                # if (len(old_top_influence_genes) == len(top_influence_genes)):
                if (len(old_top_influence_genes) == len(top_influence_genes) or len(top_influence_genes) < 3):
                    data_mean = old_data_mean
                    data_std = old_data_std
                    regr = old_regr
                    ts_val_X = old_ts_val_X
                    top_influence_genes = old_top_influence_genes
                    inner_loop_flag = False

                old_data_mean = data_mean
                old_data_std = data_std
                old_regr = regr
                old_ts_val_X = ts_val_X
        
        feature_pool = feature_pool[~feature_pool.isin(top_influence_genes)]
        if (feature_pool.size == 0):
            outer_loop_flag = False
        selected_features = selected_features.append(top_influence_genes)
        
        
        
                



    top_influence_genes = selected_features
    if (target_tf not in top_influence_genes.values):
        top_influence_genes = top_influence_genes.append(tf_list[tf_list == target_tf])
    
    data_mean = ts_df.T[top_influence_genes].mean()
    data_std = ts_df.T[top_influence_genes].std()
    ts_train_X = ts_df[ts_exp_index_source].T[top_influence_genes]
    ts_val_X = ts_df[ts_exp_index_source_val].T[top_influence_genes]
    input_mean = data_mean
    input_std = data_std

    regr = regr.fit(ts_train_X, ts_train_y)
    
    top_feature_size_list.append(top_influence_genes.size)
    if (top_influence_genes.size < tf_list.size):
        bottom_influence_genes = list(set(tf_list) - (set(top_influence_genes)))
        bottom_regr = RandomForestRegressor(random_state=42, warm_start=True, n_estimators=300, n_jobs=20)
        ts_train_X_comp = ts_df[ts_exp_index_source].T[bottom_influence_genes]
        ts_val_X_comp = ts_df[ts_exp_index_source_val].T[bottom_influence_genes]
        ts_train_y = ts_train_y_list.loc[target_gene]
        ts_val_y = ts_val_y_list.loc[target_gene]
        bottom_regr = bottom_regr.fit(ts_train_X_comp, ts_train_y)
        bottom_mse_list.append(mean_squared_error(ts_val_y,bottom_regr.predict(ts_val_X_comp)))
    else:
        bottom_mse_list.append(0)

    
    naive_mse_list.append(mean_squared_error(ts_val_y,ts_val_naive_y))
    mse_list.append(mean_squared_error(ts_val_y,regr.predict(ts_val_X)))

    base_prediction = regr.predict(np.array(input_mean).reshape(1,-1))[0]
    y_std = ts_df.T.std()[target_gene]
    perturbation_list = list(choose_2_3(top_influence_genes))
    # perturbation_list = [set(perturbation).union(set([target_tf])) for perturbation in perturbation_list]
    # perturbation_list.insert(0, set([target_tff]))
    
    perturbation_input_list = []
    for perturbation_genes in perturbation_list:
        perturbation_input = data_mean.copy()
        for gene in perturbation_genes:
            perturbation_input[gene] += data_std[gene] * perturbation_factor
        perturbation_input_list.append(perturbation_input)
    
    perturbation_list_names = ['; '.join(perturbation_genes) for perturbation_genes in perturbation_list]
    perturbation_input = np.array(perturbation_input_list)
    perturbation_result_list = (regr.predict(perturbation_input) - base_prediction)/y_std
    if (perturbation_result_list[0] > 0):
        result_list.append(np.array(perturbation_list_names)[np.argsort(perturbation_result_list)[::-1][:5]])
        result_measure_list.append(np.array(perturbation_result_list)[np.argsort(perturbation_result_list)[::-1][:5]])
    else:
        result_list.append(np.array(perturbation_list_names)[np.argsort(perturbation_result_list)[:5]])
        result_measure_list.append(np.array(perturbation_result_list)[np.argsort(perturbation_result_list)[:5]])
   
    result_list.append([target_tf])
    result_measure_list.append(perturbation_result_list[0])


100%|██████████| 642/642 [9:33:28<00:00, 53.60s/it]   


In [69]:
feature_pool

Series([], dtype: object)

In [21]:
fixed_result_list = []
for i in range(0, len(result_list), 2):
    current_list = result_list[i]
    if (len(current_list) == 4):
        current_list = np.append(current_list, 'NA')
    fixed_result_list.append(np.append(current_list, target_tf))

In [22]:
fixed_result_measure_list = []
for i in range(0, len(result_measure_list), 2):
    current_list = result_measure_list[i]
    if (len(current_list) == 4):
        current_list = np.append(current_list, 0)
    fixed_result_measure_list.append(np.append(current_list, result_measure_list[i+1]))

In [23]:
fixed_result_measure_list = np.array(fixed_result_measure_list)
fixed_result_list = np.array(fixed_result_list)

In [24]:
fixed_result_measure_list.shape

(642, 6)

In [25]:
result_measure_list = np.array(result_measure_list)
result_list = np.array(result_list)
out_df = pd.DataFrame()
out_df.index = target_genes
for i in range(6):
    comb_name = 'top_{}_combination'.format(i+1)
    score_name = 'top_{}_score'.format(i+1)
    out_df[comb_name] = fixed_result_list[:,i]
    out_df[score_name] = fixed_result_measure_list[:,i]
out_df['mse'] = mse_list
out_df['naive_mse'] = naive_mse_list
out_df['mse_diff'] = np.array(mse_list) - np.array(naive_mse_list)
out_df['bottom_mse'] = bottom_mse_list

# out_df['top_comb'] = fixed_result_list
# out_df['top_comb_score'] = fixed_result_measure_list


out_df['feature_size'] = top_feature_size_list

In [26]:
out_df

Unnamed: 0,top_1_combination,top_1_score,top_2_combination,top_2_score,top_3_combination,top_3_score,top_4_combination,top_4_score,top_5_combination,top_5_score,top_6_combination,top_6_score,mse,naive_mse,mse_diff,bottom_mse,feature_size
AT5G02290,AT1G13960; AT2G04880; AT2G03340,-1.013079,AT4G12020; AT2G04880; AT2G03340,-0.929601,AT4G12020; AT1G13960; AT2G03340,-0.928378,AT4G12020; AT1G13960; AT2G04880,-0.890273,AT2G25000; AT2G04880; AT2G03340,-0.816273,AT2G46680,-0.118319,0.010453,0.008838,0.001615,0.011816,19
AT2G07725,AT2G04880; AT1G13960; AT2G30250,0.617720,AT4G31800; AT2G04880; AT1G13960,0.558061,AT2G04880; AT1G13960; AT1G30650,0.556204,AT2G04880; AT1G13960; AT4G12020,0.545465,AT2G04880; AT3G01970; AT1G13960,0.541068,AT2G46680,0.054014,0.001261,0.000917,0.000344,0.000000,72
AT1G71697,AT2G46400; AT1G29280; AT4G22070,0.738358,AT5G41570; AT1G29280; AT4G22070,0.730320,AT1G29280; AT2G21900; AT4G22070,0.729243,AT4G31800; AT1G29280; AT4G22070,0.697773,AT3G58710; AT1G29280; AT4G22070,0.691159,AT2G46680,0.028477,0.000605,0.000849,-0.000244,0.000552,18
AT1G59660,AT5G49520; AT4G04450; AT2G46680,-0.278103,AT5G49520; AT2G46680,-0.254253,AT5G49520; AT4G04450,-0.238396,AT4G04450; AT2G46680,-0.182022,,0.000000,AT2G46680,-0.238396,0.000246,0.000348,-0.000102,0.000318,3
AT3G03110,AT5G49520; AT5G41570,-0.108275,AT5G49520; AT5G41570; AT2G46680,-0.036007,AT5G49520; AT2G46680,0.049921,AT5G41570; AT2G46680,0.393232,,0.000000,AT2G46680,-0.108275,0.000768,0.000459,0.000309,0.000597,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AT5G05730,AT4G01250; AT2G46680,0.854913,AT4G01250; AT3G01970,-2.225456,AT4G01250; AT2G46680; AT3G01970,-2.234743,AT2G46680; AT3G01970,-2.374312,,0.000000,AT2G46680,0.854913,0.060081,0.041942,0.018139,0.065035,3
AT3G15340,AT5G43290; AT1G13960,0.754760,AT5G43290; AT1G13960; AT2G46680,0.563082,AT1G13960; AT2G46680,0.524012,AT5G43290; AT2G46680,-0.345993,,0.000000,AT2G46680,0.754760,0.000564,0.000587,-0.000023,0.000674,3
AT4G11480,AT1G30650; AT5G07100; AT1G13960,1.004514,AT5G07100; AT4G31800; AT1G13960,0.983172,AT3G58710; AT5G07100; AT1G13960,0.983146,AT1G30650; AT4G31800; AT1G13960,0.973563,AT3G58710; AT1G30650; AT1G13960,0.969352,AT2G46680,0.080494,0.001409,0.000952,0.000456,0.000989,10
AT1G18890,AT5G07100; AT4G26640; AT4G01250,0.395281,AT5G07100; AT2G38470; AT4G26640,0.370442,AT1G69310; AT5G07100; AT4G26640,0.367763,AT2G38470; AT4G26640; AT4G01250,0.345363,AT2G46680; AT5G07100; AT4G26640,0.344611,AT2G46680,0.017414,0.007060,0.004553,0.002507,0.005009,36


In [27]:
out_df.to_csv('output/wrky_presentation_neg_rf_new.csv')

In [4]:
out_df = pd.read_csv('output/wrky_presentation_neg_rf_new.csv', index_col=0)

In [30]:
import re
regex = re.compile(r'[-+]?\d*\.\d+|\d+')

res_list = []
for i in out_df['top_comb_score'].values:
    numbers = regex.findall(i)
    value = float(numbers[-1])
    res_list.append(value)

In [32]:
out_df['top_6_score'] = res_list

In [50]:
(out_df['top_6_score'] < 0).sum()

333

In [52]:
(out_df['mse_diff'] > 0).sum()

478

In [20]:
out_df

Unnamed: 0,top_1_combination,top_1_score,top_2_combination,top_2_score,top_3_combination,top_3_score,top_4_combination,top_4_score,top_5_combination,top_5_score,top_6_combination,top_6_score,mse,naive_mse,mse_diff,bottom_mse,feature_size
AT5G02400,AT2G04880; AT1G69810; AT3G58710,0.477039,AT1G69810; AT5G15130; AT3G58710,0.449460,AT1G69810; AT2G21900; AT3G58710,0.440417,AT1G69810; AT3G58710,0.426258,AT2G04880; AT5G15130; AT3G58710,0.382458,AT2G46680,0.242310,0.000295,0.000664,-0.000369,0.000890,7
AT5G57010,AT2G03340; AT1G13960; AT4G23550,0.306276,AT1G13960; AT4G23550; AT5G24110,0.295107,AT1G13960; AT4G23550; AT3G58710,0.291633,AT4G12020; AT1G13960; AT4G23550,0.284343,AT1G13960; AT4G39410; AT4G23550,0.280021,AT2G46680,0.068326,0.000605,0.001169,-0.000564,0.001222,37
AT4G23010,AT4G01250; AT4G31550; AT4G01720,0.410815,AT4G31550; AT4G01720; AT3G04670,0.403659,AT1G80840; AT4G31550; AT4G01720,0.395708,AT2G46680; AT4G31550; AT4G01720,0.392128,AT4G01250; AT2G46680; AT4G31550,0.391550,AT2G46680,0.002761,0.021592,0.043545,-0.021953,0.000000,72
AT2G40730,AT2G03340; AT2G04880; AT1G13960,-1.201697,AT4G12020; AT2G03340; AT1G13960,-1.147590,AT4G12020; AT2G04880; AT1G13960,-1.136804,AT2G30250; AT2G03340; AT1G13960,-0.987785,AT2G30250; AT2G04880; AT1G13960,-0.983205,AT2G46680,-0.016949,0.001022,0.002144,-0.001122,0.000801,36
AT2G07180,AT2G21900; AT1G69810; AT1G66600,-0.053371,AT2G21900; AT1G30650; AT1G66600,-0.049369,AT2G21900; AT1G66600; AT5G01900,-0.048483,AT2G21900; AT1G66600; AT5G52830,-0.044495,AT2G21900; AT1G30650; AT5G01900,-0.043959,AT2G46680,-0.001911,0.002137,0.002837,-0.000700,0.001309,37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AT1G07000,AT4G01250; AT5G56270; AT4G31550,0.776311,AT1G13960; AT4G01250; AT4G31550,0.775026,AT2G23320; AT4G01250; AT4G31550,0.761206,AT1G62300; AT4G01250; AT4G31550,0.760608,AT2G46680; AT4G01250; AT4G31550,0.758343,AT2G46680,0.002904,0.026028,0.041213,-0.015185,0.029775,36
AT5G19290,AT2G03340; AT1G13960; AT1G30650,-0.149591,AT2G03340; AT1G30650; AT4G18170,-0.148271,AT2G03340; AT2G04880; AT1G30650,-0.146021,AT2G03340; AT2G44745; AT1G30650,-0.142120,AT2G03340; AT1G30650; AT5G43290,-0.140904,AT2G46680,-0.018791,0.005652,0.014436,-0.008785,0.000000,72
AT4G03480,AT1G30650; AT2G46680,0.987003,AT2G38470; AT1G30650; AT2G46680,0.640048,AT2G38470; AT1G30650,0.624371,AT2G38470; AT2G46680,-0.539617,,0.000000,AT2G46680,0.624371,0.000441,0.001059,-0.000618,0.001018,3
AT1G16130,AT2G03340; AT1G13960; AT5G45050,-0.372897,AT2G03340; AT5G22570; AT5G45050,-0.348711,AT2G03340; AT5G45050; AT2G40750,-0.347527,AT2G03340; AT3G01970; AT5G45050,-0.347074,AT1G13960; AT5G45050; AT2G40750,-0.341321,AT2G46680,-0.009542,0.000258,0.000082,0.000176,0.000000,72


In [5]:
A = np.random.rand(5)
B = np.random.rand(5)


In [7]:
A-B

array([ 0.56661186, -0.18662621, -0.36603361,  0.48380576,  0.91346389])