In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm 
import os

In [2]:
all_dir = [x[0] for x in os.walk('.')]
set_dirs = []
for dir in all_dir:
    if len(dir.split('/')) == 3:
        set_dirs.append(dir)

In [9]:
for set_dir in set_dirs:
    algo_paths = [f.path for f in os.scandir(set_dir) if f.is_dir()]
    result_files = [path + '/rankedEdges.csv' for path in algo_paths]
    algo_names = [path.split('/')[3] for path in algo_paths]
    matrix_list = []
    target_df = pd.read_csv(set_dir + '/target_edges.csv')
    target_edge_set = set()
    for index, row in target_df.iterrows():
        target_edge_set.add(row['regulator']+'_'+row['target'])        
    ensemble_df = pd.DataFrame(columns=['edge_name'] + algo_names + ['edge_exist'])
    for result_file in tqdm(result_files):
        df = pd.read_csv(result_file, sep='\t')
        tf_set = set(df['Gene1'])
        gene_set = set(df['Gene2'])
        matrix = pd.DataFrame(0.0, index=gene_set, columns=tf_set)
        for index, row in df.iterrows():
            matrix.at[row['Gene2'], row['Gene1']] = np.abs(row['EdgeWeight'])
        matrix_list.append(matrix)
    for regulator_name in tqdm(tf_set):
        for target_name in gene_set:
            edge_name = regulator_name + '_' + target_name
            score_list = [matrix.loc[target_name][regulator_name] for matrix in matrix_list]
            edge_exist = 0
            if edge_name in target_edge_set:
                edge_exist = 1
            ensemble_df = ensemble_df.append(pd.DataFrame([[edge_name]+ score_list + [edge_exist]], columns=['edge_name'] + algo_names + ['edge_exist']), ignore_index=True)
    ensemble_df = ensemble_df.astype({'edge_exist': 'int8'})
    ensemble_df.to_csv(set_dir + '/ensemble.csv', index=False)

100%|██████████| 10/10 [00:09<00:00,  1.00it/s]
100%|██████████| 100/100 [00:21<00:00,  4.68it/s]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 100/100 [00:21<00:00,  4.69it/s]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 100/100 [00:21<00:00,  4.70it/s]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 100/100 [00:21<00:00,  4.71it/s]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 100/100 [00:21<00:00,  4.71it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
0it [00:00, ?it/s]
100%|██████████| 10/10 [00:09<00:00,  1.02it/s]
100%|██████████| 100/100 [00:21<00:00,  4.72it/s]
100%|██████████| 10/10 [00:09<00:00,  1.02it/s]
100%|██████████| 100/100 [00:21<00:00,  4.70it/s]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 100/100 [00:21<00:00,  4.69it/s]
100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
100%|██████████| 100/100 [00:21<00:00,  4.69it/s]
100%|██████████| 10/10 [00:02<00:00,  4.02it/s]
100