In [1]:
import pandas as pd
import argparse
from PreprocessData import *
from EstimateMI import *
from mRMR import *
from Evaluate import *
from itertools import permutations, product

In [2]:
def cal_mutual_information(data, count, TF_set=None):
    """Score ordered gene pairs with ``cal_mi`` and collect the results.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one column per gene; each pair is scored on the two
        columns' ``.values``.
    count
        Forwarded unchanged as the last argument of ``cal_mi``.
    TF_set : sequence of str, optional
        Column names to use as the first gene of each pair. When omitted or
        empty, every ordered pair of distinct columns is scored. Otherwise
        only pairs ``(tf, gene)`` are scored, with ``tf`` from ``TF_set`` and
        ``gene`` from the columns of ``data``.

    Returns
    -------
    pandas.DataFrame
        One row per scored pair. ``Gene1``, ``Gene2`` and ``score`` are the
        leading columns; any other keys returned by ``cal_mi`` follow. Every
        row carries index label 0, as in the per-pair frames it is built from.
        If there is no pair to score, an empty frame with the three expected
        columns is returned.

    Notes
    -----
    ``cal_mi`` is not defined in this notebook (it arrives through a star
    import). It is assumed to return a mapping of scalars keyed by
    ``Gene1``/``Gene2``/``score`` -- confirm against its definition.
    """
    expected_cols = ['Gene1', 'Gene2', 'score']

    if TF_set is None or len(TF_set) == 0:
        gene_pairs = permutations(data.columns.values, 2)
    else:
        # De-duplicate while keeping column order so the row order of the
        # result is reproducible (a plain set() iterates in hash order).
        targets = list(dict.fromkeys(data.columns))
        gene_pairs = product(TF_set, targets)

    # Build all one-row frames first and concatenate once; concatenating
    # inside the loop copies the accumulated frame on every iteration.
    frames = [
        pd.DataFrame(
            cal_mi(src, dst, data[src].values, data[dst].values, count),
            index=[0],
        )
        for src, dst in gene_pairs
        if src != dst
    ]
    if not frames:
        return pd.DataFrame(columns=expected_cols)

    df_res = pd.concat(frames)
    # Keep the documented columns in front (and present) regardless of the
    # key order used by cal_mi.
    extra_cols = [c for c in df_res.columns if c not in expected_cols]
    return df_res.reindex(columns=expected_cols + extra_cols)

In [12]:
def filter_by_mRMR(data):
    """Run ``MRMR`` once per distinct target gene and stack the results.

    Parameters
    ----------
    data : pandas.DataFrame
        Edge table with a ``Gene2`` column. For each distinct ``Gene2`` value
        (in order of first appearance) the rows with that value are passed to
        ``MRMR`` together with the full table.

    Returns
    -------
    pandas.DataFrame
        The per-target results concatenated in target order, each keeping its
        own index. The columns of ``data`` come first, followed by any extra
        columns produced by ``MRMR``. If ``data`` has no rows, an empty frame
        with the columns of ``data`` is returned.

    Notes
    -----
    ``MRMR`` is not defined in this notebook (it arrives through a star
    import). Its return value only needs to be accepted by
    ``pandas.DataFrame.from_dict``.
    """
    # Collect the per-target frames and concatenate once; concatenating
    # inside the loop copies the accumulated frame on every iteration.
    frames = []
    for target in data.Gene2.drop_duplicates():
        selected = MRMR(data.loc[data.Gene2 == target], data)
        frames.append(pd.DataFrame.from_dict(selected))

    if not frames:
        return pd.DataFrame(columns=data.columns)

    df_res = pd.concat(frames)
    # Keep the input's columns in front (and present) regardless of the key
    # order used by MRMR.
    lead_cols = list(data.columns)
    extra_cols = [c for c in df_res.columns if c not in lead_cols]
    return df_res.reindex(columns=lead_cols + extra_cols)

In [14]:
def runMKG(df_exp, df_pse, slide=1, k=5, out_file="rankedEdges.csv"):
    """Run the three-step pipeline and return the ranked edge table.

    Parameters
    ----------
    df_exp : pandas.DataFrame
        Expression table, forwarded to ``smooth``.
    df_pse : pandas.DataFrame
        Pseudotime table, forwarded to ``smooth``.
    slide : int, default 1
        Forwarded to ``smooth`` as its third argument.
    k : int, default 5
        Forwarded to ``smooth`` as its fourth argument.
    out_file : str or path-like or None, default "rankedEdges.csv"
        Where the final table is written as CSV (header, no index). Pass
        ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        Edges with ``score > 0`` after the mRMR step, sorted by descending
        ``score``.

    Notes
    -----
    Prints the shape of the positive-score MI table as a progress indicator.
    ``smooth`` is not defined in this notebook (it arrives through a star
    import); here it is only relied on to return ``(count, expression_frame)``.
    """
    # --------------- STEP1 preprocess the expression data ---------------
    count, df_exp = smooth(df_pse, df_exp, slide, k)

    # --------------- STEP2 estimate the MI ---------------
    df_mi = cal_mutual_information(df_exp, count)
    # Chain filter + sort instead of sorting a .loc slice in place, which can
    # trigger pandas' SettingWithCopy warning.
    df_mi = df_mi.loc[df_mi.score > 0].sort_values(by='score', ascending=False)
    print(df_mi.shape)

    # --------------- STEP3 filter out redundant edges using mRMR ---------------
    df_mrmr = filter_by_mRMR(df_mi)
    df_mrmr = df_mrmr.sort_values(by='score', ascending=False)
    df_mrmr = df_mrmr.loc[df_mrmr.score > 0]
    if out_file is not None:
        df_mrmr.to_csv(out_file, header=True, index=False)

    return df_mrmr


In [5]:
# Load the expression matrix (genes in columns, cells in rows).
# index_col=0 turns the first CSV column (the cell IDs) into the row index;
# the first CSV row supplies the gene names.
data_file = "./input/ExpressionData.csv"
df_exp = pd.read_csv(data_file, header=0, index_col=0)
df_exp

Unnamed: 0,AMH,CBX2,CTNNB1,DHH,DKK1,DMRT1,FGF9,FOXL2,GATA4,NR0B1,NR5A1,PGD2,RSPO1,SOX9,SRY,UGR,WNT4,WT1mKTS,WT1pKTS
E801_438,0.338789,0.002717,0.011462,0.353490,1.847789,1.570476,0.552496,0.010542,2.107367,1.859390,2.479147,0.497765,0.062891,1.496546,2.173483,0.002842,0.015687,1.876589,1.742427
E1_769,1.841659,0.002191,0.005118,2.554467,2.208555,2.207165,1.708263,0.035365,2.257457,0.071098,2.175349,2.187490,0.020600,2.053794,2.022430,0.042965,0.010180,1.535546,2.694481
E1795_733,0.017036,0.008347,1.845364,0.009483,0.014649,0.001894,0.001159,2.329127,1.777964,1.992085,0.002785,0.040321,2.011789,0.000423,0.133693,0.000880,1.868029,1.701487,0.003895
E645_767,1.704565,0.000082,0.011418,2.252899,1.713882,1.683637,1.753460,0.008139,1.725430,0.013730,1.460529,1.927144,0.093350,1.537387,2.389513,0.007614,0.008643,2.239090,1.723586
E487_58,0.038238,2.653297,0.000567,0.006458,0.012911,0.025892,0.002862,0.052882,2.317660,0.045446,2.017028,0.003215,0.027589,0.003213,0.001202,1.466632,0.644407,2.220691,2.211617
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
E1668_701,2.007235,0.004051,0.038742,1.912524,2.060732,2.412576,2.352512,0.009410,1.132894,0.070532,2.443853,2.248547,0.066583,1.493709,1.677380,0.064790,0.127811,1.994041,1.848790
E1825_132,0.003299,2.236282,0.026806,0.015670,0.011654,0.002592,0.056516,0.020031,1.932312,2.356923,1.937043,0.095029,0.014272,0.001954,1.574976,0.027421,1.547554,2.172956,1.708257
E1061_461,1.845126,0.041465,0.010069,2.227554,1.681645,2.380255,1.840398,0.002129,2.344368,0.058909,1.624976,2.536339,0.004035,2.374522,1.917857,0.004376,0.004686,2.599745,2.025330
E1754_124,0.098357,1.921999,0.011521,0.004356,0.007200,0.001699,0.002197,0.046130,2.058861,1.911860,2.336047,0.000774,0.133083,0.012217,1.895441,0.540321,1.204940,1.892498,2.133672


In [6]:
# Load the pseudotime table. index_col=0 turns the first CSV column (the
# cell IDs) into the row index.
time_file = "./input/PseudoTime.csv"
df_pse = pd.read_csv(time_file, header=0, index_col=0)
df_pse

Unnamed: 0,PseudoTime1,PseudoTime2
E801_438,,0.54762
E1_769,,0.96241
E1795_733,0.91729,
E645_767,,0.95990
E487_58,,0.07143
...,...,...
E1668_701,,0.87719
E1825_132,0.16416,
E1061_461,,0.57644
E1754_124,,0.15414


In [15]:
# Run the MKG algorithm to reconstruct the GRN.
# The sliding length (`slide`) and window size (`k`) default to 1 and 5;
# pass them to runMKG() explicitly to use other values.
# runMKG() also writes the ranked edges to "rankedEdges.csv".
df_out = runMKG(df_exp, df_pse)
df_out

(342, 3)


Unnamed: 0,Gene1,Gene2,score
0,DMRT1,DKK1,1.165015
0,DKK1,DMRT1,1.165015
0,FGF9,PGD2,1.041024
0,PGD2,FGF9,1.041024
0,FGF9,DHH,1.034303
...,...,...,...
0,DHH,DKK1,0.002166
0,WT1mKTS,PGD2,0.001738
0,GATA4,DKK1,0.001657
0,AMH,DMRT1,0.001644


In [16]:
# Load the ground-truth (reference) network and keep only its first two
# columns (Gene1, Gene2 in the displayed output).
ref_file = "./input/refNetwork.csv"
df_ref = pd.read_csv(ref_file, header=0).iloc[:, :2]
df_ref

Unnamed: 0,Gene1,Gene2
0,SOX9,AMH
1,GATA4,AMH
2,NR5A1,AMH
3,WT1mKTS,AMH
4,NR0B1,AMH
...,...,...
74,CTNNB1,WNT4
75,RSPO1,WNT4
76,NR0B1,WNT4
77,FGF9,WNT4


In [17]:
# Evaluate the ranked edges against the reference network.
# concat_ref() and cal_auc_aupr() are not defined in this notebook; they come
# from the star imports in the first cell (presumably Evaluate -- confirm).
# The displayed result is a dict with 'AUROC' and 'AUPRC' entries.
df = concat_ref(df_out, df_ref)
res = cal_auc_aupr(df)
res

{'AUROC': 0.6046228710462287, 'AUPRC': 0.32650787499599787}