In [4]:
import pandas as pd
import os
import re

from ConSReg.ConSReg import ConSReg

File names of input data

In [5]:
# Locations of the input data. Directory listings are sorted because
# os.listdir() returns entries in arbitrary order; without sorting, the order
# of the contrasts (and of every result table derived from them) could differ
# from one machine or run to the next.
DAP_DIR = "data/dap_seq_all_peaks/"
DIFF_DIR = "data/diff_evalB/"

# Dap-seq narrow peak files
# Files are selected by suffix so that stray entries such as editor backups
# ("x.narrowPeak~") are not picked up. The loop variable is `fname` rather
# than `file`: on Python 2 a list-comprehension variable leaks into the
# notebook namespace and would shadow the builtin `file`.
dap_file_list = sorted(os.listdir(DAP_DIR))
dap_file = [os.path.join(DAP_DIR, fname)
            for fname in dap_file_list
            if fname.endswith(".narrowPeak")]
if not dap_file:
    raise IOError("No .narrowPeak files found in " + DAP_DIR)

# ATAC-seq peak file
atac_file = "data/atac_seq_all_merged.bed"

# Arabidopsis genome annotation file
gff_file = "data/TAIR10_GFF3_genes.gff"

# Differential contrast result generated by DESeq2
diff_file_list = sorted(os.listdir(DIFF_DIR))
diff_file = [os.path.join(DIFF_DIR, fname)
             for fname in diff_file_list
             if fname.endswith(".csv")]
if not diff_file:
    raise IOError("No .csv files found in " + DIFF_DIR)

Create a ConSReg object for analysis

In [6]:
# Create the ConSReg analysis object; every later cell calls methods on this
# single instance.
analysis = ConSReg()

Read and preprocess all data files

In [7]:
# Hand all four inputs to the analysis object in one call: the DAP-seq peak
# files, the DESeq2 contrast tables, the ATAC-seq peak file and the GFF
# annotation. The log printed below reports the DAP-seq merge, the
# DAP-seq/ATAC-seq overlap and the reading of the diff tables.
analysis.preprocess(
    dap_file=dap_file,
    diff_file=diff_file,
    atac_file=atac_file,
    gff_file=gff_file
)

Merging DAP-seq peaks...





>> preparing features information...		 2018-10-21 02:20:40 PM 
>> identifying nearest features...		 2018-10-21 02:20:40 PM 
>> calculating distance from peak to TSS...	 2018-10-21 02:21:58 PM 
>> assigning genomic annotation...		 2018-10-21 02:21:58 PM 
>> assigning chromosome lengths			 2018-10-21 02:22:24 PM 
>> done...					 2018-10-21 02:22:24 PM 
Done
Overlapping DAP-seq with ATAC-seq...
Done
Reading diff tables...
Done


<ConSReg.ConSReg.ConSReg instance at 0x7f64b0313a28>

Generate feature matrices for each differential contrast

In [8]:
# Build the feature matrices (one per differential contrast, per the caption
# above). The cell echoes the repr of the same ConSReg instance, so the call
# evidently returns the analysis object itself.
analysis.gen_feature_mat()

Generating feature matrices...
Done


<ConSReg.ConSReg.ConSReg instance at 0x7f64b0313a28>

Compute AUC-ROC and AUC-PRC from cross-validation (CV) using the LRLASSO method. The mean and standard deviation of AUC-ROC and AUC-PRC are reported from five replicates of CV.

In [9]:
# Cross-validate every feature matrix with the 'lrlasso' engine, using 5
# replicates (rep). n_jobs is the parallel worker count -- the log below
# prints "Parallel(n_jobs=44)". The value 44 is hard-coded, so adjust it to
# the number of cores available on your machine.
analysis.eval_by_cv(
    ml_engine='lrlasso',
    rep=5,
    n_jobs=44
)

Performing cross-validation for each feature matrix using lrlasso engine...


[Parallel(n_jobs=44)]: Done   1 tasks      | elapsed:    9.9s
[Parallel(n_jobs=44)]: Done   2 out of  44 | elapsed:   12.0s remaining:  4.2min
[Parallel(n_jobs=44)]: Done   5 out of  44 | elapsed:   15.2s remaining:  2.0min
[Parallel(n_jobs=44)]: Done   8 out of  44 | elapsed:   16.7s remaining:  1.3min
[Parallel(n_jobs=44)]: Done  11 out of  44 | elapsed:   18.8s remaining:   56.4s
[Parallel(n_jobs=44)]: Done  14 out of  44 | elapsed:   19.6s remaining:   42.0s
[Parallel(n_jobs=44)]: Done  17 out of  44 | elapsed:   20.4s remaining:   32.4s
[Parallel(n_jobs=44)]: Done  20 out of  44 | elapsed:   23.4s remaining:   28.1s
[Parallel(n_jobs=44)]: Done  23 out of  44 | elapsed:   28.4s remaining:   25.9s
[Parallel(n_jobs=44)]: Done  26 out of  44 | elapsed:   33.8s remaining:   23.4s
[Parallel(n_jobs=44)]: Done  29 out of  44 | elapsed:   46.5s remaining:   24.1s
[Parallel(n_jobs=44)]: Done  32 out of  44 | elapsed:   48.9s remaining:   18.3s
[Parallel(n_jobs=44)]: Done  35 out of  44 | el

Done


<ConSReg.ConSReg.ConSReg instance at 0x7f64b0313a28>

Check the CV results

In [10]:
# Display the AUC-ROC table: one row per contrast (diff_name) with mean and
# standard-deviation columns for the _UR and _DR groups.
# NOTE(review): UR/DR presumably mean up-/down-regulated genes -- confirm
# against the ConSReg documentation.
analysis.auroc

Unnamed: 0,diff_name,auroc_mean_UR,auroc_std_UR,auroc_mean_DR,auroc_std_DR
0,GSE81202_13-GSE81202_11.csv,0.840303,0.014519,0.867364,0.020495
1,GSE72806_3-GSE72806_1.csv,0.810262,0.006943,0.842399,0.006255
2,GSE69510_2-GSE69510_1.csv,0.877589,0.023525,0.838269,0.006123
3,PRJEB10930_4_T-PRJEB10930_4_C.csv,0.865207,0.023652,0.825285,0.016382
4,GSE81202_18-GSE81202_16.csv,0.843651,0.004296,0.826559,0.009895
5,GSE67332_5-GSE67332_4.csv,0.805297,0.006391,0.830441,0.0061
6,GSE81202_17-GSE81202_15.csv,0.850454,0.009279,0.839388,0.011173
7,GSE80565_7-GSE80565_8.csv,0.830671,0.016441,0.804367,0.014704
8,GSE63406_5-GSE63406_3.csv,0.826973,0.008192,0.855747,0.010314
9,GSE80565_9-GSE80565_10.csv,0.830737,0.009592,0.808786,0.024848


Generate an importance score for each transcription factor (TF)

In [11]:
# Score each TF via stability selection (see the progress message below),
# requesting 200 resampling rounds. n_jobs=44 is a hard-coded worker count;
# adjust it to the number of cores available on your machine.
analysis.compute_imp_score(
    n_resampling=200,
    n_jobs=44,
    verbose=True
)

Performing stability selection and compute importance score for each TF...
Done


<ConSReg.ConSReg.ConSReg instance at 0x7f64b0313a28>

Generate a gene regulatory network (GRN) for each differential contrast

In [12]:
# Build the networks for every contrast.
# NOTE(review): imp_cutoff presumably thresholds the importance scores
# computed in the previous step at 0.5 -- confirm against the ConSReg docs.
analysis.gen_networks(
    imp_cutoff=0.5,
    verbose=True
)

<ConSReg.ConSReg.ConSReg instance at 0x7f64b0313a28>

Save the analysis results

In [13]:
# All result tables are written below a single output directory.
out_dir = "results/bulk_analysis/"

# to_csv() does not create missing parent directories, so make sure the
# target exists before writing (os.makedirs has no exist_ok on Python 2).
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Cross-validation result
analysis.auroc.to_csv(os.path.join(out_dir, "auroc_result.csv"))
analysis.auprc.to_csv(os.path.join(out_dir, "auprc_result.csv"))

# Importance scores
analysis.imp_scores_UR.to_csv(os.path.join(out_dir, "imp_score_UR.csv"))
analysis.imp_scores_DR.to_csv(os.path.join(out_dir, "imp_score_DR.csv"))

# Networks were saved in the format of edge list.
# The UR and DR network lists are each paired with analysis.diff_name_list,
# giving one "<diff_name>_<UR|DR>_network.csv" file per contrast.
for tag, networks in (("UR", analysis.networks_UR), ("DR", analysis.networks_DR)):
    for diff_name, network in zip(analysis.diff_name_list, networks):
        network.to_csv(os.path.join(out_dir, "{}_{}_network.csv".format(diff_name, tag)))