In [3]:
%matplotlib inline
import pycistarget
pycistarget.__version__

import os
import glob
import pandas as pd
import matplotlib.pyplot as plt

# Load cistarget functions
from pycistarget.motif_enrichment_cistarget import *

import pyranges as pr
from pycistarget.utils import *

In [4]:
## Defining paths to input/output and cistarget db
input_dir = '/nfs/team205/nh3/projects/matthias_fibro/20210418_matthias_bulk_atac/stimulation_de/'
cistarget_db = '/lustre/scratch126/cellgen/team205/is10/fibroblasts/bulk_stim/cistarget/cistarget_db.regions_vs_motifs.rankings.feather'
motif_anno = '/nfs/team205/is10/resources/aerts_motifs/v10nr_clust_public/snapshots/motifs-v10-nr.hgnc-m0.00001-o0.0.tbl'
out_dir = '/lustre/scratch126/cellgen/team205/is10/fibroblasts/bulk_stim/cistarget/enrichment_padj0.01/'
os.makedirs(out_dir, exist_ok=True)

In [5]:
## Cutoffs for padj and logFC for DA peaks
padj_cutoff = 0.01
logFC_cutoff = 0.5

In [6]:
files = os.listdir(input_dir)
condition_paths = {}
for f in files:
    if 'vs-unstimulated.txt' in f:
        condition_paths[f.split('.')[0]] = (input_dir + f)

In [7]:
condition_paths.keys()

dict_keys(['IL17A', 'IL6', 'IL17A_IL22', 'IFNa', 'IL1B', 'TNFa_IL17A', 'TNF_IFNg', 'IL22', 'TNFa', 'TNFa_TGFb1', 'TGFb1', 'OSM', 'TNF_IFNg_IL17A', 'IL13', 'TNFa_IL13', 'TNFa_OSM', 'IFNg'])

In [8]:
region_sets = {}

for cond in condition_paths.keys():
    peaks = pd.read_csv(condition_paths[cond], sep='\t')
    ## Up
    up = peaks[(peaks['padj'] < padj_cutoff) & (peaks['log2FoldChange'] >= logFC_cutoff)]
    up_peaks=up.index.to_series().str.split(',',expand=True)[0].tolist()

    region_sets[(cond + '_up')] = pr.PyRanges(region_names_to_coordinates(up_peaks))

    ## Down
    down = peaks[(peaks['padj'] < padj_cutoff) & (peaks['log2FoldChange'] < ((-1) * logFC_cutoff))]
    down_peaks=down.index.to_series().str.split(',',expand=True)[0].tolist()  

    region_sets[(cond + '_down')] = pr.PyRanges(region_names_to_coordinates(down_peaks))

In [65]:
# Run
cistarget_dict = run_cistarget(ctx_db = cistarget_db,
                               region_sets = region_sets,
                               specie = 'homo_sapiens',
                               annotation_version = 'v10nr_clust',
                               path_to_motif_annotations = motif_anno,
                               auc_threshold = 0.005,
                               nes_threshold = 3.0,
                               rank_threshold = 0.05,
                               annotation = ['Direct_annot', 'Orthology_annot'],
                               n_cpu = 1,
                               _temp_dir='/lustre/scratch126/cellgen/team205/is10/tmp/')

2023-11-27 23:10:58,773 cisTarget    INFO     Reading cisTarget database
2023-11-27 23:13:08,007 cisTarget    INFO     Running cisTarget for IL17A_up which has 186 regions
2023-11-27 23:13:28,513 cisTarget    INFO     Annotating motifs for IL17A_up
2023-11-27 23:13:33,064 cisTarget    INFO     Getting cistromes for IL17A_up
2023-11-27 23:13:33,272 cisTarget    INFO     Running cisTarget for IL17A_down which has 161 regions
2023-11-27 23:13:38,244 cisTarget    INFO     Annotating motifs for IL17A_down
2023-11-27 23:13:41,215 cisTarget    INFO     Getting cistromes for IL17A_down
2023-11-27 23:13:41,522 cisTarget    INFO     Running cisTarget for IL6_up which has 32 regions
2023-11-27 23:13:46,593 cisTarget    INFO     Annotating motifs for IL6_up
2023-11-27 23:13:49,768 cisTarget    INFO     Getting cistromes for IL6_up
2023-11-27 23:13:49,990 cisTarget    INFO     Running cisTarget for IL6_down which has 82 regions
2023-11-27 23:13:54,436 cisTarget    INFO     Annotating motifs for IL6

In [71]:
# Save
import pickle
with open((out_dir + 'cistarget_dict_NES3.0.pkl'), 'wb') as f:
  pickle.dump(cistarget_dict, f)

In [72]:
## Also exporting results in a table format:
motif_enrichment_dict = {key: cistarget_dict[key].motif_enrichment for key in cistarget_dict.keys()}
motif_enrichment_table=pd.concat([motif_enrichment_dict[key] for key in motif_enrichment_dict.keys()], axis=0, sort=False)
motif_enrichment_table.to_csv((out_dir + "cistarget_res_df_NES3.0.txt"), sep="\t")

In [67]:
cistarget_results(cistarget_dict, name="IFNg_up")

Unnamed: 0,Logo,Region_set,Direct_annot,Orthology_annot,NES,AUC,Rank_at_max,Motif_hits
tfdimers__MD00007,,IFNg_up,"E2F1, IRF8",,26.395015,0.038425,20351.0,1656
metacluster_2.9,,IFNg_up,"IRF2, PRDM1",PRDM1,25.377587,0.03704,20365.0,1816
metacluster_2.7,,IFNg_up,"IRF1, STAT2, IRF8, IRF3, IRF9, IRF4, PRDM1, IRF5, IRF6, IRF7, IRF2, STAT1",PRDM1,23.714218,0.034777,20367.0,1810
metacluster_2.6,,IFNg_up,"IRF1, STAT2, IRF8, IRF9, IRF3, ZNF426, IRF4, IRF5, IRF6, ZNF71, IRF7, IRF2, STAT1","IRF1, IRF8, IRF3",19.040612,0.028418,20368.0,1899
tfdimers__MD00208,,IFNg_up,"IRF1, LTF",,16.452555,0.024896,20299.0,1153
stark__RAGTGAAAGT,,IFNg_up,,,15.192157,0.023181,20126.0,880
tfdimers__MD00138,,IFNg_up,"IRF1, MYB, IRF8, IRF3, IRF9, IRF4, IRF5, IRF7, IRF2",,14.111759,0.021711,20362.0,1041
tfdimers__MD00026,,IFNg_up,"IRF1, MYB",,13.964102,0.02151,20355.0,1177
flyfactorsurvey__Blimp-1_SANGER_5_FBgn0035625,,IFNg_up,,PRDM1,13.913079,0.021441,20354.0,1258
tfdimers__MD00527,,IFNg_up,"IRF1, IRF8, IRF3, ZEB1, IRF4, IRF5, IRF6, IRF7, IRF2",,13.286939,0.020589,20291.0,1048


In [68]:
cistarget_results(cistarget_dict, name="TGFb1_up")

Unnamed: 0,Logo,Region_set,Direct_annot,Orthology_annot,NES,AUC,Rank_at_max,Motif_hits
metacluster_91.2,,TGFb1_up,"SMAD5, SMAD3",,13.819849,0.017911,20302.0,588
taipale_cyt_meth__SMAD5_YGTCTAGACA_eDBD_meth_repr,,TGFb1_up,SMAD5,,12.851282,0.016797,15054.0,467
swissregulon__hs__FOSB,,TGFb1_up,FOSB,,12.489977,0.016381,20347.0,1075
stark__TGANTCA,,TGFb1_up,,"JUNB, JUN, JUND",12.255203,0.016111,20223.0,942
metacluster_137.2,,TGFb1_up,"FOSL2, INSM2, FOSL1, JUNB, FOS, FOSB, BACH2, JUN, BATF3, SMARCC1, JUND, ATF3, BATF","FOSL2, FOSL1, JUNB, FOS, FOSB, JUN, JUND, BATF, ATF3",11.687695,0.015459,20340.0,1075
jaspar__MA1448.1,,TGFb1_up,,,11.507043,0.015251,20355.0,889
elemento__TGACTCA,,TGFb1_up,,,11.461655,0.015199,20347.0,870
metacluster_50.1,,TGFb1_up,"FOSL1, BNC2, JUNB, FOS, FOSB, IRF4, JUN, SMAD3, SMAD2, JUND, BATF","FOSL2, FOSL1, FOS, JDP2, JUN, JUND, BATF",10.955698,0.014617,20322.0,1055
cisbp__M01523,,TGFb1_up,,,10.941413,0.014601,20362.0,1048
jaspar__MA1633.2,,TGFb1_up,BACH1,,10.489251,0.014081,20294.0,837


In [9]:
# Also running a permissive analysis (NES > 0.1 instead of 3) to be used for more quantitative comparisons between classes
cistarget_dict_perm = run_cistarget(ctx_db = cistarget_db,
                               region_sets = region_sets,
                               specie = 'homo_sapiens',
                               annotation_version = 'v10nr_clust',
                               path_to_motif_annotations = motif_anno,
                               auc_threshold = 0.005,
                               nes_threshold = 0.1,
                               rank_threshold = 0.05,
                               annotation = ['Direct_annot', 'Orthology_annot'],
                               n_cpu = 1,
                               _temp_dir='/lustre/scratch126/cellgen/team205/is10/tmp/')

2023-12-03 17:40:17,067 cisTarget    INFO     Reading cisTarget database
2023-12-03 17:45:47,847 cisTarget    INFO     Annotating motifs for IL17A_up
2023-12-03 17:45:59,262 cisTarget    INFO     Getting cistromes for IL17A_up
2023-12-03 17:46:23,139 cisTarget    INFO     Running cisTarget for IL17A_down which has 161 regions
2023-12-03 17:47:50,561 cisTarget    INFO     Annotating motifs for IL17A_down
2023-12-03 17:48:01,352 cisTarget    INFO     Getting cistromes for IL17A_down
2023-12-03 17:48:25,473 cisTarget    INFO     Running cisTarget for IL6_up which has 32 regions
2023-12-03 17:49:33,247 cisTarget    INFO     Annotating motifs for IL6_up
2023-12-03 17:49:41,965 cisTarget    INFO     Getting cistromes for IL6_up
2023-12-03 17:49:51,810 cisTarget    INFO     Running cisTarget for IL6_down which has 82 regions
2023-12-03 17:51:15,477 cisTarget    INFO     Annotating motifs for IL6_down
2023-12-03 17:51:25,183 cisTarget    INFO     Getting cistromes for IL6_down
2023-12-03 17:51

In [10]:
# Save
import pickle
with open((out_dir + 'cistarget_dict_NES0.1.pkl'), 'wb') as f:
  pickle.dump(cistarget_dict_perm, f)

In [11]:
## Also exporting results in a table format:
motif_enrichment_dict_perm = {key: cistarget_dict_perm[key].motif_enrichment for key in cistarget_dict_perm.keys()}
motif_enrichment_table_perm=pd.concat([motif_enrichment_dict_perm[key] for key in motif_enrichment_dict_perm.keys()], axis=0, sort=False)
motif_enrichment_table_perm.to_csv((out_dir + "cistarget_res_df_NES0.1.txt"), sep="\t")