**Set environment**

In [1]:
### set env: load the Duke/HARDAC project configuration, then print the FD_* paths in effect
### NOTE(review): show_env is presumably defined by config_duke.sh -- confirm
source ../config/config_duke.sh
show_env

You are on Duke Server: HARDAC
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code
SING DIRECTORY (FD_SING): /data/reddylab/Kuei/singularity
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log



**Check environment**

In [2]:
### check which R binary is found when commands are run through the project wrapper script
### NOTE(review): sing_proj_encode_fcc.sh is presumably a Singularity launcher -- confirm
${FD_PRJ}/notebooks/sing_proj_encode_fcc.sh which R

/usr/bin/R


In [3]:
### report the R version available through the same wrapper script
${FD_PRJ}/notebooks/sing_proj_encode_fcc.sh Rscript -e 'R.Version()$version.string'

[1] "R version 4.3.0 (2023-04-21)"


## Script for running enrichment

In [4]:
cat > run_fgsea.R << 'EOF'
### run_fgsea.R
### Run fgsea for one score vector against one annotation set and save the result.
###
### Usage: Rscript run_fgsea.R <FP_INP_SCORE> <FP_INP_ANNOT> <FD_OUT>
###   FP_INP_SCORE  .rds file; read with readRDS() and passed to fgsea(stats = ...)
###   FP_INP_ANNOT  .rds file; read with readRDS() and passed to fgsea(pathways = ...)
###   FD_OUT        directory that receives <score name>_<annot name>.rds
library(tidyverse)
library(fgsea)

### parse arguments
myargs = commandArgs(trailingOnly=TRUE)
if (length(myargs) < 3) {
    stop("Usage: Rscript run_fgsea.R <FP_INP_SCORE> <FP_INP_ANNOT> <FD_OUT>", call.=FALSE)
}
FP_INP_SCORE = myargs[1]
FP_INP_ANNOT = myargs[2]
FD_OUT       = myargs[3]

### Define output
### strip only a literal, trailing ".rds": the pattern is a regular expression,
### so an unescaped and unanchored ".rds" would also match e.g. "wrds" inside a name
txt1   = sub("\\.rds$", "", basename(FP_INP_SCORE))
txt2   = sub("\\.rds$", "", basename(FP_INP_ANNOT))
txt    = paste(txt1, txt2, sep="_")
txt    = paste(txt, "rds", sep=".")
FN_OUT = txt
FP_OUT = file.path(FD_OUT, FN_OUT)

### show
cat("\n")
cat("Input score:", FP_INP_SCORE, "\n\n")
cat("Input annot:", FP_INP_ANNOT, "\n\n")
cat("Output file:", FP_OUT,       "\n\n")
flush.console()

### fail early, with a readable message, before any computation is spent
for (fpath in c(FP_INP_SCORE, FP_INP_ANNOT)) {
    if (!file.exists(fpath)) {
        stop("Input file not found: ", fpath, call.=FALSE)
    }
}

### make sure the results can be written once the enrichment has finished
dir.create(FD_OUT, recursive=TRUE, showWarnings=FALSE)

### import data
vec_score = readRDS(FP_INP_SCORE)
lst_annot = readRDS(FP_INP_ANNOT)

### run GSEA
### fixed seed so repeated runs on the same input give the same result
### scoreType = "pos": one-tailed test for positive enrichment only
cat("RUN GSEA: Start.\n")
set.seed(123)
fgseaRes = fgsea(
    pathways  = lst_annot, 
    stats     = vec_score,
    eps       = 0.0,
    scoreType = "pos"
)
cat("RUN GSEA: Done!\n\n")

### show results
cat("Show results:\n")
print(fgseaRes)
cat("\n")

### Save the results
cat("Save results.\n")
saveRDS(fgseaRes, FP_OUT)

EOF

In [5]:
cat > run_fgsea.sh << 'EOF'
#!/bin/bash
### run_fgsea.sh -- Slurm array worker around run_fgsea.R
###
### Usage: sbatch --array 0-<N-1> run_fgsea.sh <FP_INP_SCORE> <FD_INP_ANNOT> <FD_OUT>
###   FP_INP_SCORE  score .rds file shared by all array tasks
###   FD_INP_ANNOT  directory of annotation *rds files; array task i processes
###                 the i-th one (0-based, in glob order)
###   FD_OUT        directory receiving the result .rds files
###
### The config file and the R script are addressed by relative path, so the job
### has to be submitted from the directory that contains this script.

### print start message
timer_start=$(date +%s)
echo "Hostname:          " $(hostname)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### setup input and output
source ../config/config_duke.sh
if [ "$#" -ne 3 ]; then
    echo "Usage: run_fgsea.sh <FP_INP_SCORE> <FD_INP_ANNOT> <FD_OUT>" >&2
    exit 1
fi
FP_INP_SCORE=$1
FD_INP_ANNOT=$2
FD_OUT=$3

### expand the glob directly instead of parsing `ls` output (robust to aliases
### and to unusual characters in paths); an unmatched glob stays literal, hence
### the existence check on the first element
FP_INP_ANNOTS=("${FD_INP_ANNOT}"/*rds)
if [ ! -e "${FP_INP_ANNOTS[0]}" ]; then
    echo "ERROR: no annotation file (*rds) found in ${FD_INP_ANNOT}" >&2
    exit 1
fi

### the array index has to be a number that addresses an existing annotation file
if ! [[ "${SLURM_ARRAY_TASK_ID}" =~ ^[0-9]+$ ]] || [ "${SLURM_ARRAY_TASK_ID}" -ge "${#FP_INP_ANNOTS[@]}" ]; then
    echo "ERROR: SLURM_ARRAY_TASK_ID='${SLURM_ARRAY_TASK_ID}' is not in 0..$(( ${#FP_INP_ANNOTS[@]} - 1 ))" >&2
    exit 1
fi
FP_INP_ANNOT=${FP_INP_ANNOTS[${SLURM_ARRAY_TASK_ID}]}

### the R script writes its result into FD_OUT
mkdir -p "${FD_OUT}"

### execute
"${FD_PRJ}"/notebooks/sing_proj_encode_fcc.sh Rscript ./run_fgsea.R "${FP_INP_SCORE}" "${FP_INP_ANNOT}" "${FD_OUT}"
status=$?

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
if [ "${status}" -eq 0 ]; then
    echo 'Done!'
else
    echo "FAILED (exit status ${status})"
fi
echo "Run Time: $(displaytime ${runtime})"

### hand the R exit status to Slurm so that failed tasks are reported as failed
exit ${status}

EOF

chmod +x run_fgsea.sh

## Check data

**Score**

In [6]:
### directory holding the merged score .rds files that are passed to run_fgsea.sh below
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge

In [7]:
### list the available score files
ls ${FD_INP_SCORE}

score_merge_astarr.rds   score_merge_tmpra.rds
score_merge_overall.rds  score_merge_wstarr.rds


In [8]:
### count the entries of the score directory
### (the ls output is word-split into the array, so names must not contain whitespace)
FP_INP_SCORES=($(ls ${FD_INP_SCORE}))
echo ${#FP_INP_SCORES[@]}

4


**Annotation**

In [9]:
### list the prepared annotation directories (-d: show the directories themselves, not their contents)
ls -d ${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot*

[0m[38;5;27m/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone[0m[K
[38;5;27m/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf[0m[K
[38;5;27m/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset[0m[K
[38;5;27m/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others[0m[K


## Reset

```
rm ${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset/*rds
rm ${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/*rds
rm ${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset/*rds
```

## Run enrichment: All annotations besides ChIP-seq

**Check data**

In [10]:
### annotation set for this section: the "others" directory (cCRE, ChromHMM, CRISPRi, enhancer, ... annotations)
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others
ls ${FD_INP_ANNOT}

annot_ccre_v3_CTCF_only_CTCF_bound.rds
annot_ccre_v3_dELS_CTCF_bound.rds
annot_ccre_v3_dELS.rds
annot_ccre_v3_DNase_H3K4me3_CTCF_bound.rds
annot_ccre_v3_DNase_H3K4me3.rds
annot_ccre_v3_DNase_only.rds
annot_ccre_v3_Low_DNase.rds
annot_ccre_v3_pELS_CTCF_bound.rds
annot_ccre_v3_pELS.rds
annot_ccre_v3_PLS_CTCF_bound.rds
annot_ccre_v3_PLS.rds
annot_ccre_v4_CA_CTCF.rds
annot_ccre_v4_CA_H3K4me3.rds
annot_ccre_v4_CA_only.rds
annot_ccre_v4_CA_TF.rds
annot_ccre_v4_dELS.rds
annot_ccre_v4_Low_DNase.rds
annot_ccre_v4_pELS.rds
annot_ccre_v4_PLS.rds
annot_ChromHMM_Biv.rds
annot_ChromHMM_Enh1.rds
annot_ChromHMM_Enh2.rds
annot_ChromHMM_EnhG1.rds
annot_ChromHMM_EnhG2.rds
annot_ChromHMM_Het.rds
annot_ChromHMM_Quies.rds
annot_ChromHMM_ReprPC.rds
annot_ChromHMM_TssFlnkD.rds
annot_ChromHMM_TssFlnk.rds
annot_ChromHMM_TssFlnkU.rds
annot_ChromHMM_Tss.rds
annot_ChromHMM_Tx.rds
annot_ChromHMM_TxWk.rds
annot_ChromHMM_ZNF_Rpts.rds
annot_CRISPRi_HCRFF_CRISPR_CASA.rds
annot_Enhancer_Enhancer_STARR.rds
annot_Enhancer

In [11]:
### count the entries of the annotation directory; the job array below needs one task per annotation file
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

38


**Test loop**

In [12]:
### dry run of the submission loop: nothing is submitted, the derived names are only printed
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

### for every score file print: full path, file name, file name without ".rds", output directory
FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
    FN_INP_SCORE=$(basename ${FP_INP_SCORE})
    ### ID = file name with the trailing ".rds" removed
    ID_INP_SCORE=${FN_INP_SCORE%.rds}
    echo ${FP_INP_SCORE}
    echo ${FN_INP_SCORE}
    echo ${ID_INP_SCORE}
    echo ${FD_OUT}
    echo
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds
score_merge_overall.rds
score_merge_overall
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_tmpra.rds
score_merge_tmpra.rds
score_merge_tmpra
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/ou

**RUN**

In [13]:
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

### Size the Slurm array from the annotation files themselves instead of a
### hard-coded last index: run_fgsea.sh selects its annotation by array index
### from ${FD_INP_ANNOT}/*rds, so the range has to track that same file set.
### An unmatched glob stays literal, hence the existence check.
FP_INP_ANNOTS=("${FD_INP_ANNOT}"/*rds)
if [ ! -e "${FP_INP_ANNOTS[0]}" ]; then
    echo "No annotation file (*rds) found in ${FD_INP_ANNOT}; nothing submitted." >&2
else
    IDX_LAST=$(( ${#FP_INP_ANNOTS[@]} - 1 ))

    ### the result and log directories have to exist before the jobs start
    mkdir -p "${FD_OUT}" "${FD_LOG}"

    ### one job array per score file; each array task handles one annotation file
    ### (%a in the log name is replaced by the array index)
    FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}

        echo ${ID_INP_SCORE}
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_others.%a.txt \
            --array 0-${IDX_LAST} \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

score_merge_astarr
Submitted batch job 29821714
score_merge_overall
Submitted batch job 29821715
score_merge_tmpra
Submitted batch job 29821716
score_merge_wstarr
Submitted batch job 29821717


**Check results**

In [14]:
### log of array task 0 for the "overall" score; the path goes through FD_LOG
### (as in the later sections) instead of repeating the absolute directory
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_others.0.txt

Hostname:           x2-03-3.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-11-23+13:27:22

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others/annot_ccre_v3_CTCF_only_CTCF_bound.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_merge

In [15]:
### log of array task 0 for the "astarr" score; the path goes through FD_LOG
### (as in the later sections) instead of repeating the absolute directory
cat ${FD_LOG}/run_fgsea_score_merge_astarr_annot_others.0.txt

Hostname:           x1-03-1.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-11-23+13:27:22

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others/annot_ccre_v3_CTCF_only_CTCF_bound.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_merge_

In [16]:
### log of array task 37 for the "astarr" score; the path goes through FD_LOG
### (as in the later sections) instead of repeating the absolute directory
cat ${FD_LOG}/run_fgsea_score_merge_astarr_annot_others.37.txt

Hostname:           x2-03-3.genome.duke.edu
Slurm Array Index:  37
Time Stamp:         05-11-23+13:27:22

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others/annot_TSS_POL2_TSS.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_merge_astarr_annot_TS

## Run enrichment: ChIP-seq Histone modifications

**Check data**

In [17]:
### annotation set for this section: histone ChIP-seq annotations
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone
ls ${FD_INP_ANNOT}

annot_ChIPseq_Histone_H2AFZ.rds     annot_ChIPseq_Histone_H3K4me3.rds
annot_ChIPseq_Histone_H3K27ac.rds   annot_ChIPseq_Histone_H3K79me2.rds
annot_ChIPseq_Histone_H3K27me3.rds  annot_ChIPseq_Histone_H3K9ac.rds
annot_ChIPseq_Histone_H3K36me3.rds  annot_ChIPseq_Histone_H3K9me1.rds
annot_ChIPseq_Histone_H3K4me1.rds   annot_ChIPseq_Histone_H3K9me3.rds
annot_ChIPseq_Histone_H3K4me2.rds   annot_ChIPseq_Histone_H4K20me1.rds


In [18]:
### count the entries of the annotation directory; the job array below needs one task per annotation file
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

12


**Test loop**

In [19]:
### dry run of the submission loop: nothing is submitted, the derived names are only printed
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

### for every score file print: full path, file name, file name without ".rds", output directory
FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
    FN_INP_SCORE=$(basename ${FP_INP_SCORE})
    ### ID = file name with the trailing ".rds" removed
    ID_INP_SCORE=${FN_INP_SCORE%.rds}
    echo ${FP_INP_SCORE}
    echo ${FN_INP_SCORE}
    echo ${ID_INP_SCORE}
    echo ${FD_OUT}
    echo
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds
score_merge_overall.rds
score_merge_overall
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_tmpra.rds
score_merge_tmpra.rds
score_merge_tmpra
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/ou

**RUN**

In [20]:
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

### Size the Slurm array from the annotation files themselves instead of a
### hard-coded last index: run_fgsea.sh selects its annotation by array index
### from ${FD_INP_ANNOT}/*rds, so the range has to track that same file set.
### An unmatched glob stays literal, hence the existence check.
FP_INP_ANNOTS=("${FD_INP_ANNOT}"/*rds)
if [ ! -e "${FP_INP_ANNOTS[0]}" ]; then
    echo "No annotation file (*rds) found in ${FD_INP_ANNOT}; nothing submitted." >&2
else
    IDX_LAST=$(( ${#FP_INP_ANNOTS[@]} - 1 ))

    ### the result and log directories have to exist before the jobs start
    mkdir -p "${FD_OUT}" "${FD_LOG}"

    ### one job array per score file; each array task handles one annotation file
    ### (%a in the log name is replaced by the array index)
    FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}
        ### print the score ID so each "Submitted batch job" line can be attributed,
        ### as the other RUN cells of this notebook do
        echo ${ID_INP_SCORE}
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_chipseq_histone.%a.txt \
            --array 0-${IDX_LAST} \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

Submitted batch job 29821866
Submitted batch job 29821867
Submitted batch job 29821868
Submitted batch job 29821869


**Check results**

In [21]:
### log of the first array task (index 0) for the "overall" score
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_histone.0.txt

Hostname:           x1-03-3.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-11-23+13:27:46

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone/annot_ChIPseq_Histone_H2AFZ.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_mer

In [22]:
### log of the last array task (index 11) for the "overall" score
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_histone.11.txt

Hostname:           x2-02-2.genome.duke.edu
Slurm Array Index:  11
Time Stamp:         05-11-23+13:27:46

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone/annot_ChIPseq_Histone_H4K20me1.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score

## Run enrichment: ChIP-seq TF Subset

**Check data**

In [23]:
### annotation set for this section: the subset of TF ChIP-seq annotations
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset
ls ${FD_INP_ANNOT}

annot_ChIPseq_TF_ATF1_ENCFF627RSK.rds   annot_ChIPseq_TF_NFE2_ENCFF023IFO.rds
annot_ChIPseq_TF_CTCF_ENCFF660GHM.rds   annot_ChIPseq_TF_NRF1_ENCFF777PKJ.rds
annot_ChIPseq_TF_ELK1_ENCFF715WGN.rds   annot_ChIPseq_TF_POLR2A_ENCFF355MNE.rds
annot_ChIPseq_TF_EP300_ENCFF702XPO.rds  annot_ChIPseq_TF_REST_ENCFF707MDI.rds
annot_ChIPseq_TF_GATA1_ENCFF657CTC.rds  annot_ChIPseq_TF_SP1_ENCFF553GPK.rds
annot_ChIPseq_TF_JUN_ENCFF190CGV.rds    annot_ChIPseq_TF_TAL1_ENCFF852ZRK.rds
annot_ChIPseq_TF_KLF10_ENCFF142ZTD.rds  annot_ChIPseq_TF_YY1_ENCFF398UQZ.rds
annot_ChIPseq_TF_KLF16_ENCFF488OTN.rds  annot_ChIPseq_TF_ZEB2_ENCFF242AOL.rds


In [24]:
### count the entries of the annotation directory; the job array below needs one task per annotation file
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

16


**Test loop**

In [25]:
### dry run of the submission loop: nothing is submitted, the derived names are only printed
### (this section writes to its own output directory, enrichment_results_merge_chipseq_tf_subset)
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

### for every score file print: full path, file name, file name without ".rds", output directory
FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
    FN_INP_SCORE=$(basename ${FP_INP_SCORE})
    ### ID = file name with the trailing ".rds" removed
    ID_INP_SCORE=${FN_INP_SCORE%.rds}
    echo ${FP_INP_SCORE}
    echo ${FN_INP_SCORE}
    echo ${ID_INP_SCORE}
    echo ${FD_OUT}
    echo
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds
score_merge_overall.rds
score_merge_overall
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_tmpra.rds
score_merge_tmpra.rds
score_merge_tmpra
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_r

**RUN**

In [26]:
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

### Size the Slurm array from the annotation files themselves instead of a
### hard-coded last index: run_fgsea.sh selects its annotation by array index
### from ${FD_INP_ANNOT}/*rds, so the range has to track that same file set.
### An unmatched glob stays literal, hence the existence check.
FP_INP_ANNOTS=("${FD_INP_ANNOT}"/*rds)
if [ ! -e "${FP_INP_ANNOTS[0]}" ]; then
    echo "No annotation file (*rds) found in ${FD_INP_ANNOT}; nothing submitted." >&2
else
    IDX_LAST=$(( ${#FP_INP_ANNOTS[@]} - 1 ))

    ### the result and log directories have to exist before the jobs start
    mkdir -p "${FD_OUT}" "${FD_LOG}"

    ### one job array per score file; each array task handles one annotation file
    ### (%a in the log name is replaced by the array index)
    FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}
        echo ${ID_INP_SCORE}
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_chipseq_tf_subset.%a.txt \
            --array 0-${IDX_LAST} \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

score_merge_astarr
Submitted batch job 29821914
score_merge_overall
Submitted batch job 29821915
score_merge_tmpra
Submitted batch job 29821916
score_merge_wstarr
Submitted batch job 29821917


**Check results**

In [27]:
### log of the first array task (index 0) for the "overall" score
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf_subset.0.txt

Hostname:           x1-03-4.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-11-23+13:28:06

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset/annot_ChIPseq_TF_ATF1_ENCFF627RSK.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_c

In [28]:
### log of the last array task (index 15) for the "overall" score
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf_subset.15.txt

Hostname:           x2-03-2.genome.duke.edu
Slurm Array Index:  15
Time Stamp:         05-11-23+13:28:06

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset/annot_ChIPseq_TF_ZEB2_ENCFF242AOL.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_

## Run enrichment: ChIP-seq TF

**Check data**

In [34]:
### annotation set for this section: the full TF ChIP-seq directory; only the first entries are shown
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf
ls ${FD_INP_ANNOT} | head

annot_ChIPseq_TF_ADNP_ENCFF739AJO.rds
annot_ChIPseq_TF_AFF1_ENCFF195YGC.rds
annot_ChIPseq_TF_AFF1_ENCFF674XTY.rds
annot_ChIPseq_TF_AGO1_ENCFF794IRP.rds
annot_ChIPseq_TF_ARHGAP35_ENCFF952WKN.rds
annot_ChIPseq_TF_ARID1B_ENCFF879NTL.rds
annot_ChIPseq_TF_ARID2_ENCFF913WRW.rds
annot_ChIPseq_TF_ARID3A_ENCFF891OQP.rds
annot_ChIPseq_TF_ARID3B_ENCFF270TSN.rds
annot_ChIPseq_TF_ARID4B_ENCFF086FAZ.rds


In [35]:
### count the entries of the annotation directory; the job array below needs one task per annotation file
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

733


**Test loop**

In [36]:
### dry run of the submission loop: nothing is submitted, the derived names are only printed
### (this section writes to its own output directory, enrichment_results_merge_chipseq_tf)
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

### for every score file print: full path, file name, file name without ".rds", output directory
FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
    FN_INP_SCORE=$(basename ${FP_INP_SCORE})
    ### ID = file name with the trailing ".rds" removed
    ID_INP_SCORE=${FN_INP_SCORE%.rds}
    echo ${FP_INP_SCORE}
    echo ${FN_INP_SCORE}
    echo ${ID_INP_SCORE}
    echo ${FD_OUT}
    echo
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds
score_merge_overall.rds
score_merge_overall
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_tmpra.rds
score_merge_tmpra.rds
score_merge_tmpra
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_c

**RUN**

In [37]:
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

### Size the Slurm array from the annotation files themselves instead of a
### hard-coded last index: run_fgsea.sh selects its annotation by array index
### from ${FD_INP_ANNOT}/*rds, so the range has to track that same file set.
### An unmatched glob stays literal, hence the existence check.
FP_INP_ANNOTS=("${FD_INP_ANNOT}"/*rds)
if [ ! -e "${FP_INP_ANNOTS[0]}" ]; then
    echo "No annotation file (*rds) found in ${FD_INP_ANNOT}; nothing submitted." >&2
else
    IDX_LAST=$(( ${#FP_INP_ANNOTS[@]} - 1 ))

    ### the result and log directories have to exist before the jobs start
    mkdir -p "${FD_OUT}" "${FD_LOG}"

    ### one job array per score file; each array task handles one annotation file
    ### (%a in the log name is replaced by the array index)
    FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}
        echo ${ID_INP_SCORE}
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_chipseq_tf.%a.txt \
            --array 0-${IDX_LAST} \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

score_merge_astarr
Submitted batch job 29818214
score_merge_overall
Submitted batch job 29818215
score_merge_tmpra
Submitted batch job 29818216
score_merge_wstarr
Submitted batch job 29818217


**Check results**

In [39]:
### log of the first array task (index 0) for the "overall" score
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf.0.txt

Hostname:           x3-02-1.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-10-23+15:56:16

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf/annot_ChIPseq_TF_ADNP_ENCFF739AJO.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_

In [43]:
### log of the last array task (index 732) for the "overall" score
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf.732.txt

Hostname:           x1-03-4.genome.duke.edu
Slurm Array Index:  732
Time Stamp:         05-10-23+15:56:31

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf/annot_ChIPseq_TF_ZZZ3_ENCFF797VEK.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipse