**Set environment**

In [1]:
### set env
# Load the project configuration (the path is relative to the notebook's
# working directory), then print the resolved FD_* locations.
# NOTE(review): show_env is presumably defined by config_duke.sh -- confirm.
source ../config/config_duke.sh
show_env

You are on Duke Server: HARDAC
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code
SING DIRECTORY (FD_SING): /data/reddylab/Kuei/singularity
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log



**Check environment**

In [2]:
# Check which R binary is found when going through the project wrapper script.
# NOTE(review): sing_proj_encode_fcc.sh presumably runs its arguments inside the
# project's Singularity container -- confirm.
${FD_PRJ}/notebooks/sing_proj_encode_fcc.sh which R

/usr/bin/R


In [3]:
# Print the R version string reported through the project wrapper script.
${FD_PRJ}/notebooks/sing_proj_encode_fcc.sh Rscript -e 'R.Version()$version.string'

[1] "R version 4.3.0 (2023-04-21)"


## Script for running enrichment

In [4]:
cat > run_fgsea.R << 'EOF'
### run_fgsea.R
### Run fgsea for one score vector against one annotation list and save the
### resulting table as an RDS file.
###
### Usage: Rscript run_fgsea.R <score.rds> <annot.rds> <output.rds>
library(tidyverse)
library(fgsea)

### parse arguments
myargs = commandArgs(trailingOnly=TRUE)
if (length(myargs) < 3) {
    stop(
        "Usage: Rscript run_fgsea.R <score.rds> <annot.rds> <output.rds>",
        call. = FALSE
    )
}
FP_INP_SCORE = myargs[1]
FP_INP_ANNOT = myargs[2]
FP_OUT       = myargs[3]

### show
cat("\n")
cat("Input score:", FP_INP_SCORE, "\n\n")
cat("Input annot:", FP_INP_ANNOT, "\n\n")
cat("Output file:", FP_OUT,       "\n\n")
flush.console()

### check inputs before doing any work (paths are already in the log above)
for (fpath in c(FP_INP_SCORE, FP_INP_ANNOT)) {
    if (!file.exists(fpath)) {
        stop("Input file not found: ", fpath, call. = FALSE)
    }
}

### make sure the output folder exists so the final save cannot fail on it
dir.create(dirname(FP_OUT), recursive = TRUE, showWarnings = FALSE)

### import data
### (assumes vec_score is a named numeric vector and lst_annot is a named list
###  of ID vectors, as fgsea expects for stats/pathways -- TODO confirm upstream)
vec_score = readRDS(FP_INP_SCORE)
lst_annot = readRDS(FP_INP_ANNOT)

### run GSEA
### - fixed seed so repeated runs start from the same RNG state
### - eps = 0.0: no lower bound when estimating small p-values (see fgsea docs)
### - scoreType = "pos": one-tailed test for positive enrichment only
cat("RUN GSEA: Start.\n")
set.seed(123)
fgseaRes = fgsea(
    pathways  = lst_annot,
    stats     = vec_score,
    eps       = 0.0,
    scoreType = "pos"
)
cat("RUN GSEA: Done!\n\n")

### show results
cat("Show results:\n")
print(fgseaRes)
cat("\n")

### Save the results
cat("Save results.\n")
saveRDS(fgseaRes, FP_OUT)

EOF

In [5]:
cat > run_fgsea.sh << 'EOF'
#!/bin/bash
### run_fgsea.sh
### Job wrapper: runs run_fgsea.R through the project wrapper script for one
### score file and one annotation file, and reports the run time.
###
### Usage: run_fgsea.sh <score.rds> <annot.rds> <output.rds>
### Exit status: the exit status returned by the Rscript call (1 on bad usage).

### print start message
timer_start=$(date +%s)
echo "Hostname:          " $(hostname)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### setup input and output
### NOTE: both the config path and ./run_fgsea.R are relative, so the job has
### to start in the directory that contains this script.
source ../config/config_duke.sh
if [ "$#" -lt 3 ]; then
    echo "Usage: $0 <score.rds> <annot.rds> <output.rds>" >&2
    exit 1
fi
FP_INP_SCORE=$1
FP_INP_ANNOT=$2
FP_OUT=$3

### (disabled) array mode: pick the annotation file by Slurm array index
#FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}/*rds))
#FP_INP_ANNOT=${FP_INP_ANNOTS[${SLURM_ARRAY_TASK_ID}]}

### execute
### keep the exit status so a failed R run is not reported as success
### NOTE(review): assumes the wrapper script passes Rscript's status through.
"${FD_PRJ}/notebooks/sing_proj_encode_fcc.sh" Rscript ./run_fgsea.R "${FP_INP_SCORE}" "${FP_INP_ANNOT}" "${FP_OUT}"
status=$?

### print end message
timer=$(date +%s)
runtime=$((timer - timer_start))
echo
if [ "${status}" -eq 0 ]; then
    echo 'Done!'
else
    echo "Failed: Rscript exited with status ${status}" >&2
fi
### NOTE(review): displaytime is not defined here; presumably it comes from
### config_duke.sh -- confirm.
echo "Run Time: $(displaytime ${runtime})"
exit "${status}"

EOF

chmod +x run_fgsea.sh

## Check data

**Score**

In [6]:
# Folder holding the merged score vectors (one RDS file per score); list its contents.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge

ls -1 ${FD_INP_SCORE}

score_merge.astarr.rds
score_merge.atac.rds
score_merge.crispri_growth_tot.rds
score_merge.crispri_hcrff_tot.rds
score_merge.lmpra_tot.rds
score_merge.overall.rds
score_merge.tmpra.rds
score_merge.wstarr.rds


**Annotation**

In [7]:
# Folder holding the prepared annotations, grouped into sub-folders.
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annotation

# List the annotation sub-folders, then the files of one of them as an example.
ls -1 ${FD_INP_ANNOT}
echo
ls -1 ${FD_INP_ANNOT}/annotation_chipseq_histone

[0m[38;5;27mannotation_chipseq_histone[0m
[38;5;27mannotation_chipseq_tf[0m
[38;5;27mannotation_chipseq_tf_subset[0m
[38;5;27mannotation_genomic[0m

annot.ChIPseq_Histone.H2AFZ_ENCFF213OTI.rds
annot.ChIPseq_Histone.H3K27ac_ENCFF544LXB.rds
annot.ChIPseq_Histone.H3K27me3_ENCFF323WOT.rds
annot.ChIPseq_Histone.H3K27me3_ENCFF801AHF.rds
annot.ChIPseq_Histone.H3K36me3_ENCFF193ERO.rds
annot.ChIPseq_Histone.H3K36me3_ENCFF561OUZ.rds
annot.ChIPseq_Histone.H3K4me1_ENCFF135ZLM.rds
annot.ChIPseq_Histone.H3K4me1_ENCFF540NGG.rds
annot.ChIPseq_Histone.H3K4me2_ENCFF749KLQ.rds
annot.ChIPseq_Histone.H3K4me3_ENCFF122CSI.rds
annot.ChIPseq_Histone.H3K4me3_ENCFF689QIJ.rds
annot.ChIPseq_Histone.H3K4me3_ENCFF706WUF.rds
annot.ChIPseq_Histone.H3K4me3_ENCFF885FQN.rds
annot.ChIPseq_Histone.H3K79me2_ENCFF209OQD.rds
annot.ChIPseq_Histone.H3K9ac_ENCFF148UQI.rds
annot.ChIPseq_Histone.H3K9ac_ENCFF891CHI.rds
annot.ChIPseq_Histone.H3K9me1_ENCFF462AVD.rds
annot.ChIPseq_Histone.H3K9me3_ENCFF963GZJ.rds
annot.ChIPseq

In [8]:
# Report, for each annotation folder, how many annot*rds files it contains.
FOLDERS=(
    annotation_genomic
    annotation_chipseq_histone
    annotation_chipseq_tf_subset
    annotation_chipseq_tf
)
for FOLDER in ${FOLDERS[@]}
do
    FP_INPS_ANNOT=($(ls ${FD_INP_ANNOT}/${FOLDER}/annot*rds))
    # folder name, file count, blank separator line
    printf '%s\n%d\n\n' "${FOLDER}" "${#FP_INPS_ANNOT[@]}"
done

annotation_genomic
49

annotation_chipseq_histone
19

annotation_chipseq_tf_subset
16

annotation_chipseq_tf
733



## Run enrichment

In [None]:
# Submit one Slurm job per (score file, annotation file) pair.
#   output: ${FD_OUT}/<annotation folder>/<score id>.<annot id>.rds
#   log:    ${FD_LOG}/run_fgsea.<score id>.<annot id>.0.txt
# NOTE(review): NODE (the Slurm partition) is not set in this cell; presumably
# it comes from config_duke.sh -- confirm before running.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annotation
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

# The score files do not depend on the annotation folder: list them once.
# mapfile keeps one path per array element without word-splitting.
mapfile -t FP_INPS_SCORE < <(ls "${FD_INP_SCORE}"/score*rds)

FOLDERS=(annotation_genomic annotation_chipseq_histone annotation_chipseq_tf_subset annotation_chipseq_tf)
for FOLDER in "${FOLDERS[@]}"; do
    mapfile -t FP_INPS_ANNOT < <(ls "${FD_INP_ANNOT}/${FOLDER}"/annot*rds)
    echo ${FOLDER}
    echo ${#FP_INPS_ANNOT[@]}
    echo

    # One output folder per annotation folder; create it once, not per job.
    FDIRY=${FD_OUT}/${FOLDER}
    mkdir -p "${FDIRY}"

    for FP_INP_SCORE in "${FP_INPS_SCORE[@]}"; do
        for FP_INP_ANNOT in "${FP_INPS_ANNOT[@]}"; do
            FN_INP_SCORE=$(basename "${FP_INP_SCORE}")
            FN_INP_ANNOT=$(basename "${FP_INP_ANNOT}")

            # IDs are the file names without the .rds extension
            ID_INP_SCORE=${FN_INP_SCORE%.rds}
            ID_INP_ANNOT=${FN_INP_ANNOT%.rds}

            FN_OUT=${ID_INP_SCORE}.${ID_INP_ANNOT}.rds
            FP_OUT=${FDIRY}/${FN_OUT}

            # %a is replaced by Slurm with the array index (always 0 here)
            FN_LOG=run_fgsea.${ID_INP_SCORE}.${ID_INP_ANNOT}.%a.txt
            FP_LOG=${FD_LOG}/${FN_LOG}

            echo "FPATH  (INPUT; SCORE):" ${FP_INP_SCORE}
            echo "FPATH  (INPUT; ANNOT):" ${FP_INP_ANNOT}
            echo "FPATH  (OUTPUT):      " ${FP_OUT}
            echo "PREFIX (INPUT; SCORE):" ${ID_INP_SCORE}
            echo "PREFIX (INPUT; ANNOT):" ${ID_INP_ANNOT}
            echo "FNAME  (OUTPUT):      " ${FN_OUT}
            # the log path is printed with a literal ${FD_LOG} so it can be pasted into a later cell
            echo "FPATH  (LOG):         " '${FD_LOG}'/run_fgsea.${ID_INP_SCORE}.${ID_INP_ANNOT}.0.txt
            echo

            # single-task array (index 0) so the %a in the log name resolves
            sbatch -p ${NODE} \
                --exclude=dl-01 \
                --cpus-per-task 8 \
                --mem 4G \
                --output "${FP_LOG}" \
                --array 0 \
                ./run_fgsea.sh "${FP_INP_SCORE}" "${FP_INP_ANNOT}" "${FP_OUT}"
            echo
        done
    done
    echo
done

annotation_genomic
49

FPATH  (INPUT; SCORE): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge.astarr.rds
FPATH  (INPUT; ANNOT): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annotation/annotation_genomic/annot.ccre_v3.CTCF_only_CTCF_bound.rds
FPATH  (OUTPUT):       /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/annotation_genomic/score_merge.astarr.annot.ccre_v3.CTCF_only_CTCF_bound.rds
PREFIX (INPUT; SCORE): score_merge.astarr
PREFIX (INPUT; ANNOT): annot.ccre_v3.CTCF_only_CTCF_bound
FNAME  (OUTPUT):       score_merge.astarr.annot.ccre_v3.CTCF_only_CTCF_bound.rds
FPATH  (LOG):          ${FD_LOG}/run_fgsea.score_merge.astarr.annot.ccre_v3.CTCF_only_CTCF_bound.0.txt

Submitted batch job 30022339

FPATH  (INPUT; SCORE): /data/reddylab/Kuei/

In [10]:
# Show the job log for score "score_merge.astarr" x annotation "annot.ccre_v3.CTCF_only_CTCF_bound".
cat ${FD_LOG}/run_fgsea.score_merge.astarr.annot.ccre_v3.CTCF_only_CTCF_bound.0.txt

Hostname:           x1-01-2.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         06-24-23+18:45:25

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge.astarr.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annotation/annotation_genomic/annot.ccre_v3.CTCF_only_CTCF_bound.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_m

In [11]:
# Show the job log for score "score_merge.astarr" x annotation "annot.ChromHMM.TssFlnkU".
cat ${FD_LOG}/run_fgsea.score_merge.astarr.annot.ChromHMM.TssFlnkU.0.txt

Hostname:           x1-02-3.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         06-24-23+18:45:25

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge.astarr.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annotation/annotation_genomic/annot.ChromHMM.TssFlnkU.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/annota

In [12]:
# Scratch loop over the annotation folders: collects the annotation files and
# the output folder for each one, and prints the folder name.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annotation

FP_INPS_SCORE=($(ls ${FD_INP_SCORE}/score*rds))

FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

FOLDERS=(annotation_genomic annotation_chipseq_histone annotation_chipseq_tf)
for FOLDER in ${FOLDERS[@]}; do
    # annotation files of this folder (not the score files)
    FP_INPS_ANNOT=($(ls ${FD_INP_ANNOT}/${FOLDER}/annot*rds))
    echo ${FOLDER}
    FDIRY=${FD_OUT}/${FOLDER}
    
done

annotation_genomic
annotation_chipseq_histone
annotation_chipseq_tf


## Run enrichment: All annotations besides ChIP-seq

**Check data**

In [35]:
# Point FD_INP_ANNOT at the "annot_others" annotation folder and list its files.
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others
ls ${FD_INP_ANNOT}

annot.ccre_v3.CTCF_only_CTCF_bound.rds      annot.ChromHMM.Het.rds
annot.ccre_v3.dELS_CTCF_bound.rds           annot.ChromHMM.Quies.rds
annot.ccre_v3.dELS.rds                      annot.ChromHMM.ReprPC.rds
annot.ccre_v3.DNase_H3K4me3_CTCF_bound.rds  annot.ChromHMM.TssFlnkD.rds
annot.ccre_v3.DNase_H3K4me3.rds             annot.ChromHMM.TssFlnk.rds
annot.ccre_v3.DNase_only.rds                annot.ChromHMM.TssFlnkU.rds
annot.ccre_v3.Low_DNase.rds                 annot.ChromHMM.Tss.rds
annot.ccre_v3.pELS_CTCF_bound.rds           annot.ChromHMM.Tx.rds
annot.ccre_v3.pELS.rds                      annot.ChromHMM.TxWk.rds
annot.ccre_v3.PLS_CTCF_bound.rds            annot.ChromHMM.ZNF_Rpts.rds
annot.ccre_v3.PLS.rds                       annot.CRISPRi.CRISPRi_Growth.rds
annot.ccre_v4.CA_CTCF.rds                   annot.CRISPRi.CRISPRi_HCRFF.rds
annot.ccre_v4.CA_H3K4me3.rds                annot.Enhancer_ZScore.ASTARR_AB.rds
annot.ccre_v4.CA_only.rds                   annot.Enhancer_ZScore.ASTARR_

In [36]:
# Count the entries in FD_INP_ANNOT.
# Note: `ls <dir>` yields file names, not full paths, despite the FP_ prefix.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

48


**Test loop**

In [41]:
# Dry run for the annot_others submission: print the output folder, then for
# every score file its full path, its file name, and its ID (name without .rds).
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge
printf '%s\n\n' "${FD_OUT}"

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}
do
    # strip the directory part, then the .rds extension
    FN_INP_SCORE=${FP_INP_SCORE##*/}
    ID_INP_SCORE=$(basename ${FP_INP_SCORE} .rds)
    printf '%s\n' "${FP_INP_SCORE}" "${FN_INP_SCORE}" "${ID_INP_SCORE}" ''
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_lmpra_tot.rds
score_merge_lmpra_tot.rds
score_merge_lmpra_tot

**RUN**

In [38]:
# Submit one Slurm array job per score file; array task i is meant to handle
# the i-th annotation file in FD_INP_ANNOT.
# NOTE(review): this passes the annotation DIRECTORY and the output DIRECTORY
# plus an array index, which matches the array-index code that is commented out
# in run_fgsea.sh; the script as written in this notebook expects single file
# paths. Confirm which version of run_fgsea.sh is in place before re-running.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

# Size the array from the files actually present instead of a fixed range, so
# no annotation is left out when the folder content changes.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}/*rds))
N_INP_ANNOT=${#FP_INP_ANNOTS[@]}

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
if [ ${N_INP_ANNOT} -eq 0 ]; then
    echo "No annotation files found in ${FD_INP_ANNOT}" >&2
else
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}

        echo ${ID_INP_SCORE}
        # %a in the log name is replaced by Slurm with the array index
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_others.%a.txt \
            --array 0-$((N_INP_ANNOT - 1)) \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

score_merge_astarr
Submitted batch job 29983194
score_merge_crispri_growth_tot
Submitted batch job 29983195
score_merge_crispri_hcrff_tot
Submitted batch job 29983196
score_merge_lmpra_tot
Submitted batch job 29983197
score_merge_overall
Submitted batch job 29983198
score_merge_tmpra
Submitted batch job 29983199
score_merge_wstarr
Submitted batch job 29983200


**Check results**

In [39]:
# Show the log of array task 0 of the annot_others run for score "score_merge_overall".
# Uses ${FD_LOG} (set by the project config) instead of a hard-coded absolute path.
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_others.0.txt

Hostname:           x3-03-4.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         06-12-23+15:43:14

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others/annot.ccre_v3.CTCF_only_CTCF_bound.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_merge

In [17]:
# Show the log of array task 0 of the annot_others run for score "score_merge_astarr".
# Uses ${FD_LOG} (set by the project config) instead of a hard-coded absolute path.
cat ${FD_LOG}/run_fgsea_score_merge_astarr_annot_others.0.txt

Hostname:           x1-01-3.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-10-23+15:52:03

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others/annot_ccre_v3_CTCF_only_CTCF_bound.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_merge_

In [14]:
# Show the log of array task 37 of the annot_others run for score "score_merge_astarr".
# Uses ${FD_LOG} (set by the project config) instead of a hard-coded absolute path.
cat ${FD_LOG}/run_fgsea_score_merge_astarr_annot_others.37.txt

Hostname:           x2-03-3.genome.duke.edu
Slurm Array Index:  37
Time Stamp:         06-12-23+06:05:17

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others/annot_TSS_POL2_TSS.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_merge_astarr_annot_TS

## Run enrichment: ChIP-seq Histone modifications

**Check data**

In [15]:
# Point FD_INP_ANNOT at the ChIP-seq histone annotation folder and list its files.
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone
ls ${FD_INP_ANNOT}

annot_ChIPseq_Histone_H2AFZ.rds     annot_ChIPseq_Histone_H3K4me3.rds
annot_ChIPseq_Histone_H3K27ac.rds   annot_ChIPseq_Histone_H3K79me2.rds
annot_ChIPseq_Histone_H3K27me3.rds  annot_ChIPseq_Histone_H3K9ac.rds
annot_ChIPseq_Histone_H3K36me3.rds  annot_ChIPseq_Histone_H3K9me1.rds
annot_ChIPseq_Histone_H3K4me1.rds   annot_ChIPseq_Histone_H3K9me3.rds
annot_ChIPseq_Histone_H3K4me2.rds   annot_ChIPseq_Histone_H4K20me1.rds


In [16]:
# Count the entries in FD_INP_ANNOT.
# Note: `ls <dir>` yields file names, not full paths, despite the FP_ prefix.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

12


**Test loop**

In [17]:
# Dry run for the ChIP-seq histone submission: for every score file print its
# full path, its file name, its ID (name without .rds), and the output folder.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}
do
    # strip the directory part, then the .rds extension
    FN_INP_SCORE=${FP_INP_SCORE##*/}
    ID_INP_SCORE=$(basename ${FP_INP_SCORE} .rds)
    printf '%s\n' "${FP_INP_SCORE}" "${FN_INP_SCORE}" "${ID_INP_SCORE}" "${FD_OUT}" ''
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTAR

**RUN**

In [18]:
# Submit one Slurm array job per score file; array task i is meant to handle
# the i-th annotation file in FD_INP_ANNOT.
# NOTE(review): this passes the annotation DIRECTORY and the output DIRECTORY
# plus an array index, which matches the array-index code that is commented out
# in run_fgsea.sh; the script as written in this notebook expects single file
# paths. Confirm which version of run_fgsea.sh is in place before re-running.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge

# Size the array from the files actually present instead of a fixed range.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}/*rds))
N_INP_ANNOT=${#FP_INP_ANNOTS[@]}

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
if [ ${N_INP_ANNOT} -eq 0 ]; then
    echo "No annotation files found in ${FD_INP_ANNOT}" >&2
else
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}
        # %a in the log name is replaced by Slurm with the array index
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_chipseq_histone.%a.txt \
            --array 0-$((N_INP_ANNOT - 1)) \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

Submitted batch job 29960580
Submitted batch job 29960581
Submitted batch job 29960582
Submitted batch job 29960583
Submitted batch job 29960584
Submitted batch job 29960585
Submitted batch job 29960586


**Check results**

In [19]:
# Show the log of array task 0 of the ChIP-seq histone run for score "score_merge_overall".
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_histone.0.txt

Hostname:           x2-04-1.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         06-12-23+06:06:51

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone/annot_ChIPseq_Histone_H2AFZ.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score_mer

In [20]:
# Show the log of array task 11 (the last index submitted) of the ChIP-seq histone run for score "score_merge_overall".
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_histone.11.txt

Hostname:           x2-04-3.genome.duke.edu
Slurm Array Index:  11
Time Stamp:         06-12-23+06:06:51

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_histone/annot_ChIPseq_Histone_H4K20me1.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge/score

## Run enrichment: ChIP-seq TF Subset

**Check data**

In [21]:
# Point FD_INP_ANNOT at the ChIP-seq TF subset annotation folder and list its files.
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset
ls ${FD_INP_ANNOT}

annot_ChIPseq_TF_ATF1_ENCFF627RSK.rds   annot_ChIPseq_TF_NFE2_ENCFF023IFO.rds
annot_ChIPseq_TF_CTCF_ENCFF660GHM.rds   annot_ChIPseq_TF_NRF1_ENCFF777PKJ.rds
annot_ChIPseq_TF_ELK1_ENCFF715WGN.rds   annot_ChIPseq_TF_POLR2A_ENCFF355MNE.rds
annot_ChIPseq_TF_EP300_ENCFF702XPO.rds  annot_ChIPseq_TF_REST_ENCFF707MDI.rds
annot_ChIPseq_TF_GATA1_ENCFF657CTC.rds  annot_ChIPseq_TF_SP1_ENCFF553GPK.rds
annot_ChIPseq_TF_JUN_ENCFF190CGV.rds    annot_ChIPseq_TF_TAL1_ENCFF852ZRK.rds
annot_ChIPseq_TF_KLF10_ENCFF142ZTD.rds  annot_ChIPseq_TF_YY1_ENCFF398UQZ.rds
annot_ChIPseq_TF_KLF16_ENCFF488OTN.rds  annot_ChIPseq_TF_ZEB2_ENCFF242AOL.rds


In [22]:
# Count the entries in FD_INP_ANNOT.
# Note: `ls <dir>` yields file names, not full paths, despite the FP_ prefix.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

16


**Test loop**

In [23]:
# Dry run for the ChIP-seq TF subset submission: for every score file print its
# full path, its file name, its ID (name without .rds), and the output folder.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}
do
    # strip the directory part, then the .rds extension
    FN_INP_SCORE=${FP_INP_SCORE##*/}
    ID_INP_SCORE=$(basename ${FP_INP_SCORE} .rds)
    printf '%s\n' "${FP_INP_SCORE}" "${FN_INP_SCORE}" "${ID_INP_SCORE}" "${FD_OUT}" ''
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot
/data/reddylab/Kuei/out/proj_combeffect_encod

**RUN**

In [24]:
# Submit one Slurm array job per score file; array task i is meant to handle
# the i-th annotation file in FD_INP_ANNOT.
# NOTE(review): this passes the annotation DIRECTORY and the output DIRECTORY
# plus an array index, which matches the array-index code that is commented out
# in run_fgsea.sh; the script as written in this notebook expects single file
# paths. Confirm which version of run_fgsea.sh is in place before re-running.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf_subset

# Size the array from the files actually present instead of a fixed range.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}/*rds))
N_INP_ANNOT=${#FP_INP_ANNOTS[@]}

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
if [ ${N_INP_ANNOT} -eq 0 ]; then
    echo "No annotation files found in ${FD_INP_ANNOT}" >&2
else
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}
        echo ${ID_INP_SCORE}
        # %a in the log name is replaced by Slurm with the array index
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_chipseq_tf_subset.%a.txt \
            --array 0-$((N_INP_ANNOT - 1)) \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

score_merge_astarr
Submitted batch job 29960664
score_merge_crispri_growth_tot
Submitted batch job 29960665
score_merge_crispri_hcrff_tot
Submitted batch job 29960666
score_merge_lmpra_tot
Submitted batch job 29960667
score_merge_overall
Submitted batch job 29960668
score_merge_tmpra
Submitted batch job 29960669
score_merge_wstarr
Submitted batch job 29960670


**Check results**

In [25]:
# Show the log of array task 0 of the ChIP-seq TF subset run for score "score_merge_overall".
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf_subset.0.txt

Hostname:           x1-03-4.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-11-23+13:28:06

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset/annot_ChIPseq_TF_ATF1_ENCFF627RSK.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_c

In [26]:
# Show the log of array task 15 (the last index submitted) of the ChIP-seq TF subset run for score "score_merge_overall".
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf_subset.15.txt

Hostname:           x2-03-2.genome.duke.edu
Slurm Array Index:  15
Time Stamp:         05-11-23+13:28:06

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf_subset/annot_ChIPseq_TF_ZEB2_ENCFF242AOL.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_

## Run enrichment: ChIP-seq TF

**Check data**

In [27]:
# Point FD_INP_ANNOT at the full ChIP-seq TF annotation folder and show the first entries.
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf
ls ${FD_INP_ANNOT} | head

annot_ChIPseq_TF_ADNP_ENCFF739AJO.rds
annot_ChIPseq_TF_AFF1_ENCFF195YGC.rds
annot_ChIPseq_TF_AFF1_ENCFF674XTY.rds
annot_ChIPseq_TF_AGO1_ENCFF794IRP.rds
annot_ChIPseq_TF_ARHGAP35_ENCFF952WKN.rds
annot_ChIPseq_TF_ARID1B_ENCFF879NTL.rds
annot_ChIPseq_TF_ARID2_ENCFF913WRW.rds
annot_ChIPseq_TF_ARID3A_ENCFF891OQP.rds
annot_ChIPseq_TF_ARID3B_ENCFF270TSN.rds
annot_ChIPseq_TF_ARID4B_ENCFF086FAZ.rds


In [28]:
# Count the entries in FD_INP_ANNOT.
# Note: `ls <dir>` yields file names, not full paths, despite the FP_ prefix.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}))
echo ${#FP_INP_ANNOTS[@]}

733


**Test loop**

In [29]:
# Dry run for the ChIP-seq TF submission: for every score file print its full
# path, its file name, its ID (name without .rds), and the output folder.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
for FP_INP_SCORE in ${FP_INP_SCORES[@]}
do
    # strip the directory part, then the .rds extension
    FN_INP_SCORE=${FP_INP_SCORE##*/}
    ID_INP_SCORE=$(basename ${FP_INP_SCORE} .rds)
    printf '%s\n' "${FP_INP_SCORE}" "${FN_INP_SCORE}" "${ID_INP_SCORE}" "${FD_OUT}" ''
done

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_astarr.rds
score_merge_astarr.rds
score_merge_astarr
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot.rds
score_merge_crispri_growth_tot
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot.rds
score_merge_crispri_hcrff_tot
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/

**RUN**

In [30]:
# Submit one Slurm array job per score file; array task i is meant to handle
# the i-th annotation file in FD_INP_ANNOT.
# NOTE(review): this passes the annotation DIRECTORY and the output DIRECTORY
# plus an array index, which matches the array-index code that is commented out
# in run_fgsea.sh; the script as written in this notebook expects single file
# paths. Confirm which version of run_fgsea.sh is in place before re-running.
FD_INP_SCORE=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge
FD_INP_ANNOT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf
FD_OUT=${FD_RES}/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_tf

# Size the array from the files actually present instead of a fixed range.
FP_INP_ANNOTS=($(ls ${FD_INP_ANNOT}/*rds))
N_INP_ANNOT=${#FP_INP_ANNOTS[@]}

FP_INP_SCORES=($(ls ${FD_INP_SCORE}/*.rds))
if [ ${N_INP_ANNOT} -eq 0 ]; then
    echo "No annotation files found in ${FD_INP_ANNOT}" >&2
else
    for FP_INP_SCORE in ${FP_INP_SCORES[@]}; do
        FN_INP_SCORE=$(basename ${FP_INP_SCORE})
        ID_INP_SCORE=${FN_INP_SCORE%.rds}
        echo ${ID_INP_SCORE}
        # %a in the log name is replaced by Slurm with the array index
        sbatch -p ${NODE} \
            --exclude=dl-01 \
            --cpus-per-task 8 \
            --mem 4G \
            --output ${FD_LOG}/run_fgsea_${ID_INP_SCORE}_annot_chipseq_tf.%a.txt \
            --array 0-$((N_INP_ANNOT - 1)) \
            ./run_fgsea.sh ${FP_INP_SCORE} ${FD_INP_ANNOT} ${FD_OUT}
    done
fi

score_merge_astarr
Submitted batch job 29960776
score_merge_crispri_growth_tot
Submitted batch job 29960878
score_merge_crispri_hcrff_tot
Submitted batch job 29960879
score_merge_lmpra_tot
Submitted batch job 29960880
score_merge_overall
Submitted batch job 29960881
score_merge_tmpra
Submitted batch job 29960882
score_merge_wstarr
Submitted batch job 29960883


**Check results**

In [31]:
# Show the log of array task 0 of the ChIP-seq TF run for score "score_merge_overall".
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf.0.txt

Hostname:           x3-02-1.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         05-10-23+15:56:16

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf/annot_ChIPseq_TF_ADNP_ENCFF739AJO.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipseq_

In [32]:
# Show the log of array task 732 (the last index submitted) of the ChIP-seq TF run for score "score_merge_overall".
cat ${FD_LOG}/run_fgsea_score_merge_overall_annot_chipseq_tf.732.txt

Hostname:           x1-03-4.genome.duke.edu
Slurm Array Index:  732
Time Stamp:         05-10-23+15:56:31

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.4.1
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Input score: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_score_merge/score_merge_overall.rds 

Input annot: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_chipseq_tf/annot_ChIPseq_TF_ZZZ3_ENCFF797VEK.rds 

Output file: /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_results_merge_chipse