**Set environment**

In [1]:
# Load the project configuration for the Duke server, then the bedtools
# module setup script (presumably makes `bedtools` available -- confirm in
# the config file), and finally call show_env to report the environment.
. ../config/config_duke.sh
. ../config/config_load_module_bedtools.sh
show_env

You are on Duke Server: HARDAC
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code
SING DIRECTORY (FD_SING): /data/reddylab/Kuei/singularity
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log



**Check data**

In [2]:
# List the normalized coverage tracks present for the assay.
# The path is quoted so it is passed to ls as a single argument even if
# FD_RES ever contains whitespace or glob characters.
ASSAY=KS91_K562_ASTARRseq
ls "${FD_RES}/results/${ASSAY}/coverage/track_normalized"

[0m[38;5;9mKS91_K562_ASTARRseq.Input.mean.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Input.rep1.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Input.rep2.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Input.rep3.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Input.rep4.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Input.rep5.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Input.rep6.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Output.mean.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Output.rep1.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Output.rep2.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Output.rep3.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.Output.rep4.SUBSET.unstranded.bed.gz[0m
[38;5;9mKS91_K562_ASTARRseq.pLog2FC.mean.SUBSET.unstranded.bed.gz[0m


## RUN

In [3]:
# Submit a two-task Slurm array job (one task per sample group) that averages
# the per-replicate normalized coverage tracks into a single "mean" track.
# The heredoc delimiter is quoted, so every variable inside the script is
# expanded on the compute node at run time, not at submission time.
sbatch -p "${NODE}" \
    --exclude=dl-01 \
    --cpus-per-task 8 \
    --mem 20G \
    --output "${FD_LOG}/coverage_perbase_mean_subset_ASTARR.%a.txt" \
    --array 0-1 \
    <<'EOF'
#!/bin/bash
### Average the 4th column of all replicate tracks of one sample group.
### Input : ${FD_INP}/*<GROUP>*rep*SUBSET*bed.gz
### Output: same file name as the rep1 track with "rep1" replaced by "mean"

### set directories & global variables
source ../config/config_duke.sh
ASSAY=KS91_K562_ASTARRseq
SAMPLE_GROUPS=("Input" "Output")

### print start message
timer_start=$(date +%s)
echo "Hostname:          " $(hostname)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### set input and output files
FD_INP=${FD_RES}/results/${ASSAY}/coverage/track_normalized
FD_OUT=${FD_RES}/results/${ASSAY}/coverage/track_normalized

### select the sample group for this array task
### (an empty GROUP would make the replicate glob below match the replicates
###  of every group at once, so refuse to continue in that case)
if [[ -z "${SLURM_ARRAY_TASK_ID}" ]]; then
    echo "ERROR: SLURM_ARRAY_TASK_ID is not set; submit with --array" >&2
    exit 1
fi
GROUP=${SAMPLE_GROUPS[${SLURM_ARRAY_TASK_ID}]}
if [[ -z "${GROUP}" ]]; then
    echo "ERROR: no sample group for array index ${SLURM_ARRAY_TASK_ID}" >&2
    exit 1
fi

### collect replicate files with shell globs instead of parsing `ls`;
### nullglob turns "no match" into an empty array that can be tested
shopt -s nullglob
FP_INPS=("${FD_INP}"/*"${GROUP}"*rep*SUBSET*bed.gz)
FP_REP1=("${FD_INP}"/*"${GROUP}"*rep1*SUBSET*bed.gz)
shopt -u nullglob

if (( ${#FP_INPS[@]} == 0 )); then
    echo "ERROR: no replicate files found for ${GROUP} in ${FD_INP}" >&2
    exit 1
fi
if (( ${#FP_REP1[@]} != 1 )); then
    echo "ERROR: expected exactly one rep1 file for ${GROUP}, found ${#FP_REP1[@]}" >&2
    exit 1
fi

### the output name is derived from the rep1 file name
FN_INP=$(basename "${FP_REP1[0]}")
FN_OUT=${FN_INP/rep1/mean}
FP_OUT=${FD_OUT}/${FN_OUT}
### temporary name; does not end in "bed.gz", so the globs above never match it
FP_TMP=${FP_OUT}.tmp

### show I/O file
echo ++++++++++++++++++++++++++++++++++++++
for FP_REP in "${FP_INPS[@]}"; do
    echo "Input file: " ${FP_REP}
    echo
    echo "show first few lines of file"
    zcat "${FP_REP}" | head -10
    echo
done

### execute
### columns 1-3 are the interval; columns 4..NF hold one value per replicate.
### pipefail is enabled only for this pipeline so that a failure in bedtools
### or awk is not hidden by a successful gzip (the `zcat | head` previews
### would otherwise report a harmless SIGPIPE as a failure).
set -o pipefail
bedtools unionbedg -i "${FP_INPS[@]}" \
| awk 'BEGIN { OFS = "\t" }
  {
    sum = 0;
    for (col = 4; col <= NF; col++) {
       sum += $col;
    }
    mean = sum / (NF-4+1);
    print $1, $2, $3, mean;
  }' \
| gzip -c \
> "${FP_TMP}"
status=$?
set +o pipefail

if (( status != 0 )); then
    echo "ERROR: averaging pipeline failed with exit status ${status}" >&2
    rm -f "${FP_TMP}"
    exit ${status}
fi

### publish the result only after the whole pipeline succeeded, so a failed
### or interrupted run never leaves a truncated track at the final path
mv -f "${FP_TMP}" "${FP_OUT}"

### show I/O file
echo ++++++++++++++++++++++++++++++++++++++
echo "Output file: " ${FP_OUT}
echo
echo "show first few lines of file"
zcat "${FP_OUT}" | head -10
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"
EOF

Submitted batch job 29573937


## Check

In [4]:
# Show the log written by array task 0; the path is quoted so it stays a
# single argument regardless of the characters in FD_LOG.
cat "${FD_LOG}/coverage_perbase_mean_subset_ASTARR.0.txt"

Hostname:           x2-08-4.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         03-14-23+16:05:17

++++++++++++++++++++++++++++++++++++++
Input file:  /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/coverage/track_normalized/KS91_K562_ASTARRseq.Input.rep1.SUBSET.unstranded.bed.gz

show first few lines of file
chr11	4000000	4000001	0.0946386
chr11	4000001	4000002	0.0917707
chr11	4000002	4000003	0.0917707
chr11	4000003	4000004	0.0917707
chr11	4000004	4000005	0.0917707
chr11	4000005	4000006	0.0946386
chr11	4000006	4000007	0.0917707
chr11	4000007	4000008	0.0917707
chr11	4000008	4000009	0.0917707
chr11	4000009	4000010	0.0917707

Input file:  /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/coverage/track_normalized/KS91_K562_ASTARRseq.Input.rep2.SUBSET.unstranded.bed.gz

show first few lines of file
chr11	4000000	4000001	0.146222
chr11	4000001	4000002	0.144006
chr11	4000002	4000003	0.144006
chr11	4000003	4000004	0.144006
chr

In [5]:
# Show the log written by array task 1; the path is quoted so it stays a
# single argument regardless of the characters in FD_LOG.
cat "${FD_LOG}/coverage_perbase_mean_subset_ASTARR.1.txt"

Hostname:           x2-08-4.genome.duke.edu
Slurm Array Index:  1
Time Stamp:         03-14-23+16:05:17

++++++++++++++++++++++++++++++++++++++
Input file:  /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/coverage/track_normalized/KS91_K562_ASTARRseq.Output.rep1.SUBSET.unstranded.bed.gz

show first few lines of file
chr11	4000000	4000001	0.317433
chr11	4000001	4000002	0.317433
chr11	4000002	4000003	0.317433
chr11	4000003	4000004	0.317433
chr11	4000004	4000005	0.317433
chr11	4000005	4000006	0.317433
chr11	4000006	4000007	0.317433
chr11	4000007	4000008	0.317433
chr11	4000008	4000009	0.317433
chr11	4000009	4000010	0.317433

Input file:  /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/coverage/track_normalized/KS91_K562_ASTARRseq.Output.rep2.SUBSET.unstranded.bed.gz

show first few lines of file
chr11	4000000	4000001	0.256486
chr11	4000001	4000002	0.256486
chr11	4000002	4000003	0.256486
chr11	4000003	4000004	0.256486
chr11	40000