**Set environment**

In [1]:
# Load the project configuration into the current shell (path is relative
# to the notebook's working directory), then print the resolved settings.
# show_env is not defined in this notebook; presumably it is provided by
# the sourced config file.
. ../config/config_duke.sh
show_env

You are on Duke Server: HARDAC
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code
SING DIRECTORY (FD_SING): /data/reddylab/Kuei/singularity
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log



**Check data**

In [4]:
# List the WGS fragment-count files available for the assay.
# FD_RES is read from the environment set up earlier in the notebook.
ASSAY=KS91_K562_ASTARRseq
FD_INP="${FD_RES}/results/${ASSAY}/fragment_count"

# Let the shell expand the glob straight into the array instead of parsing
# `ls` output, so paths containing whitespace or glob characters stay intact.
FP_INPS=("${FD_INP}"/*WGS*bed.gz)
# An unmatched glob is left as the literal pattern; treat that as "no files".
[[ -e "${FP_INPS[0]}" ]] || FP_INPS=()

echo "Total: ${#FP_INPS[@]}"
for FP_INP in "${FP_INPS[@]}"; do
    basename "${FP_INP}"
done

Total: 10
KS91_K562_ASTARRseq.Input.rep1.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Input.rep2.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Input.rep3.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Input.rep4.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Input.rep5.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Input.rep6.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Output.rep1.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Output.rep2.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Output.rep3.WGS.unstranded.bed.gz
KS91_K562_ASTARRseq.Output.rep4.WGS.unstranded.bed.gz


## Run: compute library size (sum of fragment counts) per sample

In [7]:
# Submit a one-task Slurm array job that sums column 5 of every WGS
# fragment-count file of the assay and writes one "<file name>,<sum>" row
# per input file to library_size_count.csv.
# NODE and FD_LOG are expanded here, at submission time. The here-doc
# delimiter is quoted, so everything inside the job script is expanded
# only when the job itself runs.
sbatch -p "${NODE}" \
    --exclude=dl-01 \
    --cpus-per-task 8 \
    --mem 8G \
    --output "${FD_LOG}/coverage_library_size_ASTARR.%a.txt" \
    --array 0 \
    <<'EOF'
#!/bin/bash
# Make a pipeline report failure when any stage fails, so that a zcat error
# on a corrupt input cannot be hidden by a succeeding awk.
set -o pipefail

### set directories & global variables
# Relative path: resolved against the job's working directory.
source ../config/config_duke.sh || {
    echo "ERROR: cannot source ../config/config_duke.sh" >&2
    exit 1
}
ASSAY=KS91_K562_ASTARRseq

### print start message
timer_start=$(date +%s)
echo "Hostname:          " "$(hostname)"
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID}"
echo "Time Stamp:        " "$(date +"%m-%d-%y+%T")"
echo

### set input and output files
FD_INP="${FD_RES}/results/${ASSAY}/fragment_count"
# Expand the glob directly rather than parsing `ls` output.
FP_INPS=("${FD_INP}"/*WGS*bed.gz)
# An unmatched glob stays literal; stop before truncating any existing
# output instead of silently producing an empty table.
if [[ ! -e "${FP_INPS[0]}" ]]; then
    echo "ERROR: no input files match ${FD_INP}/*WGS*bed.gz" >&2
    exit 1
fi

FD_OUT="${FD_RES}/results/${ASSAY}/coverage/summary"
FN_OUT=library_size_count.csv
FP_OUT="${FD_OUT}/${FN_OUT}"

### init
mkdir -p "${FD_OUT}"
# Start from an empty output file; rows are appended below.
: > "${FP_OUT}"

### get library size
for FP_INP in "${FP_INPS[@]}"; do

    ### show I/O file
    echo ++++++++++++++++++++++++++++++++++++++
    echo "Input: " "${FP_INP}"
    echo
    echo "show first few lines of file"
    # head exits early, so zcat may end with SIGPIPE here; that status is
    # deliberately not checked.
    zcat "${FP_INP}" | head -n 3
    echo

    ### count the total counts and append the results to the output file
    FN_INP=$(basename "${FP_INP}")
    # `sum + 0` forces a numeric 0 (not an empty field) for an empty input.
    if ! zcat "${FP_INP}" \
        | awk -v OFS=, -v FNAME="${FN_INP}" '{sum += $5} END {print FNAME, sum + 0}' \
        >> "${FP_OUT}"; then
        echo "ERROR: failed to count ${FP_INP}" >&2
        exit 1
    fi
done

### show I/O file
echo "Output file: ${FP_OUT}"
echo
echo "Show the lines"
cat "${FP_OUT}"
echo

### print end message
timer=$(date +%s)
# Both timestamps are whole seconds, so shell arithmetic is sufficient.
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
# displaytime is not defined in this script; presumably it comes from the
# sourced config file.
echo "Run Time: $(displaytime "${runtime}")"
EOF

Submitted batch job 29562024


## Check

In [9]:
# Print the log written by array task 0 of the library-size job.
# The path is quoted so a log directory containing spaces or glob
# characters is passed to cat as a single argument.
cat -- "${FD_LOG}/coverage_library_size_ASTARR.0.txt"

Hostname:           x1-02-4.genome.duke.edu
Slurm Array Index:  0
Time Stamp:         03-12-23+13:54:01

++++++++++++++++++++++++++++++++++++++
Input:  /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/fragment_count/KS91_K562_ASTARRseq.Input.rep1.WGS.unstranded.bed.gz

show first few lines of file
chr1	10001	10143	chr1_10001_10143	1	.
chr1	10002	10118	chr1_10002_10118	1	.
chr1	10002	10131	chr1_10002_10131	1	.

++++++++++++++++++++++++++++++++++++++
Input:  /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/fragment_count/KS91_K562_ASTARRseq.Input.rep2.WGS.unstranded.bed.gz

show first few lines of file
chr1	10001	10233	chr1_10001_10233	1	.
chr1	10001	10351	chr1_10001_10351	1	.
chr1	10002	10119	chr1_10002_10119	1	.

++++++++++++++++++++++++++++++++++++++
Input:  /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/fragment_count/KS91_K562_ASTARRseq.Input.rep3.WGS.unstranded.bed.gz

show first few lines o