**Set environment**

In [1]:
# Load the project configuration from the parent directory, then print the
# environment summary (server, base/work/data directories, project paths).
# NOTE(review): show_env is presumably defined by run_config_project.sh -- confirm.
source ../run_config_project.sh
show_env

You are working on             Duke Server: HARDAC
BASE DIRECTORY (FD_BASE):      /data/reddylab/Kuei
REPO DIRECTORY (FD_REPO):      /data/reddylab/Kuei/repo
WORK DIRECTORY (FD_WORK):      /data/reddylab/Kuei/work
DATA DIRECTORY (FD_DATA):      /data/reddylab/Kuei/data
CONTAINER DIR. (FD_SING):      /data/reddylab/Kuei/container

You are working with           ENCODE FCC
PATH OF PROJECT (FD_PRJ):      /data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC
PROJECT RESULTS (FD_RES):      /data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC/results
PROJECT SCRIPTS (FD_EXE):      /data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC/scripts
PROJECT DATA    (FD_DAT):      /data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC/data
PROJECT NOTE    (FD_NBK):      /data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC/notebooks
PROJECT DOCS    (FD_DOC):      /data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC/docs
PROJECT LOG     (FD_LOG):      /data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC/log
PROJE

**Script folder**

In [4]:
# Print the project scripts directory (FD_EXE); the scripts below are written there.
echo ${FD_EXE}

/data/reddylab/Kuei/repo/Proj_CombEffect_ENCODE_FCC/scripts


## Set up the config for loading the bedtools module

In [11]:
# Write config_load_module_bedtools.sh into the scripts folder.
# The heredoc delimiter is quoted ('EOF') so that $(pwd -P) is stored literally
# and evaluated when the generated script runs; with an unquoted delimiter the
# path would be expanded now and hard-coded into the generated file.
cat > "${FD_EXE}/config_load_module_bedtools.sh" << 'EOF'
#!/bin/bash

### Check which duke server I am at and load the corresponding modules.
### The server is recognised from the physical working directory at run time.
### NOTE(review): presumably meant to be sourced, so that `module load`
### affects the calling shell -- confirm against the callers.

case "$(pwd -P)" in
    *gpfs*)
        #echo "You are on Duke Server: HARDAC"
        module load bedtools2
        ;;
esac

case "$(pwd -P)" in
    *hpc*)
        #echo "You are on Duke Server: DCC"
        module load Bedtools
        ;;
esac

EOF

## Helper functions available

In [3]:
# Display the shared helper functions that the run_bedtools_*.sh scripts source.
cat ${FD_EXE}/config_func.sh

### https://unix.stackexchange.com/questions/72661/show-sum-of-file-sizes-in-directory-listing
#dir () { ls -FaGlh "${@}" | awk '{ total += $4; print }; END { print total }'; }
### List the given paths (all arguments are forwarded to ls) in long format,
### strip the first three space-delimited fields from every line, print the
### remaining text, and finish with one extra line holding the sum of the
### first remaining field.
### NOTE(review): with `ls -h` that field looks like "4.0K" / "1.2M", and awk
### only adds the leading number, so the final total mixes units -- confirm
### whether a byte-accurate total is wanted.
### NOTE(review): the sed pattern treats every single space as a field
### separator, so padded (multi-space) ls columns shift which field ends up
### first -- verify against real ls output.
dir() { 
    ls -lhaG --color=always "${@}" |\
    sed -re 's/^([^ ]* ){3}//' |\
    awk '{ total += $1; print }; END { print total }'
}

### https://unix.stackexchange.com/questions/27013/displaying-seconds-as-days-hours-mins-seconds
### Format a duration given in seconds as human-readable text, e.g.
### "1 hours 1 minutes and 1 seconds". Units whose value is zero are left
### out, except seconds, which are always printed.
###   $1  duration in whole seconds
function displaytime {
  local total=$1
  local days=$(( total / 86400 ))
  local hours=$(( total / 3600 % 24 ))
  local mins=$(( total / 60 % 60 ))
  local secs=$(( total % 60 ))
  local shown_larger_unit=0

  if (( days > 0 )); then
    printf '%d days ' "${days}"
    shown_larger_unit=1
  fi
  if (( hours > 0 )); then
    printf '%d hours ' "${hours}"
    shown_larger_unit=1
  fi
  if (( mins > 0 )); then
    printf '%d minutes ' "${mins}"
    shown_larger_unit=1
  fi

  # "and" joins the seconds to whatever larger unit was printed before it
  if (( shown_larger_unit )); then
    printf 'and '
  fi
  printf '%d seconds\n' "${secs}"
}

### Print a file to stdout, decompressing it with zcat when `file` reports
### compressed data and using plain cat otherwise.
###   $1  path of the file to print
function fun_cat {
    local FPATH=$1
    # -b: leave the file name out of the report, so a path that merely
    #     contains the word "compressed" is not mistaken for compressed data
    # -L: follow symlinks, so a link to a compressed file is classified by
    #     its target instead of being reported as "symbolic link"
    if file -bL "${FPATH}" | grep -q compressed; then
        zcat "${FPATH}"
    else
        cat  "${FPATH}"
    fi
}

## Test script for bedtools

In [14]:
# Write run_bedtools_test.sh into the scripts folder and make it executable.
# The heredoc delimiter is quoted so nothing inside is expanded at write time.
cat > "${FD_EXE}/run_bedtools_test.sh" << 'EOF'
#!/bin/bash
### Smoke test: print job information, preview an input file and call
### `bedtools --help` to check that bedtools can be run.
###
### Usage: run_bedtools_test.sh <FD_PRJ> <FP_INP>
###     FD_PRJ  project root; helper functions are read from <FD_PRJ>/scripts
###     FP_INP  input file to preview (plain or compressed)

### print start message
timer_start=$(date +%s)
echo "Hostname:          " "$(hostname)"
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID:-}"
echo "Time Stamp:        " "$(date +"%m-%d-%y+%T")"
echo

### check arguments
if [[ $# -lt 2 ]]; then
    echo "Usage: $0 <FD_PRJ> <FP_INP>" >&2
    exit 1
fi

### setup env:
###     get project root path
###     load helper functions
FD_PRJ=$1
FD_EXE=${FD_PRJ}/scripts
if ! source "${FD_EXE}/config_func.sh"; then
    echo "ERROR: cannot load ${FD_EXE}/config_func.sh" >&2
    exit 1
fi

### setup input and output
FP_INP=$2

### show input file
echo "Input: " "${FP_INP}"
echo
echo "show first few lines of input"
fun_cat "${FP_INP}" | head
echo

### execute
bedtools --help
echo

### print end message
### (shell arithmetic is enough for whole seconds; no need for bc)
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
echo "Run Time: $(displaytime "${runtime}")"

EOF

chmod +x "${FD_EXE}/run_bedtools_test.sh"

## Intersection

In [None]:
# Write run_bedtools_intersect.sh into the scripts folder and make it executable.
# The heredoc delimiter is quoted so nothing inside is expanded at write time.
cat > "${FD_EXE}/run_bedtools_intersect.sh" << 'EOF'
#!/bin/bash
### Intersect two interval files with `bedtools intersect -wo` and write the
### result gzip-compressed.
###
### Usage: run_bedtools_intersect.sh <FP_INP1> <FP_INP2> <FP_OUT>
###     FP_INP1  file passed to bedtools as -a
###     FP_INP2  file passed to bedtools as -b
###     FP_OUT   output path (written through gzip)

### make a pipeline fail when any stage fails, not only the last one;
### otherwise a bedtools error would be hidden by a successful gzip
set -o pipefail

### print start message
timer_start=$(date +%s)
echo "Hostname:          " "$(hostname)"
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID:-}"
echo "Time Stamp:        " "$(date +"%m-%d-%y+%T")"
echo

### check arguments
if [[ $# -lt 3 ]]; then
    echo "Usage: $0 <FP_INP1> <FP_INP2> <FP_OUT>" >&2
    exit 1
fi

### setup env: load helper function
### NOTE(review): the helpers are looked up next to this script file; if the
### scheduler runs a spooled copy of the script this may not be the scripts
### folder -- confirm how the job is submitted.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
if ! source "${SCRIPT_DIR}/config_func.sh"; then
    echo "ERROR: cannot load ${SCRIPT_DIR}/config_func.sh" >&2
    exit 1
fi

### setup input and output
FP_INP1=$1
FP_INP2=$2
FP_OUT=$3

### show input file
echo "Input: " "${FP_INP1}"
echo
echo "show first few lines of input"
fun_cat "${FP_INP1}" | head
echo
echo "Input: " "${FP_INP2}"
echo
echo "show first few lines of input"
fun_cat "${FP_INP2}" | head
echo

### execute
### -wo: report both original entries plus the overlap size for each overlap
if ! bedtools intersect \
        -a "${FP_INP1}" \
        -b "${FP_INP2}" \
        -wo \
    | gzip -c \
    > "${FP_OUT}"
then
    echo "ERROR: bedtools intersect failed; ${FP_OUT} is incomplete" >&2
    exit 1
fi

### show output file
echo
echo "Output: " "${FP_OUT}"
echo
echo "show first few lines of output:"
fun_cat "${FP_OUT}" | head
echo

### print end message
### (shell arithmetic is enough for whole seconds; no need for bc)
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
echo "Run Time: $(displaytime "${runtime}")"

EOF

chmod +x "${FD_EXE}/run_bedtools_intersect.sh"

## Coverage

In [15]:
# Write run_bedtools_coverage.sh into the scripts folder and make it executable.
# The heredoc delimiter is quoted so nothing inside is expanded at write time.
cat > "${FD_EXE}/run_bedtools_coverage.sh" << 'EOF'
#!/bin/bash
### Summarise the -b intervals overlapping each -a interval with
### `bedtools map -o sum` and write the result gzip-compressed.
###
### Usage: run_bedtools_coverage.sh <FD_PRJ> <FP_INP1> <FP_INP2> <FP_OUT>
###     FD_PRJ   project root; helper functions are read from <FD_PRJ>/scripts
###     FP_INP1  file passed to bedtools as -a
###     FP_INP2  file passed to bedtools as -b
###     FP_OUT   output path (written through gzip)

### make a pipeline fail when any stage fails, not only the last one;
### otherwise a bedtools error would be hidden by a successful gzip
set -o pipefail

### print start message
timer_start=$(date +%s)
echo "Hostname:          " "$(hostname)"
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID:-}"
echo "Time Stamp:        " "$(date +"%m-%d-%y+%T")"
echo

### check arguments
if [[ $# -lt 4 ]]; then
    echo "Usage: $0 <FD_PRJ> <FP_INP1> <FP_INP2> <FP_OUT>" >&2
    exit 1
fi

### setup env:
###     get project root path
###     load helper functions
FD_PRJ=$1
FD_EXE=${FD_PRJ}/scripts
if ! source "${FD_EXE}/config_func.sh"; then
    echo "ERROR: cannot load ${FD_EXE}/config_func.sh" >&2
    exit 1
fi

### setup input and output
FP_INP1=$2
FP_INP2=$3
FP_OUT=$4

### show input file
echo "Input: " "${FP_INP1}"
echo
echo "show first few lines of input"
fun_cat "${FP_INP1}" | head
echo
echo "Input: " "${FP_INP2}"
echo
echo "show first few lines of input"
fun_cat "${FP_INP2}" | head
echo

### execute
###     -o sum        sum the mapped column over the overlapping -b intervals
###                   (no -c is given, so bedtools picks its default column)
###     -f / -F 0.5   minimum overlap as a fraction of the -a / -b interval
###     -e            accept an overlap when either fraction is satisfied
### NOTE(review): bedtools map presumably needs position-sorted inputs --
### confirm that both files are sorted before this script is called.
if ! bedtools map \
        -a "${FP_INP1}" \
        -b "${FP_INP2}" \
        -o sum \
        -f 0.5 \
        -F 0.5 \
        -e \
    | gzip -c \
    > "${FP_OUT}"
then
    echo "ERROR: bedtools map failed; ${FP_OUT} is incomplete" >&2
    exit 1
fi

### show output file
echo
echo "Output: " "${FP_OUT}"
echo
echo "show first few lines of output:"
fun_cat "${FP_OUT}" | head
echo

### print end message
### (shell arithmetic is enough for whole seconds; no need for bc)
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo
echo 'Done!'
echo "Run Time: $(displaytime "${runtime}")"

EOF

chmod +x "${FD_EXE}/run_bedtools_coverage.sh"

## Annotation