# K-mer counts in the PER1 region

## Count fragments for Input

### Test looping input directories

In [17]:
%%bash
### Dry run: walk every Input bootstrap directory and print, for each file
### found there, the names of the outputs the k-mer counting job would use.
### Nothing is written to disk by this cell.
FD_BASE=/work/kk319
FD_WRK=${FD_BASE}/out/CombEffect_STARR
FD_KMR=${FD_WRK}/kmer
CHROM=chr17      # not referenced below; kept as a notebook parameter
TARGET=PER1

### expand the directories with a glob instead of parsing `ls`;
### with nullglob an unmatched pattern yields an empty list
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/bootstrap/Input*/)

### the input kmer (ls is used only to show that the file exists)
ls "${FD_KMR}/kmer_${TARGET}.bed"
echo ++++++++++++++++++++++++++++++++++++++

for FD_BED in "${FD_BEDS[@]}"; do
    FD_OUT=${FD_WRK}/kmer/$(basename "${FD_BED}")

    echo "$(basename "${FD_BED}")"
    echo "${FD_BED}"
    echo "${FD_OUT}"

    ### FD_BED still ends with "/", so "${FD_BED}"* expands to its entries
    for FP_BED in "${FD_BED}"*; do
        FN_BED=${FP_BED##*/}      # file name without the directory part
        echo "    Input:" "${FN_BED}"
        echo "   PREFIX:" "${FN_BED%.*}"
        echo "   Output:" "kmer_${TARGET}_${FN_BED%.*}.bedpe"
        echo "   Output:" "kmer_${TARGET}_${FN_BED%.*}_count.bed"
    done
    echo ++++++++++++++++++++++++++++++++++++++
done

/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
++++++++++++++++++++++++++++++++++++++
Input
/work/kk319/out/CombEffect_STARR/bootstrap/Input/
/work/kk319/out/CombEffect_STARR/kmer/Input
    Input: chr17_B1.bed
   PREFIX: chr17_B1
   Output: kmer_PER1_chr17_B1.bedpe
   Output: kmer_PER1_chr17_B1_count.bed
    Input: chr17_B10.bed
   PREFIX: chr17_B10
   Output: kmer_PER1_chr17_B10.bedpe
   Output: kmer_PER1_chr17_B10_count.bed
    Input: chr17_B2.bed
   PREFIX: chr17_B2
   Output: kmer_PER1_chr17_B2.bedpe
   Output: kmer_PER1_chr17_B2_count.bed
    Input: chr17_B3.bed
   PREFIX: chr17_B3
   Output: kmer_PER1_chr17_B3.bedpe
   Output: kmer_PER1_chr17_B3_count.bed
    Input: chr17_B4.bed
   PREFIX: chr17_B4
   Output: kmer_PER1_chr17_B4.bedpe
   Output: kmer_PER1_chr17_B4_count.bed
    Input: chr17_B5.bed
   PREFIX: chr17_B5
   Output: kmer_PER1_chr17_B5.bedpe
   Output: kmer_PER1_chr17_B5_count.bed
    Input: chr17_B6.bed
   PREFIX: chr17_B6
   Output: kmer_PER1_chr17_B6.bedpe
   Ou

### Intersect the PER1 k-mers with each Input bootstrap sample (chr17)

In [26]:
%%bash
### Submit a Slurm array job that, for each Input bootstrap directory,
### intersects the PER1 k-mer windows with every bootstrap BED file and
### counts the fragments per k-mer.

### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh      # expected to define FD_WRK (not set in this cell)
FD_LOG=${FD_WRK}/log

### make sure the directory of the -o log files exists before submitting
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the quoted 'EOF' keeps the job script literal: nothing is expanded here)
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-5 \
    --mem 8G \
    -o "${FD_LOG}/kmer_count_per1_input_chr17_bstrp.%a.txt" \
    <<'EOF'
#!/bin/bash
### Each array task handles one bootstrap directory, selected by
### SLURM_ARRAY_TASK_ID from the list of matching directories.

### set directories and global parameters
source config.sh
shopt -s nullglob      # an unmatched glob expands to nothing
#FD_BEDS=($(ls -d ${FD_WRK}/data/Input*/))
FD_BEDS=("${FD_WRK}"/bootstrap/Input*/)
CHROM=chr17            # not referenced below; kept as a parameter
TARGET=PER1

### set input files
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]:-}
if [[ -z "${FD_BED}" ]]; then
    ### the array range is fixed, so an index may have no directory;
    ### stop here instead of looping over the current directory
    echo "No bootstrap directory for array index ${SLURM_ARRAY_TASK_ID}; nothing to do." >&2
    exit 0
fi
FD_BED=${FD_BED%/}     # drop the trailing slash left by the glob
FD_KMR=${FD_WRK}/kmer
FN_KMR=kmer_${TARGET}.bed

### set output
FD_OUT=${FD_WRK}/kmer/$(basename "${FD_BED}")

### print start message
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID}"
echo "Input  file:       " "${FD_KMR}/${FN_KMR}"
echo
echo "Show the first few lines of the input file"
echo "${FD_KMR}/${FN_KMR}"
head "${FD_KMR}/${FN_KMR}"


### init: create folder
mkdir -p "${FD_OUT}"

### loop through each bootstrap sample
for FP_BED_B in "${FD_BED}"/*; do
    ### set output file
    FN_BED=${FP_BED_B##*/}
    FN_OUT=kmer_${TARGET}_${FN_BED%.*}.bedpe
    FN_CNT=kmer_${TARGET}_${FN_BED%.*}_count.bed
    echo ++++++++++++++++++++++++++++++++++++++
    echo "Input  file: " "${FP_BED_B}"
    echo "Output file: " "${FD_OUT}/${FN_OUT}"
    echo "Output file: " "${FD_OUT}/${FN_CNT}"

    ### intersect (-wo appends the overlap length as the last column)
    FP_BED_A=${FD_KMR}/${FN_KMR}
    FP_BED_O=${FD_OUT}/${FN_OUT}
    bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

    ### count the kmers
    ### keep rows whose 7th column is 750 (presumably the full k-mer width,
    ### i.e. fragments covering the whole window -- confirm), then count the
    ### consecutive identical k-mers and write: chrom, start, end, count
    awk -F $'\t' '($7 == 750)' "${FP_BED_O}" |
        cut -f1-3 |
        uniq -c   |
        awk -v OFS='\t' '{print $2, $3, $4, $1}' > "${FD_OUT}/${FN_CNT}"

    ### print end message
    echo
    echo "Show the first few lines of the output file"
    echo "${FD_OUT}/${FN_OUT}"
    head -n 3 "${FD_OUT}/${FN_OUT}"
    echo
    echo "Show the first few lines of the output file"
    echo "${FD_OUT}/${FN_CNT}"
    head -n 3 "${FD_OUT}/${FN_CNT}"
    echo
done

echo
echo "Done!"

EOF

Bedtools 2.27.1
Submitted batch job 11876538


**Check results**

In [25]:
%%bash
### Print the log written by array task 0 of the Input k-mer counting job.
### config.sh is expected to provide FD_WRK (it is not set in this cell).
. config.sh
FD_LOG=${FD_WRK}/log
FN_LOG=kmer_count_per1_input_chr17_bstrp.0.txt
cat ${FD_LOG}/${FN_LOG}

Slurm Array Index:  0
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795
++++++++++++++++++++++++++++++++++++++
Input  file:  /work/kk319/out/CombEffect_STARR/bootstrap/Input//chr17_B1.bed
Output file:  /work/kk319/out/CombEffect_STARR/kmer/Input/kmer_PER1_chr17_B1.bedpe
Output file:  /work/kk319/out/CombEffect_STARR/kmer/Input/kmer_PER1_chr17_B1_count.bed
/var/spool/slurmd/job11876216/slurm_script: line 52: /kmer_PER1_chr17_B1_count.bed: Permission denied

Show the first few lines of the output file
/work/kk319/out/CombEffect_STARR/kmer/Input/kmer_PER1_chr17_B1.bedpe
chr17	8148000	8148750	chr17	8146974	8148006	6
chr17	8148000	8148750	chr17	8146974	814800

## Count fragments for Output (DMSO)

### Intersect the PER1 k-mers with each DMSO bootstrap sample (chr17)

In [27]:
%%bash
### Submit a Slurm array job that, for each TFX*_DMSO bootstrap directory,
### intersects the PER1 k-mer windows with every bootstrap BED file and
### counts the fragments per k-mer.

### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh      # expected to define FD_WRK (not set in this cell)
FD_LOG=${FD_WRK}/log

### make sure the directory of the -o log files exists before submitting
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the quoted 'EOF' keeps the job script literal: nothing is expanded here)
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-5 \
    --mem 8G \
    -o "${FD_LOG}/kmer_count_per1_output_dmso_chr17_bstrp.%a.txt" \
    <<'EOF'
#!/bin/bash
### Each array task handles one bootstrap directory, selected by
### SLURM_ARRAY_TASK_ID from the list of matching directories.

### set directories and global parameters
source config.sh
shopt -s nullglob      # an unmatched glob expands to nothing
#FD_BEDS=($(ls -d ${FD_WRK}/data/Input*/))
FD_BEDS=("${FD_WRK}"/bootstrap/TFX*_DMSO/)
CHROM=chr17            # not referenced below; kept as a parameter
TARGET=PER1

### set input files
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]:-}
if [[ -z "${FD_BED}" ]]; then
    ### the array range is fixed, so an index may have no directory;
    ### stop here instead of looping over the current directory
    echo "No bootstrap directory for array index ${SLURM_ARRAY_TASK_ID}; nothing to do." >&2
    exit 0
fi
FD_BED=${FD_BED%/}     # drop the trailing slash left by the glob
FD_KMR=${FD_WRK}/kmer
FN_KMR=kmer_${TARGET}.bed

### set output
FD_OUT=${FD_WRK}/kmer/$(basename "${FD_BED}")

### print start message
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID}"
echo "Input  file:       " "${FD_KMR}/${FN_KMR}"
echo
echo "Show the first few lines of the input file"
echo "${FD_KMR}/${FN_KMR}"
head "${FD_KMR}/${FN_KMR}"


### init: create folder
mkdir -p "${FD_OUT}"

### loop through each bootstrap sample
for FP_BED_B in "${FD_BED}"/*; do
    ### set output file
    FN_BED=${FP_BED_B##*/}
    FN_OUT=kmer_${TARGET}_${FN_BED%.*}.bedpe
    FN_CNT=kmer_${TARGET}_${FN_BED%.*}_count.bed
    echo ++++++++++++++++++++++++++++++++++++++
    echo "Input  file: " "${FP_BED_B}"
    echo "Output file: " "${FD_OUT}/${FN_OUT}"
    echo "Output file: " "${FD_OUT}/${FN_CNT}"

    ### intersect (-wo appends the overlap length as the last column)
    FP_BED_A=${FD_KMR}/${FN_KMR}
    FP_BED_O=${FD_OUT}/${FN_OUT}
    bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

    ### count the kmers
    ### keep rows whose 7th column is 750 (presumably the full k-mer width,
    ### i.e. fragments covering the whole window -- confirm), then count the
    ### consecutive identical k-mers and write: chrom, start, end, count
    awk -F $'\t' '($7 == 750)' "${FP_BED_O}" |
        cut -f1-3 |
        uniq -c   |
        awk -v OFS='\t' '{print $2, $3, $4, $1}' > "${FD_OUT}/${FN_CNT}"

    ### print end message
    echo
    echo "Show the first few lines of the output file"
    echo "${FD_OUT}/${FN_OUT}"
    head -n 3 "${FD_OUT}/${FN_OUT}"
    echo
    echo "Show the first few lines of the output file"
    echo "${FD_OUT}/${FN_CNT}"
    head -n 3 "${FD_OUT}/${FN_CNT}"
    echo
done

echo
echo "Done!"

EOF

Bedtools 2.27.1
Submitted batch job 11876588


**Check results**

In [30]:
%%bash
### Print the log written by array task 0 of the DMSO k-mer counting job.
### config.sh is expected to provide FD_WRK (it is not set in this cell).
. config.sh
FD_LOG=${FD_WRK}/log
FN_LOG=kmer_count_per1_output_dmso_chr17_bstrp.0.txt
cat ${FD_LOG}/${FN_LOG}

Slurm Array Index:  0
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795
++++++++++++++++++++++++++++++++++++++
Input  file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX2_DMSO//chr17_B1.bed
Output file:  /work/kk319/out/CombEffect_STARR/kmer/TFX2_DMSO/kmer_PER1_chr17_B1.bedpe
Output file:  /work/kk319/out/CombEffect_STARR/kmer/TFX2_DMSO/kmer_PER1_chr17_B1_count.bed

Show the first few lines of the output file
/work/kk319/out/CombEffect_STARR/kmer/TFX2_DMSO/kmer_PER1_chr17_B1.bedpe
chr17	8148000	8148750	chr17	8146914	8148026	26
chr17	8148000	8148750	chr17	8146916	8148025	25
chr17	8148000	8148750	chr17	8147292	8148313	313

Show the first few lines of the

## Count fragments for Output (Dex)

### Intersect the PER1 k-mers with each Dex bootstrap sample (chr17)

In [28]:
%%bash
### Submit a Slurm array job that, for each TFX*_Dex bootstrap directory,
### intersects the PER1 k-mer windows with every bootstrap BED file and
### counts the fragments per k-mer.

### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh      # expected to define FD_WRK (not set in this cell)
FD_LOG=${FD_WRK}/log

### make sure the directory of the -o log files exists before submitting
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the quoted 'EOF' keeps the job script literal: nothing is expanded here)
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-5 \
    --mem 8G \
    -o "${FD_LOG}/kmer_count_per1_output_dex_chr17_bstrp.%a.txt" \
    <<'EOF'
#!/bin/bash
### Each array task handles one bootstrap directory, selected by
### SLURM_ARRAY_TASK_ID from the list of matching directories.

### set directories and global parameters
source config.sh
shopt -s nullglob      # an unmatched glob expands to nothing
#FD_BEDS=($(ls -d ${FD_WRK}/data/Input*/))
FD_BEDS=("${FD_WRK}"/bootstrap/TFX*_Dex/)
CHROM=chr17            # not referenced below; kept as a parameter
TARGET=PER1

### set input files
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]:-}
if [[ -z "${FD_BED}" ]]; then
    ### the array range is fixed, so an index may have no directory;
    ### stop here instead of looping over the current directory
    echo "No bootstrap directory for array index ${SLURM_ARRAY_TASK_ID}; nothing to do." >&2
    exit 0
fi
FD_BED=${FD_BED%/}     # drop the trailing slash left by the glob
FD_KMR=${FD_WRK}/kmer
FN_KMR=kmer_${TARGET}.bed

### set output
FD_OUT=${FD_WRK}/kmer/$(basename "${FD_BED}")

### print start message
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID}"
echo "Input  file:       " "${FD_KMR}/${FN_KMR}"
echo
echo "Show the first few lines of the input file"
echo "${FD_KMR}/${FN_KMR}"
head "${FD_KMR}/${FN_KMR}"


### init: create folder
mkdir -p "${FD_OUT}"

### loop through each bootstrap sample
for FP_BED_B in "${FD_BED}"/*; do
    ### set output file
    FN_BED=${FP_BED_B##*/}
    FN_OUT=kmer_${TARGET}_${FN_BED%.*}.bedpe
    FN_CNT=kmer_${TARGET}_${FN_BED%.*}_count.bed
    echo ++++++++++++++++++++++++++++++++++++++
    echo "Input  file: " "${FP_BED_B}"
    echo "Output file: " "${FD_OUT}/${FN_OUT}"
    echo "Output file: " "${FD_OUT}/${FN_CNT}"

    ### intersect (-wo appends the overlap length as the last column)
    FP_BED_A=${FD_KMR}/${FN_KMR}
    FP_BED_O=${FD_OUT}/${FN_OUT}
    bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

    ### count the kmers
    ### keep rows whose 7th column is 750 (presumably the full k-mer width,
    ### i.e. fragments covering the whole window -- confirm), then count the
    ### consecutive identical k-mers and write: chrom, start, end, count
    awk -F $'\t' '($7 == 750)' "${FP_BED_O}" |
        cut -f1-3 |
        uniq -c   |
        awk -v OFS='\t' '{print $2, $3, $4, $1}' > "${FD_OUT}/${FN_CNT}"

    ### print end message
    echo
    echo "Show the first few lines of the output file"
    echo "${FD_OUT}/${FN_OUT}"
    head -n 3 "${FD_OUT}/${FN_OUT}"
    echo
    echo "Show the first few lines of the output file"
    echo "${FD_OUT}/${FN_CNT}"
    head -n 3 "${FD_OUT}/${FN_CNT}"
    echo
done

echo
echo "Done!"

EOF

Bedtools 2.27.1
Submitted batch job 11876695


**Check results**

In [31]:
%%bash
### Print the log written by array task 0 of the Dex k-mer counting job.
### config.sh is expected to provide FD_WRK (it is not set in this cell).
. config.sh
FD_LOG=${FD_WRK}/log
FN_LOG=kmer_count_per1_output_dex_chr17_bstrp.0.txt
cat ${FD_LOG}/${FN_LOG}

Slurm Array Index:  0
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795
++++++++++++++++++++++++++++++++++++++
Input  file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX2_Dex//chr17_B1.bed
Output file:  /work/kk319/out/CombEffect_STARR/kmer/TFX2_Dex/kmer_PER1_chr17_B1.bedpe
Output file:  /work/kk319/out/CombEffect_STARR/kmer/TFX2_Dex/kmer_PER1_chr17_B1_count.bed

Show the first few lines of the output file
/work/kk319/out/CombEffect_STARR/kmer/TFX2_Dex/kmer_PER1_chr17_B1.bedpe
chr17	8148000	8148750	chr17	8146961	8148071	71
chr17	8148000	8148750	chr17	8146961	8148071	71
chr17	8148000	8148750	chr17	8146961	8148071	71

Show the first few lines of the outp