# kmer count in PER1 region

## Count fragments for Input

### Test looping input directories

In [2]:
%%bash
# Dry run: list every Input* library directory under data/ together with the
# per-library kmer output path that would be derived from its name.
FD_BASE=/work/kk319
FD_WRK=${FD_BASE}/out/CombEffect_STARR
FD_OUT=${FD_WRK}/kmer

# Expand the glob directly instead of parsing `ls`, so directory names are
# never word-split; nullglob turns "no match" into an empty array.
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/Input*/)
shopt -u nullglob

if (( ${#FD_BEDS[@]} == 0 )); then
    echo "No directories match ${FD_WRK}/data/Input*/" >&2
fi

for FD_BED in "${FD_BEDS[@]}"; do
    FD_NAME=$(basename "${FD_BED}")
    printf '%s\n' "${FD_NAME}"
    printf '%s\n' "${FD_BED}"
    printf '%s\n' "${FD_OUT}/kmer_PER1_${FD_NAME}.bed"
done

Input
/work/kk319/out/CombEffect_STARR/data/Input/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_Input.bed
Input1
/work/kk319/out/CombEffect_STARR/data/Input1/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_Input1.bed
Input2
/work/kk319/out/CombEffect_STARR/data/Input2/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_Input2.bed
Input3
/work/kk319/out/CombEffect_STARR/data/Input3/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_Input3.bed
Input4
/work/kk319/out/CombEffect_STARR/data/Input4/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_Input4.bed
Input5
/work/kk319/out/CombEffect_STARR/data/Input5/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_Input5.bed


### Intersect the PER1 k-mers with the fragments of each Input library (chr17)

In [35]:
%%bash
### Submit one Slurm array task per Input* library. Each task intersects the
### PER1 kmer windows with that library's chr17 fragments and counts, for every
### kmer, the fragments that overlap it by the full kmer length.

### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
# FD_WRK is not assigned in this cell; it is expected to come from config.sh.
: "${FD_WRK:?FD_WRK is not set (expected from config.sh)}"
FD_LOG=${FD_WRK}/log

### init: make sure the log directory exists before the tasks write to it
mkdir -p "${FD_LOG}"

### size the job array from the number of library directories, so the array
### indices always match the FD_BEDS array that each task rebuilds
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/Input*/)
shopt -u nullglob
N_BEDS=${#FD_BEDS[@]}
if (( N_BEDS == 0 )); then
    echo "No library directories match ${FD_WRK}/data/Input*/" >&2
    exit 1
fi

### run script using sbatch
### (the heredoc delimiter is quoted: variables below expand inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-$(( N_BEDS - 1 )) \
    --mem 8G \
    -o "${FD_LOG}/kmer_count_per1_input_chr17.%a.txt" \
    <<'EOF'
#!/bin/bash
### set directories and global parameters
source config.sh
: "${FD_WRK:?FD_WRK is not set (expected from config.sh)}"
CHROM=chr17
TARGET=PER1
# Overlap (bp) a fragment must have with a kmer to be counted.
# NOTE(review): chosen to equal the kmer width (750 bp) -- confirm against
# the kmer BED file if the windows are regenerated.
KMER_LEN=750

### list library directories (same glob as in the submitting shell)
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/Input*/)
shopt -u nullglob

### refuse to run when this task has no directory to work on
if [[ -z "${SLURM_ARRAY_TASK_ID:-}" ]] || (( SLURM_ARRAY_TASK_ID >= ${#FD_BEDS[@]} )); then
    echo "Array index '${SLURM_ARRAY_TASK_ID:-}' has no matching directory (${#FD_BEDS[@]} found)" >&2
    exit 1
fi

### set input files
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]}
FN_BED=${CHROM}.bed

FD_KMR=${FD_WRK}/kmer
FN_KMR=kmer_${TARGET}.bed

### set output files
FD_OUT=${FD_KMR}/$(basename "${FD_BED}")
FN_OUT=kmer_${TARGET}.bedpe

FD_CNT=${FD_OUT}
FN_CNT=kmer_${TARGET}_count.bed

### print start message
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID}"
echo "Input  file:       " "${FD_KMR}/${FN_KMR}"
echo "Input  file:       " "${FD_BED}/${FN_BED}"
echo "Output file:       " "${FD_OUT}/${FN_OUT}"
echo "Output file:       " "${FD_CNT}/${FN_CNT}"
echo
echo "Show the first few lines of the input file"
echo "${FD_BED}/${FN_BED}"
head "${FD_BED}/${FN_BED}"
echo
echo "Show the first few lines of the input file"
echo "${FD_KMR}/${FN_KMR}"
head "${FD_KMR}/${FN_KMR}"

### init: create folder
mkdir -p "${FD_OUT}" "${FD_CNT}"

### intersect: -wo appends the overlap size (bp) as the last column
FP_BED_A=${FD_KMR}/${FN_KMR}
FP_BED_B=${FD_BED}/${FN_BED}
FP_BED_O=${FD_OUT}/${FN_OUT}
if ! bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"; then
    echo "bedtools intersect failed for ${FP_BED_B}" >&2
    exit 1
fi

### count the kmers
### keep records whose overlap (column 7) equals KMER_LEN, reduce them to the
### kmer coordinates and count repeats; `uniq -c` merges adjacent lines only,
### so all records of one kmer are assumed to be consecutive in the file
awk -F $'\t' -v len="${KMER_LEN}" '$7 == len' "${FP_BED_O}" \
    | cut -f1-3 \
    | uniq -c \
    | awk -v OFS='\t' '{ print $2, $3, $4, $1 }' \
    > "${FD_CNT}/${FN_CNT}"

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FD_OUT}/${FN_OUT}"
head "${FD_OUT}/${FN_OUT}"
echo
echo "Show the first few lines of the output file"
echo "${FD_CNT}/${FN_CNT}"
head "${FD_CNT}/${FN_CNT}"

EOF

Bedtools 2.27.1
Submitted batch job 11154960


**Check results**

In [37]:
%%bash
# Print the Slurm log written by array task 0 of the Input kmer-count job.
source config.sh
FD_LOG=${FD_WRK}/log
cat "${FD_LOG}/kmer_count_per1_input_chr17.0.txt"

Slurm Array Index:  0
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
Input  file:        /work/kk319/out/CombEffect_STARR/data/Input//chr17.bed
Output file:        /work/kk319/out/CombEffect_STARR/kmer/Input/kmer_PER1.bedpe
Output file:        /work/kk319/out/CombEffect_STARR/kmer/Input/kmer_PER1_count.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/Input//chr17.bed
chr17	201123	201925
chr17	201160	201968
chr17	201202	202074
chr17	201259	202052
chr17	201366	202130
chr17	206174	207307
chr17	206269	207286
chr17	206283	207194
chr17	206324	207359
chr17	206326	207274

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795

Show the first few lines of the output file
/work/k

In [38]:
%%bash
# Print the Slurm log written by array task 5 of the Input kmer-count job.
source config.sh
FD_LOG=${FD_WRK}/log
cat "${FD_LOG}/kmer_count_per1_input_chr17.5.txt"

Slurm Array Index:  5
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
Input  file:        /work/kk319/out/CombEffect_STARR/data/Input5//chr17.bed
Output file:        /work/kk319/out/CombEffect_STARR/kmer/Input5/kmer_PER1.bedpe
Output file:        /work/kk319/out/CombEffect_STARR/kmer/Input5/kmer_PER1_count.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/Input5//chr17.bed
chr17	197679	198597
chr17	201155	201967
chr17	201364	202119
chr17	205678	206636
chr17	206243	207269
chr17	206253	207246
chr17	206400	207470
chr17	206554	207585
chr17	206559	207627
chr17	206945	207728

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795

Show the first few lines of the output file
/wo

## Test code for counting fragments

In [40]:
%%bash
# Step 1 of the counting prototype: from the first 30 intersect records of
# Input5, keep only those whose 7th column equals 750.
fpath=/work/kk319/out/CombEffect_STARR/kmer/Input5/kmer_PER1.bedpe
head -n 30 "${fpath}" \
    | awk -F $'\t' '$7 == 750 { print }'

chr17	8148000	8148750	chr17	8147785	8148799	750
chr17	8148000	8148750	chr17	8147810	8148759	750
chr17	8148000	8148750	chr17	8147918	8148919	750
chr17	8148000	8148750	chr17	8147950	8149020	750
chr17	8148005	8148755	chr17	8147785	8148799	750
chr17	8148005	8148755	chr17	8147810	8148759	750
chr17	8148005	8148755	chr17	8147918	8148919	750
chr17	8148005	8148755	chr17	8147950	8149020	750


In [41]:
%%bash
# Step 2 of the counting prototype: after the column-7 filter, reduce each
# record to its first three columns (the kmer coordinates).
fpath=/work/kk319/out/CombEffect_STARR/kmer/Input5/kmer_PER1.bedpe
head -n 30 "${fpath}" \
    | awk -F $'\t' '$7 == 750 { print }' \
    | cut -f 1-3

chr17	8148000	8148750
chr17	8148000	8148750
chr17	8148000	8148750
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148005	8148755
chr17	8148005	8148755
chr17	8148005	8148755


In [42]:
%%bash
# Step 3 of the counting prototype: the complete per-kmer count, tried on the
# first 30 intersect records of Input5.
fpath=/work/kk319/out/CombEffect_STARR/kmer/Input5/kmer_PER1.bedpe

# count_full_overlaps: read `bedtools intersect -wo` records on stdin and write
# "chrom<TAB>start<TAB>end<TAB>count" on stdout, one line per kmer.
#   - only records whose 7th column (overlap in bp) equals 750 are counted
#   - `uniq -c` merges adjacent lines only, so all records of one kmer are
#     assumed to arrive consecutively
#   - the final awk moves the count that `uniq -c` puts first to the last
#     column and joins the fields with tabs
count_full_overlaps() {
    awk -F $'\t' '$7 == 750' \
        | cut -f1-3 \
        | uniq -c \
        | awk -v OFS='\t' '{ print $2, $3, $4, $1 }'
}

head -n 30 "${fpath}" | count_full_overlaps

chr17	8148000	8148750	4
chr17	8148005	8148755	4


## Count fragments for Output (DMSO)

### Test looping output (DMSO) directories

In [25]:
%%bash
# Dry run: list every TFX*_DMSO library directory under data/ together with
# the per-library kmer output path that would be derived from its name.
FD_BASE=/work/kk319
FD_WRK=${FD_BASE}/out/CombEffect_STARR
FD_OUT=${FD_WRK}/kmer

# Expand the glob directly instead of parsing `ls`, so directory names are
# never word-split; nullglob turns "no match" into an empty array.
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/TFX*_DMSO/)
shopt -u nullglob

if (( ${#FD_BEDS[@]} == 0 )); then
    echo "No directories match ${FD_WRK}/data/TFX*_DMSO/" >&2
fi

for FD_BED in "${FD_BEDS[@]}"; do
    FD_NAME=$(basename "${FD_BED}")
    printf '%s\n' "${FD_NAME}"
    printf '%s\n' "${FD_BED}"
    printf '%s\n' "${FD_OUT}/kmer_PER1_${FD_NAME}.bed"
done

TFX2_DMSO
/work/kk319/out/CombEffect_STARR/data/TFX2_DMSO/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX2_DMSO.bed
TFX3_DMSO
/work/kk319/out/CombEffect_STARR/data/TFX3_DMSO/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX3_DMSO.bed
TFX4_DMSO
/work/kk319/out/CombEffect_STARR/data/TFX4_DMSO/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX4_DMSO.bed
TFX5_DMSO
/work/kk319/out/CombEffect_STARR/data/TFX5_DMSO/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX5_DMSO.bed
TFX_DMSO
/work/kk319/out/CombEffect_STARR/data/TFX_DMSO/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX_DMSO.bed


### Intersect the PER1 k-mers with the fragments of each DMSO output library (chr17)

In [43]:
%%bash
### Submit one Slurm array task per TFX*_DMSO library. Each task intersects the
### PER1 kmer windows with that library's chr17 fragments and counts, for every
### kmer, the fragments that overlap it by the full kmer length.

### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
# FD_WRK is not assigned in this cell; it is expected to come from config.sh.
: "${FD_WRK:?FD_WRK is not set (expected from config.sh)}"
FD_LOG=${FD_WRK}/log

### init: make sure the log directory exists before the tasks write to it
mkdir -p "${FD_LOG}"

### size the job array from the number of library directories, so the array
### indices always match the FD_BEDS array that each task rebuilds
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/TFX*_DMSO/)
shopt -u nullglob
N_BEDS=${#FD_BEDS[@]}
if (( N_BEDS == 0 )); then
    echo "No library directories match ${FD_WRK}/data/TFX*_DMSO/" >&2
    exit 1
fi

### run script using sbatch
### (the heredoc delimiter is quoted: variables below expand inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-$(( N_BEDS - 1 )) \
    --mem 8G \
    -o "${FD_LOG}/kmer_count_per1_output_dmso_chr17.%a.txt" \
    <<'EOF'
#!/bin/bash
### set directories and global parameters
source config.sh
: "${FD_WRK:?FD_WRK is not set (expected from config.sh)}"
CHROM=chr17
TARGET=PER1
# Overlap (bp) a fragment must have with a kmer to be counted.
# NOTE(review): chosen to equal the kmer width (750 bp) -- confirm against
# the kmer BED file if the windows are regenerated.
KMER_LEN=750

### list library directories (same glob as in the submitting shell)
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/TFX*_DMSO/)
shopt -u nullglob

### refuse to run when this task has no directory to work on
if [[ -z "${SLURM_ARRAY_TASK_ID:-}" ]] || (( SLURM_ARRAY_TASK_ID >= ${#FD_BEDS[@]} )); then
    echo "Array index '${SLURM_ARRAY_TASK_ID:-}' has no matching directory (${#FD_BEDS[@]} found)" >&2
    exit 1
fi

### set input files
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]}
FN_BED=${CHROM}.bed

FD_KMR=${FD_WRK}/kmer
FN_KMR=kmer_${TARGET}.bed

### set output files
FD_OUT=${FD_KMR}/$(basename "${FD_BED}")
FN_OUT=kmer_${TARGET}.bedpe

FD_CNT=${FD_OUT}
FN_CNT=kmer_${TARGET}_count.bed

### print start message
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID}"
echo "Input  file:       " "${FD_KMR}/${FN_KMR}"
echo "Input  file:       " "${FD_BED}/${FN_BED}"
echo "Output file:       " "${FD_OUT}/${FN_OUT}"
echo "Output file:       " "${FD_CNT}/${FN_CNT}"
echo
echo "Show the first few lines of the input file"
echo "${FD_BED}/${FN_BED}"
head "${FD_BED}/${FN_BED}"
echo
echo "Show the first few lines of the input file"
echo "${FD_KMR}/${FN_KMR}"
head "${FD_KMR}/${FN_KMR}"

### init: create folder
mkdir -p "${FD_OUT}" "${FD_CNT}"

### intersect: -wo appends the overlap size (bp) as the last column
FP_BED_A=${FD_KMR}/${FN_KMR}
FP_BED_B=${FD_BED}/${FN_BED}
FP_BED_O=${FD_OUT}/${FN_OUT}
if ! bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"; then
    echo "bedtools intersect failed for ${FP_BED_B}" >&2
    exit 1
fi

### count the kmers
### keep records whose overlap (column 7) equals KMER_LEN, reduce them to the
### kmer coordinates and count repeats; `uniq -c` merges adjacent lines only,
### so all records of one kmer are assumed to be consecutive in the file
awk -F $'\t' -v len="${KMER_LEN}" '$7 == len' "${FP_BED_O}" \
    | cut -f1-3 \
    | uniq -c \
    | awk -v OFS='\t' '{ print $2, $3, $4, $1 }' \
    > "${FD_CNT}/${FN_CNT}"

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FD_OUT}/${FN_OUT}"
head "${FD_OUT}/${FN_OUT}"
echo
echo "Show the first few lines of the output file"
echo "${FD_CNT}/${FN_CNT}"
head "${FD_CNT}/${FN_CNT}"

EOF

Bedtools 2.27.1
Submitted batch job 11155346


**Check results**

In [45]:
%%bash
# Print the Slurm log written by array task 0 of the DMSO kmer-count job.
source config.sh
FD_LOG=${FD_WRK}/log
cat "${FD_LOG}/kmer_count_per1_output_dmso_chr17.0.txt"

Slurm Array Index:  0
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
Input  file:        /work/kk319/out/CombEffect_STARR/data/TFX2_DMSO//chr17.bed
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX2_DMSO/kmer_PER1.bedpe
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX2_DMSO/kmer_PER1_count.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/TFX2_DMSO//chr17.bed
chr17	87067	87989
chr17	158043	159067
chr17	158043	159066
chr17	159137	160020
chr17	170572	172531
chr17	172392	173515
chr17	172393	173515
chr17	172396	173514
chr17	197679	198597
chr17	197681	198597

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795

Show the first few lines of the outpu

In [46]:
%%bash
# Print the Slurm log written by array task 4 of the DMSO kmer-count job.
source config.sh
FD_LOG=${FD_WRK}/log
cat "${FD_LOG}/kmer_count_per1_output_dmso_chr17.4.txt"

Slurm Array Index:  4
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
Input  file:        /work/kk319/out/CombEffect_STARR/data/TFX_DMSO//chr17.bed
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX_DMSO/kmer_PER1.bedpe
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX_DMSO/kmer_PER1_count.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/TFX_DMSO//chr17.bed
chr17	87067	87989
chr17	158043	159067
chr17	158043	159066
chr17	159137	160020
chr17	170572	172531
chr17	172392	173515
chr17	172393	173515
chr17	172396	173514
chr17	197679	198597
chr17	197681	198597

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795

Show the first few lines of the output fi

## Count fragments for Output (Dex)

### Test looping output (Dex) directories

In [29]:
%%bash
# Dry run: list every TFX*_Dex library directory under data/ together with
# the per-library kmer output path that would be derived from its name.
FD_BASE=/work/kk319
FD_WRK=${FD_BASE}/out/CombEffect_STARR
FD_OUT=${FD_WRK}/kmer

# Expand the glob directly instead of parsing `ls`, so directory names are
# never word-split; nullglob turns "no match" into an empty array.
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/TFX*_Dex/)
shopt -u nullglob

if (( ${#FD_BEDS[@]} == 0 )); then
    echo "No directories match ${FD_WRK}/data/TFX*_Dex/" >&2
fi

for FD_BED in "${FD_BEDS[@]}"; do
    FD_NAME=$(basename "${FD_BED}")
    printf '%s\n' "${FD_NAME}"
    printf '%s\n' "${FD_BED}"
    printf '%s\n' "${FD_OUT}/kmer_PER1_${FD_NAME}.bed"
done

TFX2_Dex
/work/kk319/out/CombEffect_STARR/data/TFX2_Dex/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX2_Dex.bed
TFX3_Dex
/work/kk319/out/CombEffect_STARR/data/TFX3_Dex/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX3_Dex.bed
TFX4_Dex
/work/kk319/out/CombEffect_STARR/data/TFX4_Dex/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX4_Dex.bed
TFX5_Dex
/work/kk319/out/CombEffect_STARR/data/TFX5_Dex/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX5_Dex.bed
TFX_Dex
/work/kk319/out/CombEffect_STARR/data/TFX_Dex/
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1_TFX_Dex.bed


### Intersect the PER1 k-mers with the fragments of each Dex output library (chr17)

In [44]:
%%bash
### Submit one Slurm array task per TFX*_Dex library. Each task intersects the
### PER1 kmer windows with that library's chr17 fragments and counts, for every
### kmer, the fragments that overlap it by the full kmer length.

### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
# FD_WRK is not assigned in this cell; it is expected to come from config.sh.
: "${FD_WRK:?FD_WRK is not set (expected from config.sh)}"
FD_LOG=${FD_WRK}/log

### init: make sure the log directory exists before the tasks write to it
mkdir -p "${FD_LOG}"

### size the job array from the number of library directories, so the array
### indices always match the FD_BEDS array that each task rebuilds
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/TFX*_Dex/)
shopt -u nullglob
N_BEDS=${#FD_BEDS[@]}
if (( N_BEDS == 0 )); then
    echo "No library directories match ${FD_WRK}/data/TFX*_Dex/" >&2
    exit 1
fi

### run script using sbatch
### (the heredoc delimiter is quoted: variables below expand inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-$(( N_BEDS - 1 )) \
    --mem 8G \
    -o "${FD_LOG}/kmer_count_per1_output_dex_chr17.%a.txt" \
    <<'EOF'
#!/bin/bash
### set directories and global parameters
source config.sh
: "${FD_WRK:?FD_WRK is not set (expected from config.sh)}"
CHROM=chr17
TARGET=PER1
# Overlap (bp) a fragment must have with a kmer to be counted.
# NOTE(review): chosen to equal the kmer width (750 bp) -- confirm against
# the kmer BED file if the windows are regenerated.
KMER_LEN=750

### list library directories (same glob as in the submitting shell)
shopt -s nullglob
FD_BEDS=("${FD_WRK}"/data/TFX*_Dex/)
shopt -u nullglob

### refuse to run when this task has no directory to work on
if [[ -z "${SLURM_ARRAY_TASK_ID:-}" ]] || (( SLURM_ARRAY_TASK_ID >= ${#FD_BEDS[@]} )); then
    echo "Array index '${SLURM_ARRAY_TASK_ID:-}' has no matching directory (${#FD_BEDS[@]} found)" >&2
    exit 1
fi

### set input files
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]}
FN_BED=${CHROM}.bed

FD_KMR=${FD_WRK}/kmer
FN_KMR=kmer_${TARGET}.bed

### set output files
FD_OUT=${FD_KMR}/$(basename "${FD_BED}")
FN_OUT=kmer_${TARGET}.bedpe

FD_CNT=${FD_OUT}
FN_CNT=kmer_${TARGET}_count.bed

### print start message
echo "Slurm Array Index: " "${SLURM_ARRAY_TASK_ID}"
echo "Input  file:       " "${FD_KMR}/${FN_KMR}"
echo "Input  file:       " "${FD_BED}/${FN_BED}"
echo "Output file:       " "${FD_OUT}/${FN_OUT}"
echo "Output file:       " "${FD_CNT}/${FN_CNT}"
echo
echo "Show the first few lines of the input file"
echo "${FD_BED}/${FN_BED}"
head "${FD_BED}/${FN_BED}"
echo
echo "Show the first few lines of the input file"
echo "${FD_KMR}/${FN_KMR}"
head "${FD_KMR}/${FN_KMR}"

### init: create folder
mkdir -p "${FD_OUT}" "${FD_CNT}"

### intersect: -wo appends the overlap size (bp) as the last column
FP_BED_A=${FD_KMR}/${FN_KMR}
FP_BED_B=${FD_BED}/${FN_BED}
FP_BED_O=${FD_OUT}/${FN_OUT}
if ! bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"; then
    echo "bedtools intersect failed for ${FP_BED_B}" >&2
    exit 1
fi

### count the kmers
### keep records whose overlap (column 7) equals KMER_LEN, reduce them to the
### kmer coordinates and count repeats; `uniq -c` merges adjacent lines only,
### so all records of one kmer are assumed to be consecutive in the file
awk -F $'\t' -v len="${KMER_LEN}" '$7 == len' "${FP_BED_O}" \
    | cut -f1-3 \
    | uniq -c \
    | awk -v OFS='\t' '{ print $2, $3, $4, $1 }' \
    > "${FD_CNT}/${FN_CNT}"

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FD_OUT}/${FN_OUT}"
head "${FD_OUT}/${FN_OUT}"
echo
echo "Show the first few lines of the output file"
echo "${FD_CNT}/${FN_CNT}"
head "${FD_CNT}/${FN_CNT}"

EOF

Bedtools 2.27.1
Submitted batch job 11155353


**Check results**

In [47]:
%%bash
# Print the Slurm log written by array task 0 of the Dex kmer-count job.
source config.sh
FD_LOG=${FD_WRK}/log
cat "${FD_LOG}/kmer_count_per1_output_dex_chr17.0.txt"

Slurm Array Index:  0
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
Input  file:        /work/kk319/out/CombEffect_STARR/data/TFX2_Dex//chr17.bed
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX2_Dex/kmer_PER1.bedpe
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX2_Dex/kmer_PER1_count.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/TFX2_Dex//chr17.bed
chr17	83638	84547
chr17	92503	93508
chr17	152590	153715
chr17	159027	160041
chr17	173500	174429
chr17	174388	175345
chr17	174388	175346
chr17	197582	198583
chr17	201248	202059
chr17	201249	202059

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795

Show the first few lines of the output file

In [48]:
%%bash
# Print the Slurm log written by array task 4 of the Dex kmer-count job.
source config.sh
FD_LOG=${FD_WRK}/log
cat "${FD_LOG}/kmer_count_per1_output_dex_chr17.4.txt"

Slurm Array Index:  4
Input  file:        /work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
Input  file:        /work/kk319/out/CombEffect_STARR/data/TFX_Dex//chr17.bed
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX_Dex/kmer_PER1.bedpe
Output file:        /work/kk319/out/CombEffect_STARR/kmer/TFX_Dex/kmer_PER1_count.bed

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/TFX_Dex//chr17.bed
chr17	83638	84547
chr17	92503	93508
chr17	152590	153715
chr17	159027	160041
chr17	173500	174429
chr17	174388	175345
chr17	174388	175346
chr17	197582	198583
chr17	201248	202059
chr17	201249	202059

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/kmer/kmer_PER1.bed
chr17	8148000	8148750
chr17	8148005	8148755
chr17	8148010	8148760
chr17	8148015	8148765
chr17	8148020	8148770
chr17	8148025	8148775
chr17	8148030	8148780
chr17	8148035	8148785
chr17	8148040	8148790
chr17	8148045	8148795
