# Connection between fragments and segments
Count the fragments that overlap each segment.

In [1]:
%%bash
### print the line count of each of the three BED files, one `wc` call per file
base=/data/reddylab/Kuei/out/CombEffect_STARR
for rel in \
    data/TFX_Dex/chr17.bed \
    count_fragment/TFX_Dex/cnt_PER1.bed \
    count_segment/TFX_Dex/seg_PER1.bed
do
    wc -l "${base}/${rel}"
done

6239658 /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/chr17.bed
4827 /data/reddylab/Kuei/out/CombEffect_STARR/count_fragment/TFX_Dex/cnt_PER1.bed
3562 /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/seg_PER1.bed


## Test loop: verify the code that loops through the output directories

In [2]:
%%bash
### list the entries matching TFX?_Dex, then the contents of TFX2_Dex
data_dir=/data/reddylab/Kuei/out/CombEffect_STARR/data
ls -d "${data_dir}"/TFX?_Dex
ls    "${data_dir}/TFX2_Dex"

/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX3_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX4_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX5_Dex
chr10.bed
chr11.bed
chr12.bed
chr13.bed
chr14.bed
chr15.bed
chr16.bed
chr17.bed
chr18.bed
chr19.bed
chr1.bed
chr20.bed
chr21.bed
chr22.bed
chr2.bed
chr3.bed
chr4.bed
chr5.bed
chr6.bed
chr7.bed
chr8.bed
chr9.bed
chrX.bed
chrY.bed


## intersect segments and fragments of Output (Dex)

### Test looping code

In [3]:
%%bash
### Print every path matching TFX?_Dex together with its basename.
### The glob is iterated directly instead of word-splitting the output of
### `ls`, so each match stays a single, intact word.
for fpath in /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX?_Dex; do
    # an unmatched glob is kept as a literal pattern; skip it
    [[ -e "${fpath}" ]] || continue
    tmp=$(basename "${fpath}")
    echo "${fpath}, ${tmp}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_Dex, TFX2_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX3_Dex, TFX3_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX4_Dex, TFX4_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX5_Dex, TFX5_Dex


In [4]:
%%bash
### Collect the paths matching TFX?_Dex in an array and print the first four
### entries. The glob is expanded straight into the array (one element per
### match) instead of word-splitting the output of `ls`.
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_BEDS=("${FD_WRK}"/data/TFX?_Dex)
for idx in 0 1 2 3; do
    echo "${FD_BEDS[idx]}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX3_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX4_Dex
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX5_Dex


### connect segments and fragments by `bedtools intersect`

In [11]:
%%bash
### Submit a Slurm array job (tasks 0-3). Each task picks one TFX?_Dex
### directory, intersects the PER1 segment file with that directory's chr17
### fragment file, and counts the overlapping fragments per segment.

### set environment
### NOTE(review): the job script below loads no modules itself, so it appears
### to rely on sbatch passing this environment on to the job -- confirm.
module load bedtools2
module load perl
module load gcc
source /data/reddylab/software/miniconda2/bin/activate alex_dev
export PATH=/data/reddylab/software/homer/bin/:$PATH

### set log file directory
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log

### run script using sbatch
### (the quoted 'EOF' delimiter keeps the job script literal: its variables
### are expanded when the job runs, not at submission time)
sbatch -pnew,all \
    --array 0-3 \
    --mem 8G \
    -o "${FD_LOG}/prep_count_segment_output_dex_per1.%a.txt" \
    <<'EOF'
#!/bin/bash
### abort on the first failed command, unset variable, or failed pipeline
### stage, so a failed intersect cannot leave a plausible-looking count file
set -euo pipefail

### set directories
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR

### set global variables
CHROM=chr17
TARGET=PER1
SAMPLE=TFX_Dex

### set input and output
### the glob is expanded straight into the array (no `ls` parsing)
FD_BEDS=("${FD_WRK}"/data/TFX?_Dex)
TASK_ID=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
if (( TASK_ID >= ${#FD_BEDS[@]} )); then
    echo "Array index ${TASK_ID} is out of range: ${#FD_BEDS[@]} directories found" >&2
    exit 1
fi
FD_BED=${FD_BEDS[TASK_ID]}
FN_BED=${CHROM}.bed
FD_SEG=${FD_WRK}/count_segment/${SAMPLE}
FN_SEG=seg_${TARGET}.bed
FNAME=$(basename "${FD_BED}")

FP_BED_A=${FD_SEG}/${FN_SEG}
FP_BED_B=${FD_BED}/${FN_BED}
FP_BED_O=${FD_SEG}/${TARGET}_${CHROM}_${FNAME}.bedpe
FP_CNT_O=${FD_SEG}/cnt_${TARGET}_${CHROM}_${FNAME}.bed

### fail early if an input is missing (this also catches an unmatched glob,
### which would otherwise be kept as a literal path)
for fp in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${fp}" ]]; then
        echo "Cannot read input file: ${fp}" >&2
        exit 1
    fi
done

### Print start message
echo "Slurm Array Index:  ${TASK_ID}"
echo "Input  Segment   file:  ${FP_BED_A}"
echo "Input  Fragment  file:  ${FP_BED_B}"
echo "Output Intersect file:  ${FP_BED_O}"
echo "Output Count     file:  ${FP_CNT_O}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_A}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_B}"

### intersect
### -wo writes each segment (A) / fragment (B) pair plus the overlap size
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

### count segments
### keep the segment columns, count identical rows, then move the count that
### `uniq -c` puts first to a trailing tab-separated column
cut -f1-3 "${FP_BED_O}" \
    | sort \
    | uniq -c \
    | awk -v OFS='\t' '{print $2, $3, $4, $1}' \
    > "${FP_CNT_O}"

### Print end message
echo
echo "Show the first few lines of the output file"
head "${FP_BED_O}"
echo
echo "Show the first few lines of the output file"
head "${FP_CNT_O}"

EOF

Submitted batch job 25436394


### Check log

In [12]:
%%bash
### display the log written by array task 0 of the Dex / PER1 job
log_dir=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
cat "${log_dir}/prep_count_segment_output_dex_per1.0.txt"

Slurm Array Index:  0
Input  Segment   file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/seg_PER1.bed
Input  Fragment  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_Dex/chr17.bed
Output Intersect file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/PER1_chr17_TFX2_Dex.bedpe
Output Count     file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/cnt_PER1_chr17_TFX2_Dex.bed

Show the first few lines of the input file
chr17	8148987	8148988
chr17	8148988	8149016
chr17	8149016	8149018
chr17	8149018	8149019
chr17	8149019	8149023
chr17	8149023	8149029
chr17	8149029	8149050
chr17	8149050	8149051
chr17	8149051	8149062
chr17	8149062	8149063

Show the first few lines of the input file
chr17	83638	84547
chr17	92503	93508
chr17	152590	153715
chr17	159027	160041
chr17	173500	174429
chr17	174388	175345
chr17	174388	175346
chr17	197582	198583
chr17	201248	202059
chr17	201249	202059

Show the first few lines of the output file
chr17	814898

## intersect segments and fragments of Output (DMSO)

### Test looping code

In [13]:
%%bash
### Print every path matching TFX?_DMSO together with its basename.
### The glob is iterated directly instead of word-splitting the output of
### `ls`, so each match stays a single, intact word.
for fpath in /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX?_DMSO; do
    # an unmatched glob is kept as a literal pattern; skip it
    [[ -e "${fpath}" ]] || continue
    tmp=$(basename "${fpath}")
    echo "${fpath}, ${tmp}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_DMSO, TFX2_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX3_DMSO, TFX3_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX4_DMSO, TFX4_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX5_DMSO, TFX5_DMSO


In [14]:
%%bash
### Collect the paths matching TFX?_DMSO in an array and print the first four
### entries. The glob is expanded straight into the array (one element per
### match) instead of word-splitting the output of `ls`.
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_DATS=("${FD_WRK}"/data/TFX?_DMSO)
for idx in 0 1 2 3; do
    echo "${FD_DATS[idx]}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX3_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX4_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX5_DMSO


### connect segments and fragments by `bedtools intersect`

In [15]:
%%bash
### Submit a Slurm array job (tasks 0-3). Each task picks one TFX?_DMSO
### directory, intersects the PER1 segment file with that directory's chr17
### fragment file, and counts the overlapping fragments per segment.

### set environment
### NOTE(review): the job script below loads no modules itself, so it appears
### to rely on sbatch passing this environment on to the job -- confirm.
module load bedtools2
module load perl
module load gcc
source /data/reddylab/software/miniconda2/bin/activate alex_dev
export PATH=/data/reddylab/software/homer/bin/:$PATH

### set log file directory
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log

### run script using sbatch
### (the quoted 'EOF' delimiter keeps the job script literal: its variables
### are expanded when the job runs, not at submission time)
sbatch -pnew,all \
    --array 0-3 \
    --mem 8G \
    -o "${FD_LOG}/prep_count_segment_output_dmso_per1.%a.txt" \
    <<'EOF'
#!/bin/bash
### abort on the first failed command, unset variable, or failed pipeline
### stage, so a failed intersect cannot leave a plausible-looking count file
set -euo pipefail

### set directories
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR

### set global variables
### NOTE(review): SAMPLE stays TFX_Dex although the fragments are DMSO, so the
### segment file is read from, and the results are written to,
### count_segment/TFX_Dex -- presumably a shared segment set; confirm.
CHROM=chr17
TARGET=PER1
SAMPLE=TFX_Dex

### set input and output
### the glob is expanded straight into the array (no `ls` parsing)
FD_BEDS=("${FD_WRK}"/data/TFX?_DMSO)
TASK_ID=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
if (( TASK_ID >= ${#FD_BEDS[@]} )); then
    echo "Array index ${TASK_ID} is out of range: ${#FD_BEDS[@]} directories found" >&2
    exit 1
fi
FD_BED=${FD_BEDS[TASK_ID]}
FN_BED=${CHROM}.bed
FD_SEG=${FD_WRK}/count_segment/${SAMPLE}
FN_SEG=seg_${TARGET}.bed
FNAME=$(basename "${FD_BED}")

FP_BED_A=${FD_SEG}/${FN_SEG}
FP_BED_B=${FD_BED}/${FN_BED}
FP_BED_O=${FD_SEG}/${TARGET}_${CHROM}_${FNAME}.bedpe
FP_CNT_O=${FD_SEG}/cnt_${TARGET}_${CHROM}_${FNAME}.bed

### fail early if an input is missing (this also catches an unmatched glob,
### which would otherwise be kept as a literal path)
for fp in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${fp}" ]]; then
        echo "Cannot read input file: ${fp}" >&2
        exit 1
    fi
done

### Print start message
echo "Slurm Array Index:  ${TASK_ID}"
echo "Input  Segment   file:  ${FP_BED_A}"
echo "Input  Fragment  file:  ${FP_BED_B}"
echo "Output Intersect file:  ${FP_BED_O}"
echo "Output Count     file:  ${FP_CNT_O}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_A}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_B}"

### intersect
### -wo writes each segment (A) / fragment (B) pair plus the overlap size
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

### count segments
### keep the segment columns, count identical rows, then move the count that
### `uniq -c` puts first to a trailing tab-separated column
cut -f1-3 "${FP_BED_O}" \
    | sort \
    | uniq -c \
    | awk -v OFS='\t' '{print $2, $3, $4, $1}' \
    > "${FP_CNT_O}"

### Print end message
echo
echo "Show the first few lines of the output file"
head "${FP_BED_O}"
echo
echo "Show the first few lines of the output file"
head "${FP_CNT_O}"

EOF

Submitted batch job 25436398


### Check log

In [17]:
%%bash
### display the log written by array task 0 of the DMSO / PER1 job
log_dir=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
cat "${log_dir}/prep_count_segment_output_dmso_per1.0.txt"

Slurm Array Index:  0
Input  Segment   file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/seg_PER1.bed
Input  Fragment  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_DMSO/chr17.bed
Output Intersect file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/PER1_chr17_TFX2_DMSO.bedpe
Output Count     file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/cnt_PER1_chr17_TFX2_DMSO.bed

Show the first few lines of the input file
chr17	8148987	8148988
chr17	8148988	8149016
chr17	8149016	8149018
chr17	8149018	8149019
chr17	8149019	8149023
chr17	8149023	8149029
chr17	8149029	8149050
chr17	8149050	8149051
chr17	8149051	8149062
chr17	8149062	8149063

Show the first few lines of the input file
chr17	87067	87989
chr17	158043	159067
chr17	158043	159066
chr17	159137	160020
chr17	170572	172531
chr17	172392	173515
chr17	172393	173515
chr17	172396	173514
chr17	197679	198597
chr17	197681	198597

Show the first few lines of the output file
chr17	8

## intersect segments and fragments of Input

### Test looping code

In [19]:
%%bash
### Print every path matching Input? together with its basename.
### The glob is iterated directly instead of word-splitting the output of
### `ls`, so each match stays a single, intact word.
for fpath in /data/reddylab/Kuei/out/CombEffect_STARR/data/Input?; do
    # an unmatched glob is kept as a literal pattern; skip it
    [[ -e "${fpath}" ]] || continue
    tmp=$(basename "${fpath}")
    echo "${fpath}, ${tmp}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/Input1, Input1
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input2, Input2
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input3, Input3
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input4, Input4
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input5, Input5


### connect segments and fragments by `bedtools intersect`

In [16]:
%%bash
### Submit a Slurm array job (tasks 0-4). Each task picks one Input?
### directory, intersects the PER1 segment file with that directory's chr17
### fragment file, and counts the overlapping fragments per segment.

### set environment
### NOTE(review): the job script below loads no modules itself, so it appears
### to rely on sbatch passing this environment on to the job -- confirm.
module load bedtools2
module load perl
module load gcc
source /data/reddylab/software/miniconda2/bin/activate alex_dev
export PATH=/data/reddylab/software/homer/bin/:$PATH

### set log file directory
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log

### run script using sbatch
### (the quoted 'EOF' delimiter keeps the job script literal: its variables
### are expanded when the job runs, not at submission time)
sbatch -pnew,all \
    --array 0-4 \
    --mem 8G \
    -o "${FD_LOG}/prep_count_segment_input_per1.%a.txt" \
    <<'EOF'
#!/bin/bash
### abort on the first failed command, unset variable, or failed pipeline
### stage, so a failed intersect cannot leave a plausible-looking count file
set -euo pipefail

### set directories
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR

### set global variables
### NOTE(review): SAMPLE stays TFX_Dex although the fragments come from the
### Input? directories, so the segment file is read from, and the results are
### written to, count_segment/TFX_Dex -- presumably a shared segment set;
### confirm.
CHROM=chr17
TARGET=PER1
SAMPLE=TFX_Dex

### set input and output
### the glob is expanded straight into the array (no `ls` parsing)
FD_BEDS=("${FD_WRK}"/data/Input?)
TASK_ID=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
if (( TASK_ID >= ${#FD_BEDS[@]} )); then
    echo "Array index ${TASK_ID} is out of range: ${#FD_BEDS[@]} directories found" >&2
    exit 1
fi
FD_BED=${FD_BEDS[TASK_ID]}
FN_BED=${CHROM}.bed
FD_SEG=${FD_WRK}/count_segment/${SAMPLE}
FN_SEG=seg_${TARGET}.bed
FNAME=$(basename "${FD_BED}")

FP_BED_A=${FD_SEG}/${FN_SEG}
FP_BED_B=${FD_BED}/${FN_BED}
FP_BED_O=${FD_SEG}/${TARGET}_${CHROM}_${FNAME}.bedpe
FP_CNT_O=${FD_SEG}/cnt_${TARGET}_${CHROM}_${FNAME}.bed

### fail early if an input is missing (this also catches an unmatched glob,
### which would otherwise be kept as a literal path)
for fp in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${fp}" ]]; then
        echo "Cannot read input file: ${fp}" >&2
        exit 1
    fi
done

### Print start message
echo "Slurm Array Index:  ${TASK_ID}"
echo "Input  Segment   file:  ${FP_BED_A}"
echo "Input  Fragment  file:  ${FP_BED_B}"
echo "Output Intersect file:  ${FP_BED_O}"
echo "Output Count     file:  ${FP_CNT_O}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_A}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_B}"

### intersect
### -wo writes each segment (A) / fragment (B) pair plus the overlap size
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

### count segments
### keep the segment columns, count identical rows, then move the count that
### `uniq -c` puts first to a trailing tab-separated column
cut -f1-3 "${FP_BED_O}" \
    | sort \
    | uniq -c \
    | awk -v OFS='\t' '{print $2, $3, $4, $1}' \
    > "${FP_CNT_O}"

### Print end message
echo
echo "Show the first few lines of the output file"
head "${FP_BED_O}"
echo
echo "Show the first few lines of the output file"
head "${FP_CNT_O}"

EOF

Submitted batch job 25436402


### Check log

In [18]:
%%bash
### display the log written by array task 0 of the Input / PER1 job
log_dir=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
cat "${log_dir}/prep_count_segment_input_per1.0.txt"

Slurm Array Index:  0
Input  Segment   file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/seg_PER1.bed
Input  Fragment  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input1/chr17.bed
Output Intersect file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/PER1_chr17_Input1.bedpe
Output Count     file:  /data/reddylab/Kuei/out/CombEffect_STARR/count_segment/TFX_Dex/cnt_PER1_chr17_Input1.bed

Show the first few lines of the input file
chr17	8148987	8148988
chr17	8148988	8149016
chr17	8149016	8149018
chr17	8149018	8149019
chr17	8149019	8149023
chr17	8149023	8149029
chr17	8149029	8149050
chr17	8149050	8149051
chr17	8149051	8149062
chr17	8149062	8149063

Show the first few lines of the input file
chr17	201123	201925
chr17	201160	201968
chr17	201202	202074
chr17	201259	202052
chr17	201366	202130
chr17	206174	207307
chr17	206269	207286
chr17	206283	207194
chr17	206324	207359
chr17	206326	207274

Show the first few lines of the output file
chr17	8148987	

In [43]:
%%bash
### Submit a Slurm array job (tasks 0-3). Each task picks one TFX?_Dex
### directory, intersects data/TFX_Dex/seg_chr17.bed with that directory's
### chr17 fragment file, and counts the overlapping fragments per segment.

### set environment
### NOTE(review): the job script below loads no modules itself, so it appears
### to rely on sbatch passing this environment on to the job -- confirm.
module load bedtools2
module load perl
module load gcc
source /data/reddylab/software/miniconda2/bin/activate alex_dev
export PATH=/data/reddylab/software/homer/bin/:$PATH

### set log file directory
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log

### run script using sbatch
### (the quoted 'EOF' delimiter keeps the job script literal: its variables
### are expanded when the job runs, not at submission time)
sbatch -pnew,all \
    --array 0-3 \
    --mem 8G \
    -o "${FD_LOG}/prep_count_dex_test.%a.txt" \
    <<'EOF'
#!/bin/bash
### abort on the first failed command, unset variable, or failed pipeline
### stage, so a failed intersect cannot leave a plausible-looking count file
set -euo pipefail

### set directories
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_OUT=${FD_WRK}/data/TFX_Dex
### the glob is expanded straight into the array (no `ls` parsing)
FD_DATS=("${FD_WRK}"/data/TFX?_Dex)
CHROM=chr17

### set input and output file names
TASK_ID=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
if (( TASK_ID >= ${#FD_DATS[@]} )); then
    echo "Array index ${TASK_ID} is out of range: ${#FD_DATS[@]} directories found" >&2
    exit 1
fi
FD_DAT=${FD_DATS[TASK_ID]}
FN_SEG=seg_${CHROM}.bed
FNAME=$(basename "${FD_DAT}")

FP_BED_A=${FD_OUT}/${FN_SEG}
FP_BED_B=${FD_DAT}/${CHROM}.bed
FP_BED_O=${FD_OUT}/${CHROM}_${FNAME}.bedpe
FP_CNT_O=${FD_OUT}/cnt_${CHROM}_${FNAME}.bed

### fail early if an input is missing (this also catches an unmatched glob,
### which would otherwise be kept as a literal path)
for fp in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${fp}" ]]; then
        echo "Cannot read input file: ${fp}" >&2
        exit 1
    fi
done

### Print start message
echo "Slurm Array Index:  ${TASK_ID}"
echo "Input  Segment   file:  ${FP_BED_A}"
echo "Input  Fragment  file:  ${FP_BED_B}"
echo "Output Intersect file:  ${FP_BED_O}"
echo "Output Count     file:  ${FP_CNT_O}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_A}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_B}"

### intersect
### -wo writes each segment (A) / fragment (B) pair plus the overlap size
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

### count segments
### keep the segment columns, count identical rows, then move the count that
### `uniq -c` puts first to a trailing tab-separated column
cut -f1-3 "${FP_BED_O}" \
    | sort \
    | uniq -c \
    | awk -v OFS='\t' '{print $2, $3, $4, $1}' \
    > "${FP_CNT_O}"

### Print end message
echo
echo "Show the first few lines of the output file"
head "${FP_BED_O}"
echo
echo "Show the first few lines of the output file"
head "${FP_CNT_O}"

EOF

Submitted batch job 25408779


### Check log

In [44]:
%%bash
### display the log written by array task 0 of the Dex chr17 test job
log_dir=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
cat "${log_dir}/prep_count_dex_test.0.txt"

Slurm Array Index:  0
Input  Segment   file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/seg_chr17.bed
Input  Fragment  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_Dex/chr17.bed
Output Intersect file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/chr17_TFX2_Dex.bedpe
Output Count     file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/cnt_chr17_TFX2_Dex.bed

Show the first few lines of the input file
chr17	8148987	8148988
chr17	8148988	8149016
chr17	8149016	8149018
chr17	8149018	8149019
chr17	8149019	8149023
chr17	8149023	8149029
chr17	8149029	8149050
chr17	8149050	8149051
chr17	8149051	8149062
chr17	8149062	8149063

Show the first few lines of the input file
chr17	83638	84547
chr17	92503	93508
chr17	152590	153715
chr17	159027	160041
chr17	173500	174429
chr17	174388	175345
chr17	174388	175346
chr17	197582	198583
chr17	201248	202059
chr17	201249	202059

Show the first few lines of the output file
chr17	8148987	8148988	chr17	8147924	8149001	1
ch

## intersect segments and fragments of Output (DMSO)

### Test looping code

In [45]:
%%bash
### Print every path matching TFX?_DMSO together with its basename.
### The glob is iterated directly instead of word-splitting the output of
### `ls`, so each match stays a single, intact word.
for fpath in /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX?_DMSO; do
    # an unmatched glob is kept as a literal pattern; skip it
    [[ -e "${fpath}" ]] || continue
    tmp=$(basename "${fpath}")
    echo "${fpath}, ${tmp}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_DMSO, TFX2_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX3_DMSO, TFX3_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX4_DMSO, TFX4_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX5_DMSO, TFX5_DMSO


In [46]:
%%bash
### Collect the paths matching TFX?_DMSO in an array and print the first four
### entries. The glob is expanded straight into the array (one element per
### match) instead of word-splitting the output of `ls`.
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_DATS=("${FD_WRK}"/data/TFX?_DMSO)
for idx in 0 1 2 3; do
    echo "${FD_DATS[idx]}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX3_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX4_DMSO
/data/reddylab/Kuei/out/CombEffect_STARR/data/TFX5_DMSO


### connect segments and fragments by `bedtools intersect`

In [47]:
%%bash
### Submit a Slurm array job (tasks 0-3). Each task picks one TFX?_DMSO
### directory, intersects data/TFX_Dex/seg_chr17.bed with that directory's
### chr17 fragment file, and counts the overlapping fragments per segment.

### set environment
### NOTE(review): the job script below loads no modules itself, so it appears
### to rely on sbatch passing this environment on to the job -- confirm.
module load bedtools2
module load perl
module load gcc
source /data/reddylab/software/miniconda2/bin/activate alex_dev
export PATH=/data/reddylab/software/homer/bin/:$PATH

### set log file directory
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log

### run script using sbatch
### (the quoted 'EOF' delimiter keeps the job script literal: its variables
### are expanded when the job runs, not at submission time)
sbatch -pnew,all \
    --array 0-3 \
    --mem 8G \
    -o "${FD_LOG}/prep_count_dmso_test.%a.txt" \
    <<'EOF'
#!/bin/bash
### abort on the first failed command, unset variable, or failed pipeline
### stage, so a failed intersect cannot leave a plausible-looking count file
set -euo pipefail

### set directories
### NOTE(review): FD_OUT stays data/TFX_Dex although the fragments are DMSO,
### so the segment file is read from, and the results are written to, the
### TFX_Dex directory -- presumably a shared segment set; confirm.
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_OUT=${FD_WRK}/data/TFX_Dex
### the glob is expanded straight into the array (no `ls` parsing)
FD_DATS=("${FD_WRK}"/data/TFX?_DMSO)
CHROM=chr17

### set input and output file names
TASK_ID=${SLURM_ARRAY_TASK_ID:?this script must run as a Slurm array task}
if (( TASK_ID >= ${#FD_DATS[@]} )); then
    echo "Array index ${TASK_ID} is out of range: ${#FD_DATS[@]} directories found" >&2
    exit 1
fi
FD_DAT=${FD_DATS[TASK_ID]}
FN_SEG=seg_${CHROM}.bed
FNAME=$(basename "${FD_DAT}")

FP_BED_A=${FD_OUT}/${FN_SEG}
FP_BED_B=${FD_DAT}/${CHROM}.bed
FP_BED_O=${FD_OUT}/${CHROM}_${FNAME}.bedpe
FP_CNT_O=${FD_OUT}/cnt_${CHROM}_${FNAME}.bed

### fail early if an input is missing (this also catches an unmatched glob,
### which would otherwise be kept as a literal path)
for fp in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${fp}" ]]; then
        echo "Cannot read input file: ${fp}" >&2
        exit 1
    fi
done

### Print start message
echo "Slurm Array Index:  ${TASK_ID}"
echo "Input  Segment   file:  ${FP_BED_A}"
echo "Input  Fragment  file:  ${FP_BED_B}"
echo "Output Intersect file:  ${FP_BED_O}"
echo "Output Count     file:  ${FP_CNT_O}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_A}"
echo
echo "Show the first few lines of the input file"
head "${FP_BED_B}"

### intersect
### -wo writes each segment (A) / fragment (B) pair plus the overlap size
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"

### count segments
### keep the segment columns, count identical rows, then move the count that
### `uniq -c` puts first to a trailing tab-separated column
cut -f1-3 "${FP_BED_O}" \
    | sort \
    | uniq -c \
    | awk -v OFS='\t' '{print $2, $3, $4, $1}' \
    > "${FP_CNT_O}"

### Print end message
echo
echo "Show the first few lines of the output file"
head "${FP_BED_O}"
echo
echo "Show the first few lines of the output file"
head "${FP_CNT_O}"

EOF

Submitted batch job 25408801


### Check log

In [48]:
%%bash
### display the log written by array task 0 of the DMSO chr17 test job
log_dir=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
cat "${log_dir}/prep_count_dmso_test.0.txt"

Slurm Array Index:  0
Input  Segment   file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/seg_chr17.bed
Input  Fragment  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX2_DMSO/chr17.bed
Output Intersect file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/chr17_TFX2_DMSO.bedpe
Output Count     file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/cnt_chr17_TFX2_DMSO.bed

Show the first few lines of the input file
chr17	8148987	8148988
chr17	8148988	8149016
chr17	8149016	8149018
chr17	8149018	8149019
chr17	8149019	8149023
chr17	8149023	8149029
chr17	8149029	8149050
chr17	8149050	8149051
chr17	8149051	8149062
chr17	8149062	8149063

Show the first few lines of the input file
chr17	87067	87989
chr17	158043	159067
chr17	158043	159066
chr17	159137	160020
chr17	170572	172531
chr17	172392	173515
chr17	172393	173515
chr17	172396	173514
chr17	197679	198597
chr17	197681	198597

Show the first few lines of the output file
chr17	8148987	8148988	chr17	8148063	8149178

## intersect segments and fragments of Input

### Test looping code

In [49]:
%%bash
### Print every path matching Input? together with its basename.
### The glob is iterated directly instead of word-splitting the output of
### `ls`, so each match stays a single, intact word.
for fpath in /data/reddylab/Kuei/out/CombEffect_STARR/data/Input?; do
    # an unmatched glob is kept as a literal pattern; skip it
    [[ -e "${fpath}" ]] || continue
    tmp=$(basename "${fpath}")
    echo "${fpath}, ${tmp}"
done

/data/reddylab/Kuei/out/CombEffect_STARR/data/Input1, Input1
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input2, Input2
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input3, Input3
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input4, Input4
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input5, Input5


### connect segments and fragments by `bedtools intersect`

In [50]:
%%bash
### set environment
module load bedtools2
module load perl
module load gcc
source /data/reddylab/software/miniconda2/bin/activate alex_dev
export PATH=/data/reddylab/software/homer/bin/:$PATH

### set log file directory
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log

### run script using sbatch
sbatch -pnew,all \
    --array 0-4 \
    --mem 8G \
    -o ${FD_LOG}/prep_count_input_test.%a.txt \
    <<'EOF'
#!/bin/bash
### set directories
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_OUT=${FD_WRK}/data/TFX_Dex
FD_DATS=($(ls -d ${FD_WRK}/data/Input?))
CHROM=chr17

### set input and output file names
FD_DAT=${FD_DATS[${SLURM_ARRAY_TASK_ID}]}
FN_SEG=seg_${CHROM}.bed
FNAME=$(basename ${FD_DAT})

### Print start message
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  Segment   file: " ${FD_OUT}/${FN_SEG}
echo "Input  Fragment  file: " ${FD_DAT}/${CHROM}.bed
echo "Output Intersect file: " ${FD_OUT}/${CHROM}_${FNAME}.bedpe
echo "Output Count     file: " ${FD_OUT}/cnt_${CHROM}_${FNAME}.bed
echo
echo "Show the first few lines of the input file"
head ${FD_OUT}/${FN_SEG}
echo
echo "Show the first few lines of the input file"
head ${FD_DAT}/${CHROM}.bed

###
FP_BED_A=${FD_OUT}/${FN_SEG}
FP_BED_B=${FD_DAT}/${CHROM}.bed
FP_BED_O=${FD_OUT}/${CHROM}_${FNAME}.bedpe
bedtools intersect -a ${FP_BED_A} -b ${FP_BED_B} -wo > ${FP_BED_O}

###
cat ${FP_BED_O} | cut -f1-3 |\
    sort    |\
    uniq -c |\
    awk '{$(NF+1)=$1;$1=""}1'   |\
    sed -e 's/^[[:space:]]*//'  |\
    sed -e 's/[[:space:]]/\t/g'  \
    > ${FD_OUT}/cnt_${CHROM}_${FNAME}.bed

### Print end message
echo
echo "Show the first few lines of the output file"
head ${FD_OUT}/${CHROM}_${FNAME}.bedpe
echo
echo "Show the first few lines of the output file"
head ${FD_OUT}/cnt_${CHROM}_${FNAME}.bed

EOF

Submitted batch job 25408819


### Check log

In [52]:
%%bash
### display the log written by array task 1 of the Input chr17 test job
log_dir=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
cat "${log_dir}/prep_count_input_test.1.txt"

Slurm Array Index:  1
Input  Segment   file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/seg_chr17.bed
Input  Fragment  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input2/chr17.bed
Output Intersect file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/chr17_Input2.bedpe
Output Count     file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/TFX_Dex/cnt_chr17_Input2.bed

Show the first few lines of the input file
chr17	8148987	8148988
chr17	8148988	8149016
chr17	8149016	8149018
chr17	8149018	8149019
chr17	8149019	8149023
chr17	8149023	8149029
chr17	8149029	8149050
chr17	8149050	8149051
chr17	8149051	8149062
chr17	8149062	8149063

Show the first few lines of the input file
chr17	159510	160362
chr17	201155	201968
chr17	201155	201968
chr17	201174	201950
chr17	205516	206550
chr17	205522	206567
chr17	206243	207269
chr17	206270	207284
chr17	206273	207378
chr17	206289	207275

Show the first few lines of the output file
chr17	8148987	8148988	chr17	8148003	8149020	1
chr1