# Fragment Annotation

## Annotate Input

**Test code**

In [23]:
%%bash
source config.sh
SAMPLE=Input
CHROM=chr17

### directories: motif annotation, fragment counts, annotated output
FD_ANN="${FD_BASE}/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1"
FD_BED="${FD_WRK}/count_fragment/${SAMPLE}"
FD_OUT="${FD_WRK}/annotation_fragment/${SAMPLE}"

### full paths: A = fragment counts, B = motif annotation, O = annotated output
FP_BED_A="${FD_BED}/cnt_${CHROM}.bed"
FP_BED_B="${FD_ANN}/${CHROM}_rm_mouse.bed.gz"
FP_BED_O="${FD_OUT}/${CHROM}.bed.gz"
mkdir -p "${FD_OUT}"

### preview both inputs; this test cell does not write the output file
head "${FP_BED_A}"
zcat "${FP_BED_B}" | head

chr17	10000001	10000909	1
chr17	10000097	10001138	1
chr17	10000104	10001051	1
chr17	10000112	10000955	1
chr17	10000131	10001070	2
chr17	10000134	10001169	1
chr17	10000150	10001133	1
chr17	10000153	10001218	1
chr17	10000160	10001059	1
chr17	10000162	10001116	1
chr17	60004	60022	ZNF140	5.6897	+	ZN140_HUMAN.H11MO.0.C	1
chr17	60004	60022	ZNF667	8.0240	-	ZN667_HUMAN.H11MO.0.C	1
chr17	60006	60015	Ebox/CAGCTG	7.9275	+	MYOD1_HUMAN.H11MO.0.A	1
chr17	60011	60031	GC-tract	12.1220	-	ZN341_HUMAN.H11MO.0.C	1
chr17	60012	60025	PRDM4	1.3083	+	PRDM4_C2H2_1	1
chr17	60017	60028	NR/19	9.6680	-	NR1D1_HUMAN.H11MO.0.B	2
chr17	60019	60035	HEN1	5.5854	-	HEN1_HUMAN.H11MO.0.C	2
chr17	60023	60042	ZNF680	6.3901	-	ZN680_HUMAN.H11MO.0.C	1
chr17	60027	60037	SMARCA1	7.5566	-	SMCA1_HUMAN.H11MO.0.C	1
chr17	60027	60040	LEF1	7.1402	+	ZN350_HUMAN.H11MO.0.C	1


**RUN: fragment annotation**

In [32]:
%%bash
### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the here-document delimiter is quoted, so the job script below is passed
###  to sbatch verbatim and its variables are expanded only inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 15G \
    -o "${FD_LOG}/annot_fragment_input.txt" \
    <<'EOF'
#!/bin/bash
### Annotate the fragments of one sample/chromosome with motif sites:
### `bedtools intersect -wo` writes one line per (fragment, motif site)
### overlap; the result is stored gzip-compressed.

### set directories & global variables
source config.sh
: "${FD_BASE:?FD_BASE is not set; check config.sh}"
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
CHROM=chr17
SAMPLE=Input

### init: set input and output file
FD_ANN=${FD_BASE}/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1
FN_ANN=${CHROM}_rm_mouse.bed.gz
FD_BED=${FD_WRK}/count_fragment/${SAMPLE}
FN_BED=cnt_${CHROM}.bed
FD_OUT=${FD_WRK}/annotation_fragment/${SAMPLE}
FN_OUT=${CHROM}.bed.gz

### init: set input and output file path
mkdir -p "${FD_OUT}"
FP_BED_A=${FD_BED}/${FN_BED}
FP_BED_B=${FD_ANN}/${FN_ANN}
FP_BED_O=${FD_OUT}/${FN_OUT}

### print start message
### (the array index is empty because this job is submitted without --array)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file: " "${FP_BED_A}"
echo "Input  file: " "${FP_BED_B}"
echo "Output file: " "${FP_BED_O}"

### stop early if an input is missing instead of writing an empty output
for FP in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${FP}" ]]; then
        echo "ERROR: cannot read input file: ${FP}" >&2
        exit 1
    fi
done

echo
echo "Show the first few lines of the input file"
echo "${FP_BED_A}"
head "${FP_BED_A}"
echo
echo "${FP_BED_B}"
zcat "${FP_BED_B}" | head

### RUN: annotation by intersecting two bed files
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo | gzip -cf > "${FP_BED_O}"

### a pipeline only reports the status of its last command (gzip), so check
### every stage; a failed bedtools would otherwise leave a valid-looking .gz
RC=("${PIPESTATUS[@]}")
if (( RC[0] != 0 || RC[1] != 0 )); then
    echo "ERROR: bedtools intersect | gzip failed (exit codes: ${RC[*]}); removing incomplete ${FP_BED_O}" >&2
    rm -f -- "${FP_BED_O}"
    exit 1
fi

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FP_BED_O}"
zcat "${FP_BED_O}" | head

EOF

Bedtools 2.27.1
Submitted batch job 10968930


**CHECK: log file**

In [35]:
%%bash
### print the Slurm log written by the Input annotation job
source config.sh
FP_LOG="${FD_WRK}/log/annot_fragment_input.txt"
cat "${FP_LOG}"

Slurm Array Index: 
Input  file:  /work/kk319/out/CombEffect_STARR/count_fragment/Input/cnt_chr17.bed
Input  file:  /work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
Output file:  /work/kk319/out/CombEffect_STARR/annotation_fragment/Input/chr17.bed.gz

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/count_fragment/Input/cnt_chr17.bed
chr17	10000001	10000909	1
chr17	10000097	10001138	1
chr17	10000104	10001051	1
chr17	10000112	10000955	1
chr17	10000131	10001070	2
chr17	10000134	10001169	1
chr17	10000150	10001133	1
chr17	10000153	10001218	1
chr17	10000160	10001059	1
chr17	10000162	10001116	1

/work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
chr17	60004	60022	ZNF140	5.6897	+	ZN140_HUMAN.H11MO.0.C	1
chr17	60004	60022	ZNF667	8.0240	-	ZN667_HUMAN.H11MO.0.C	1
chr17	60006	60015	Ebox/CAGCTG	7.9275	+	MYOD1_HUMAN.H11MO.0.A	1
chr17	60011	60031	GC-tract	12.1220	-	ZN341_HUMAN.H11MO.0.C

## Annotate Output (DMSO)

**Test code**

In [25]:
%%bash
source config.sh
SAMPLE=TFX_DMSO
CHROM=chr17

### directories: motif annotation, fragment counts, annotated output
FD_ANN="${FD_BASE}/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1"
FD_BED="${FD_WRK}/count_fragment/${SAMPLE}"
FD_OUT="${FD_WRK}/annotation_fragment/${SAMPLE}"

### full paths: A = fragment counts, B = motif annotation, O = annotated output
FP_BED_A="${FD_BED}/cnt_${CHROM}.bed"
FP_BED_B="${FD_ANN}/${CHROM}_rm_mouse.bed.gz"
FP_BED_O="${FD_OUT}/${CHROM}.bed.gz"
mkdir -p "${FD_OUT}"

### preview both inputs; this test cell does not write the output file
echo "${FP_BED_A}"
head "${FP_BED_A}"
echo
echo "${FP_BED_B}"
zcat "${FP_BED_B}" | head

/work/kk319/out/CombEffect_STARR/count_fragment/TFX_DMSO/cnt_chr17.bed
chr17	10000013	10001044	1
chr17	10000013	10001045	1
chr17	10000026	10001091	1
chr17	10000120	10001029	1
chr17	10000120	10001031	1
chr17	10000120	10001032	1
chr17	10000120	10001033	2
chr17	10000122	10001032	1
chr17	10000122	10001033	1
chr17	10000160	10001055	1

/work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
chr17	60004	60022	ZNF140	5.6897	+	ZN140_HUMAN.H11MO.0.C	1
chr17	60004	60022	ZNF667	8.0240	-	ZN667_HUMAN.H11MO.0.C	1
chr17	60006	60015	Ebox/CAGCTG	7.9275	+	MYOD1_HUMAN.H11MO.0.A	1
chr17	60011	60031	GC-tract	12.1220	-	ZN341_HUMAN.H11MO.0.C	1
chr17	60012	60025	PRDM4	1.3083	+	PRDM4_C2H2_1	1
chr17	60017	60028	NR/19	9.6680	-	NR1D1_HUMAN.H11MO.0.B	2
chr17	60019	60035	HEN1	5.5854	-	HEN1_HUMAN.H11MO.0.C	2
chr17	60023	60042	ZNF680	6.3901	-	ZN680_HUMAN.H11MO.0.C	1
chr17	60027	60037	SMARCA1	7.5566	-	SMCA1_HUMAN.H11MO.0.C	1
chr17	60027	60040	LEF1	7.1402	+	ZN350_HUMAN.H11MO.0.C	1


**RUN: fragment annotation**

In [33]:
%%bash
### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the here-document delimiter is quoted, so the job script below is passed
###  to sbatch verbatim and its variables are expanded only inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 15G \
    -o "${FD_LOG}/annot_fragment_output_dmso.txt" \
    <<'EOF'
#!/bin/bash
### Annotate the fragments of one sample/chromosome with motif sites:
### `bedtools intersect -wo` writes one line per (fragment, motif site)
### overlap; the result is stored gzip-compressed.

### set directories & global variables
source config.sh
: "${FD_BASE:?FD_BASE is not set; check config.sh}"
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
CHROM=chr17
SAMPLE=TFX_DMSO

### init: set input and output file
FD_ANN=${FD_BASE}/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1
FN_ANN=${CHROM}_rm_mouse.bed.gz
FD_BED=${FD_WRK}/count_fragment/${SAMPLE}
FN_BED=cnt_${CHROM}.bed
FD_OUT=${FD_WRK}/annotation_fragment/${SAMPLE}
FN_OUT=${CHROM}.bed.gz

### init: set input and output file path
mkdir -p "${FD_OUT}"
FP_BED_A=${FD_BED}/${FN_BED}
FP_BED_B=${FD_ANN}/${FN_ANN}
FP_BED_O=${FD_OUT}/${FN_OUT}

### print start message
### (the array index is empty because this job is submitted without --array)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file: " "${FP_BED_A}"
echo "Input  file: " "${FP_BED_B}"
echo "Output file: " "${FP_BED_O}"

### stop early if an input is missing instead of writing an empty output
for FP in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${FP}" ]]; then
        echo "ERROR: cannot read input file: ${FP}" >&2
        exit 1
    fi
done

echo
echo "Show the first few lines of the input file"
echo "${FP_BED_A}"
head "${FP_BED_A}"
echo
echo "${FP_BED_B}"
zcat "${FP_BED_B}" | head

### RUN: annotation by intersecting two bed files
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo | gzip -cf > "${FP_BED_O}"

### a pipeline only reports the status of its last command (gzip), so check
### every stage; a failed bedtools would otherwise leave a valid-looking .gz
RC=("${PIPESTATUS[@]}")
if (( RC[0] != 0 || RC[1] != 0 )); then
    echo "ERROR: bedtools intersect | gzip failed (exit codes: ${RC[*]}); removing incomplete ${FP_BED_O}" >&2
    rm -f -- "${FP_BED_O}"
    exit 1
fi

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FP_BED_O}"
zcat "${FP_BED_O}" | head

EOF

Bedtools 2.27.1
Submitted batch job 10969326


**CHECK: log file**

In [36]:
%%bash
### print the Slurm log written by the TFX_DMSO annotation job
source config.sh
FP_LOG="${FD_WRK}/log/annot_fragment_output_dmso.txt"
cat "${FP_LOG}"

Slurm Array Index: 
Input  file:  /work/kk319/out/CombEffect_STARR/count_fragment/TFX_DMSO/cnt_chr17.bed
Input  file:  /work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
Output file:  /work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_DMSO/chr17.bed.gz

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/count_fragment/TFX_DMSO/cnt_chr17.bed
chr17	10000013	10001044	1
chr17	10000013	10001045	1
chr17	10000026	10001091	1
chr17	10000120	10001029	1
chr17	10000120	10001031	1
chr17	10000120	10001032	1
chr17	10000120	10001033	2
chr17	10000122	10001032	1
chr17	10000122	10001033	1
chr17	10000160	10001055	1

/work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
chr17	60004	60022	ZNF140	5.6897	+	ZN140_HUMAN.H11MO.0.C	1
chr17	60004	60022	ZNF667	8.0240	-	ZN667_HUMAN.H11MO.0.C	1
chr17	60006	60015	Ebox/CAGCTG	7.9275	+	MYOD1_HUMAN.H11MO.0.A	1
chr17	60011	60031	GC-tract	12.1220	-	ZN341_HUMAN.

## Annotate Output (Dex)

**Test code**

In [27]:
%%bash
source config.sh
SAMPLE=TFX_Dex
CHROM=chr17

### directories: motif annotation, fragment counts, annotated output
FD_ANN="${FD_BASE}/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1"
FD_BED="${FD_WRK}/count_fragment/${SAMPLE}"
FD_OUT="${FD_WRK}/annotation_fragment/${SAMPLE}"

### full paths: A = fragment counts, B = motif annotation, O = annotated output
FP_BED_A="${FD_BED}/cnt_${CHROM}.bed"
FP_BED_B="${FD_ANN}/${CHROM}_rm_mouse.bed.gz"
FP_BED_O="${FD_OUT}/${CHROM}.bed.gz"
mkdir -p "${FD_OUT}"

### preview both inputs; this test cell does not write the output file
echo "${FP_BED_A}"
head "${FP_BED_A}"
echo
echo "${FP_BED_B}"
zcat "${FP_BED_B}" | head

/work/kk319/out/CombEffect_STARR/count_fragment/TFX_Dex/cnt_chr17.bed
chr17	10000013	10001044	1
chr17	10000013	10001045	2
chr17	10000014	10001044	1
chr17	10000014	10001045	1
chr17	10000026	10001090	1
chr17	10000090	10001098	1
chr17	10000091	10001100	1
chr17	10000103	10001181	1
chr17	10000140	10001107	1
chr17	10000140	10001110	1

/work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
chr17	60004	60022	ZNF140	5.6897	+	ZN140_HUMAN.H11MO.0.C	1
chr17	60004	60022	ZNF667	8.0240	-	ZN667_HUMAN.H11MO.0.C	1
chr17	60006	60015	Ebox/CAGCTG	7.9275	+	MYOD1_HUMAN.H11MO.0.A	1
chr17	60011	60031	GC-tract	12.1220	-	ZN341_HUMAN.H11MO.0.C	1
chr17	60012	60025	PRDM4	1.3083	+	PRDM4_C2H2_1	1
chr17	60017	60028	NR/19	9.6680	-	NR1D1_HUMAN.H11MO.0.B	2
chr17	60019	60035	HEN1	5.5854	-	HEN1_HUMAN.H11MO.0.C	2
chr17	60023	60042	ZNF680	6.3901	-	ZN680_HUMAN.H11MO.0.C	1
chr17	60027	60037	SMARCA1	7.5566	-	SMCA1_HUMAN.H11MO.0.C	1
chr17	60027	60040	LEF1	7.1402	+	ZN350_HUMAN.H11MO.0.C	1


**RUN: fragment annotation**

In [34]:
%%bash
### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the here-document delimiter is quoted, so the job script below is passed
###  to sbatch verbatim and its variables are expanded only inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 15G \
    -o "${FD_LOG}/annot_fragment_output_dex.txt" \
    <<'EOF'
#!/bin/bash
### Annotate the fragments of one sample/chromosome with motif sites:
### `bedtools intersect -wo` writes one line per (fragment, motif site)
### overlap; the result is stored gzip-compressed.

### set directories & global variables
source config.sh
: "${FD_BASE:?FD_BASE is not set; check config.sh}"
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
CHROM=chr17
SAMPLE=TFX_Dex

### init: set input and output file
FD_ANN=${FD_BASE}/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1
FN_ANN=${CHROM}_rm_mouse.bed.gz
FD_BED=${FD_WRK}/count_fragment/${SAMPLE}
FN_BED=cnt_${CHROM}.bed
FD_OUT=${FD_WRK}/annotation_fragment/${SAMPLE}
FN_OUT=${CHROM}.bed.gz

### init: set input and output file path
mkdir -p "${FD_OUT}"
FP_BED_A=${FD_BED}/${FN_BED}
FP_BED_B=${FD_ANN}/${FN_ANN}
FP_BED_O=${FD_OUT}/${FN_OUT}

### print start message
### (the array index is empty because this job is submitted without --array)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file: " "${FP_BED_A}"
echo "Input  file: " "${FP_BED_B}"
echo "Output file: " "${FP_BED_O}"

### stop early if an input is missing instead of writing an empty output
for FP in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${FP}" ]]; then
        echo "ERROR: cannot read input file: ${FP}" >&2
        exit 1
    fi
done

echo
echo "Show the first few lines of the input file"
echo "${FP_BED_A}"
head "${FP_BED_A}"
echo
echo "${FP_BED_B}"
zcat "${FP_BED_B}" | head

### RUN: annotation by intersecting two bed files
bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo | gzip -cf > "${FP_BED_O}"

### a pipeline only reports the status of its last command (gzip), so check
### every stage; a failed bedtools would otherwise leave a valid-looking .gz
RC=("${PIPESTATUS[@]}")
if (( RC[0] != 0 || RC[1] != 0 )); then
    echo "ERROR: bedtools intersect | gzip failed (exit codes: ${RC[*]}); removing incomplete ${FP_BED_O}" >&2
    rm -f -- "${FP_BED_O}"
    exit 1
fi

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FP_BED_O}"
zcat "${FP_BED_O}" | head

EOF

Bedtools 2.27.1
Submitted batch job 10969400


**CHECK: log file**

In [37]:
%%bash
### print the Slurm log written by the TFX_Dex annotation job
source config.sh
FP_LOG="${FD_WRK}/log/annot_fragment_output_dex.txt"
cat "${FP_LOG}"

Slurm Array Index: 
Input  file:  /work/kk319/out/CombEffect_STARR/count_fragment/TFX_Dex/cnt_chr17.bed
Input  file:  /work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
Output file:  /work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/count_fragment/TFX_Dex/cnt_chr17.bed
chr17	10000013	10001044	1
chr17	10000013	10001045	2
chr17	10000014	10001044	1
chr17	10000014	10001045	1
chr17	10000026	10001090	1
chr17	10000090	10001098	1
chr17	10000091	10001100	1
chr17	10000103	10001181	1
chr17	10000140	10001107	1
chr17	10000140	10001110	1

/work/kk319/annotation/motif_cluster_jvierstra/hg38_archetype_motifs_v1/chr17_rm_mouse.bed.gz
chr17	60004	60022	ZNF140	5.6897	+	ZN140_HUMAN.H11MO.0.C	1
chr17	60004	60022	ZNF667	8.0240	-	ZN667_HUMAN.H11MO.0.C	1
chr17	60006	60015	Ebox/CAGCTG	7.9275	+	MYOD1_HUMAN.H11MO.0.A	1
chr17	60011	60031	GC-tract	12.1220	-	ZN341_HUMAN.H11

## Arrange annotation

In [3]:
%%bash
### show the first 10 lines of the annotated TFX_Dex fragments
FP_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
zcat "${FP_OUT}" | head -n 10

chr17	10000013	10001044	1	chr17	10000008	10000028	ZNF134	7.0903	+	ZN134_HUMAN.H11MO.0.C	1	15
chr17	10000013	10001044	1	chr17	10000019	10000031	GRHL	7.0060	-	GRHL1_CP2_1	1	12
chr17	10000013	10001044	1	chr17	10000023	10000030	HD/21	8.7180	+	NKX28_HUMAN.H11MO.0.C	2	7
chr17	10000013	10001044	1	chr17	10000030	10000045	TFCP2	8.4378	+	TFCP2_CP2_2	1	15
chr17	10000013	10001044	1	chr17	10000038	10000048	GATA	7.0149	-	GATA1+TAL1_MA0140.2	1	10
chr17	10000013	10001044	1	chr17	10000040	10000054	Ebox/CAGATGG	7.7562	+	TWST1_HUMAN.H11MO.0.A	2	14
chr17	10000013	10001044	1	chr17	10000042	10000059	ZNF85	6.6182	-	ZNF85_HUMAN.H11MO.0.C	1	17
chr17	10000013	10001044	1	chr17	10000046	10000055	Ebox/CAGCTG	10.3655	+	Tcf12_MA0521.1	16	9
chr17	10000013	10001044	1	chr17	10000046	10000060	Ebox/CAGATGG	7.7996	-	ATOH1_HUMAN.H11MO.0.B	3	14
chr17	10000013	10001044	1	chr17	10000047	10000054	Ebox/CACCTG	8.6765	-	HTF4_HUMAN.H11MO.0.A	2	7


In [4]:
%%bash
### same preview, reduced to columns 1-4 and 8 of each line
FP_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
zcat "${FP_OUT}" |
    head -n 10 |
    cut -f 1-4,8

chr17	10000013	10001044	1	ZNF134
chr17	10000013	10001044	1	GRHL
chr17	10000013	10001044	1	HD/21
chr17	10000013	10001044	1	TFCP2
chr17	10000013	10001044	1	GATA
chr17	10000013	10001044	1	Ebox/CAGATGG
chr17	10000013	10001044	1	ZNF85
chr17	10000013	10001044	1	Ebox/CAGCTG
chr17	10000013	10001044	1	Ebox/CAGATGG
chr17	10000013	10001044	1	Ebox/CACCTG


In [9]:
%%bash
### Prototype of the per-fragment motif count on the first 20 overlap lines:
### keep columns 1-4 and 8, count identical rows, and append the count as a
### last tab-separated column.
FP_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
zcat "${FP_OUT}" | head -20 |
    cut -f1-4,8 |
    sort    |
    uniq -c |
    # `uniq -c` prefixes each row with "<spaces><count><space>"; strip only
    # that prefix so the tab-separated row itself is never re-split on
    # whitespace, then append the count after a tab
    awk 'BEGIN { OFS = "\t" } { n = $1; sub(/^ *[0-9]+ /, ""); print $0, n }'

chr17	10000013	10001044	1	BATF	2
chr17	10000013	10001044	1	CTCF	1
chr17	10000013	10001044	1	EBF1	1
chr17	10000013	10001044	1	Ebox/CACCTG	1
chr17	10000013	10001044	1	Ebox/CAGATGG	2
chr17	10000013	10001044	1	Ebox/CAGCTG	1
chr17	10000013	10001044	1	GATA	1
chr17	10000013	10001044	1	GRHL	1
chr17	10000013	10001044	1	HD/18	1
chr17	10000013	10001044	1	HD/21	1
chr17	10000013	10001044	1	INSM1	1
chr17	10000013	10001044	1	KLF/SP/2	1
chr17	10000013	10001044	1	MFZ1	1
chr17	10000013	10001044	1	POU/1	1
chr17	10000013	10001044	1	SMAD	1
chr17	10000013	10001044	1	TFCP2	1
chr17	10000013	10001044	1	ZNF134	1
chr17	10000013	10001044	1	ZNF85	1


**Summarize annotation (Input)**

In [12]:
%%bash
### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the here-document delimiter is quoted, so the job script below is passed
###  to sbatch verbatim and its variables are expanded only inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 8G \
    -o "${FD_LOG}/annot_fragment_count_input.txt" \
    <<'EOF'
#!/bin/bash
### Summarize the fragment annotation: count how many times each
### (columns 1-4, column 8) combination occurs in ${CHROM}.bed.gz and write
### those five columns plus the count to cnt_${CHROM}.bed.gz.

### set directories & global variables
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
CHROM=chr17
SAMPLE=Input

### init: set input and output file
FD_OUT=${FD_WRK}/annotation_fragment/${SAMPLE}
FN_INT=${CHROM}.bed.gz
FN_OUT=cnt_${CHROM}.bed.gz
FP_INT=${FD_OUT}/${FN_INT}
FP_OUT=${FD_OUT}/${FN_OUT}

### print start message
### (the array index is empty because this job is submitted without --array)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file: " "${FP_INT}"
echo "Output file: " "${FP_OUT}"

### stop early if the input is missing instead of writing an empty output
if [[ ! -r "${FP_INT}" ]]; then
    echo "ERROR: cannot read input file: ${FP_INT}" >&2
    exit 1
fi

echo
echo "Show the first few lines of the input file"
echo "${FP_INT}"
zcat "${FP_INT}" | head

### RUN: count identical (fragment, motif) rows
### `uniq -c` prefixes each row with "<spaces><count><space>"; awk strips only
### that prefix (the tab-separated row is never re-split on whitespace) and
### appends the count as the last tab-separated column
zcat "${FP_INT}" |
    cut -f1-4,8 |
    sort    |
    uniq -c |
    awk 'BEGIN { OFS = "\t" } { n = $1; sub(/^ *[0-9]+ /, ""); print $0, n }' |
    gzip -cf > "${FP_OUT}"

### a pipeline only reports the status of its last command, so check them all
RC=("${PIPESTATUS[@]}")
for CODE in "${RC[@]}"; do
    if (( CODE != 0 )); then
        echo "ERROR: counting pipeline failed (exit codes: ${RC[*]}); removing incomplete ${FP_OUT}" >&2
        rm -f -- "${FP_OUT}"
        exit 1
    fi
done

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FP_OUT}"
zcat "${FP_OUT}" | head

EOF

Bedtools 2.27.1
Submitted batch job 11057065


In [11]:
%%bash
### print the Slurm log written by the Input summarize job
source config.sh
FP_LOG="${FD_WRK}/log/annot_fragment_count_input.txt"
cat "${FP_LOG}"

Slurm Array Index: 
Input  file:  /work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
Output file:  /work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/cnt_chr17.bed.gz

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
chr17	10000013	10001044	1	chr17	10000008	10000028	ZNF134	7.0903	+	ZN134_HUMAN.H11MO.0.C	1	15
chr17	10000013	10001044	1	chr17	10000019	10000031	GRHL	7.0060	-	GRHL1_CP2_1	1	12
chr17	10000013	10001044	1	chr17	10000023	10000030	HD/21	8.7180	+	NKX28_HUMAN.H11MO.0.C	2	7
chr17	10000013	10001044	1	chr17	10000030	10000045	TFCP2	8.4378	+	TFCP2_CP2_2	1	15
chr17	10000013	10001044	1	chr17	10000038	10000048	GATA	7.0149	-	GATA1+TAL1_MA0140.2	1	10
chr17	10000013	10001044	1	chr17	10000040	10000054	Ebox/CAGATGG	7.7562	+	TWST1_HUMAN.H11MO.0.A	2	14
chr17	10000013	10001044	1	chr17	10000042	10000059	ZNF85	6.6182	-	ZNF85_HUMAN.H11MO.0.C	1	17
chr17	10000013	10001044	1	chr17	10000046	10000055	Ebox/

**Summarize annotation (Output; DMSO)**

In [13]:
%%bash
### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the here-document delimiter is quoted, so the job script below is passed
###  to sbatch verbatim and its variables are expanded only inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 8G \
    -o "${FD_LOG}/annot_fragment_count_output_dmso.txt" \
    <<'EOF'
#!/bin/bash
### Summarize the fragment annotation: count how many times each
### (columns 1-4, column 8) combination occurs in ${CHROM}.bed.gz and write
### those five columns plus the count to cnt_${CHROM}.bed.gz.

### set directories & global variables
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
CHROM=chr17
SAMPLE=TFX_DMSO

### init: set input and output file
FD_OUT=${FD_WRK}/annotation_fragment/${SAMPLE}
FN_INT=${CHROM}.bed.gz
FN_OUT=cnt_${CHROM}.bed.gz
FP_INT=${FD_OUT}/${FN_INT}
FP_OUT=${FD_OUT}/${FN_OUT}

### print start message
### (the array index is empty because this job is submitted without --array)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file: " "${FP_INT}"
echo "Output file: " "${FP_OUT}"

### stop early if the input is missing instead of writing an empty output
if [[ ! -r "${FP_INT}" ]]; then
    echo "ERROR: cannot read input file: ${FP_INT}" >&2
    exit 1
fi

echo
echo "Show the first few lines of the input file"
echo "${FP_INT}"
zcat "${FP_INT}" | head

### RUN: count identical (fragment, motif) rows
### `uniq -c` prefixes each row with "<spaces><count><space>"; awk strips only
### that prefix (the tab-separated row is never re-split on whitespace) and
### appends the count as the last tab-separated column
zcat "${FP_INT}" |
    cut -f1-4,8 |
    sort    |
    uniq -c |
    awk 'BEGIN { OFS = "\t" } { n = $1; sub(/^ *[0-9]+ /, ""); print $0, n }' |
    gzip -cf > "${FP_OUT}"

### a pipeline only reports the status of its last command, so check them all
RC=("${PIPESTATUS[@]}")
for CODE in "${RC[@]}"; do
    if (( CODE != 0 )); then
        echo "ERROR: counting pipeline failed (exit codes: ${RC[*]}); removing incomplete ${FP_OUT}" >&2
        rm -f -- "${FP_OUT}"
        exit 1
    fi
done

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FP_OUT}"
zcat "${FP_OUT}" | head

EOF

Bedtools 2.27.1
Submitted batch job 11057084


In [None]:
%%bash
### print the Slurm log written by the TFX_DMSO summarize job
source config.sh
FP_LOG="${FD_WRK}/log/annot_fragment_count_output_dmso.txt"
cat "${FP_LOG}"

**Summarize annotation (Output; Dex)**

In [14]:
%%bash
### set environment on HARDAC
#module load bedtools2
#module load perl
#module load gcc
#source /data/reddylab/software/miniconda2/bin/activate alex_dev
#export PATH=/data/reddylab/software/homer/bin/:$PATH

### set environment on DCC
module load Bedtools
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
### (the here-document delimiter is quoted, so the job script below is passed
###  to sbatch verbatim and its variables are expanded only inside the job)
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 8G \
    -o "${FD_LOG}/annot_fragment_count_output_dex.txt" \
    <<'EOF'
#!/bin/bash
### Summarize the fragment annotation: count how many times each
### (columns 1-4, column 8) combination occurs in ${CHROM}.bed.gz and write
### those five columns plus the count to cnt_${CHROM}.bed.gz.

### set directories & global variables
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
CHROM=chr17
SAMPLE=TFX_Dex

### init: set input and output file
FD_OUT=${FD_WRK}/annotation_fragment/${SAMPLE}
FN_INT=${CHROM}.bed.gz
FN_OUT=cnt_${CHROM}.bed.gz
FP_INT=${FD_OUT}/${FN_INT}
FP_OUT=${FD_OUT}/${FN_OUT}

### print start message
### (the array index is empty because this job is submitted without --array)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file: " "${FP_INT}"
echo "Output file: " "${FP_OUT}"

### stop early if the input is missing instead of writing an empty output
if [[ ! -r "${FP_INT}" ]]; then
    echo "ERROR: cannot read input file: ${FP_INT}" >&2
    exit 1
fi

echo
echo "Show the first few lines of the input file"
echo "${FP_INT}"
zcat "${FP_INT}" | head

### RUN: count identical (fragment, motif) rows
### `uniq -c` prefixes each row with "<spaces><count><space>"; awk strips only
### that prefix (the tab-separated row is never re-split on whitespace) and
### appends the count as the last tab-separated column
zcat "${FP_INT}" |
    cut -f1-4,8 |
    sort    |
    uniq -c |
    awk 'BEGIN { OFS = "\t" } { n = $1; sub(/^ *[0-9]+ /, ""); print $0, n }' |
    gzip -cf > "${FP_OUT}"

### a pipeline only reports the status of its last command, so check them all
RC=("${PIPESTATUS[@]}")
for CODE in "${RC[@]}"; do
    if (( CODE != 0 )); then
        echo "ERROR: counting pipeline failed (exit codes: ${RC[*]}); removing incomplete ${FP_OUT}" >&2
        rm -f -- "${FP_OUT}"
        exit 1
    fi
done

### print end message
echo
echo "Show the first few lines of the output file"
echo "${FP_OUT}"
zcat "${FP_OUT}" | head

EOF

Bedtools 2.27.1
Submitted batch job 11057163


In [None]:
%%bash
### print the Slurm log written by the TFX_Dex summarize job
source config.sh
FP_LOG="${FD_WRK}/log/annot_fragment_count_output_dex.txt"
cat "${FP_LOG}"

## Example: NR3C1 and AP-1 (JUN/FOS)

NR3C1
```
248 | NR/20 | nuclearreceptor
```

AP-1 binding sites (JUN or FOS)
```
49 | CREB/ATF/1 | bZIP
50 | CREB/ATF/2 | bZIP
54 | AP1/1 | bZIP
```

In [28]:
%%bash
### set environment on DCC
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 8G \
    -o "${FD_LOG}/test_get_motif_cluster_input_cluster248.txt" \
    <<'EOF'
#!/bin/bash
### Keep the Input overlaps whose motif cluster (column 8) is exactly NR/20
### (cluster 248). An exact column match is used instead of a substring grep
### so that other columns or longer names containing "NR/20" cannot match.
FD_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/Input
zcat "${FD_OUT}/chr17.bed.gz" | awk -F'\t' '$8 == "NR/20"' > "${FD_OUT}/cluster248.bed"

EOF

Submitted batch job 11061726


In [29]:
%%bash
### set environment on DCC
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 8G \
    -o "${FD_LOG}/test_get_motif_cluster_input_cluster054.txt" \
    <<'EOF'
#!/bin/bash
### Keep the Input overlaps whose motif cluster (column 8) is exactly AP1/1
### (cluster 54). An exact column match is used instead of a substring grep
### so that other columns or longer names containing "AP1/1" cannot match.
FD_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/Input
zcat "${FD_OUT}/chr17.bed.gz" | awk -F'\t' '$8 == "AP1/1"' > "${FD_OUT}/cluster054.bed"

EOF

Submitted batch job 11061728


In [25]:
%%bash
### set environment on DCC
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 8G \
    -o "${FD_LOG}/test_get_motif_cluster_dmso_cluster248.txt" \
    <<'EOF'
#!/bin/bash
### Keep the TFX_DMSO overlaps whose motif cluster (column 8) is exactly NR/20
### (cluster 248). An exact column match is used instead of a substring grep
### so that other columns or longer names containing "NR/20" cannot match.
FD_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_DMSO
zcat "${FD_OUT}/chr17.bed.gz" | awk -F'\t' '$8 == "NR/20"' > "${FD_OUT}/cluster248.bed"

EOF

Submitted batch job 11061713


In [26]:
%%bash
### set environment on DCC
source config.sh
: "${FD_WRK:?FD_WRK is not set; check config.sh}"
FD_LOG=${FD_WRK}/log

### make sure the directory that receives the sbatch log (-o) exists
mkdir -p "${FD_LOG}"

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --mem 8G \
    -o "${FD_LOG}/test_get_motif_cluster_dmso_cluster054.txt" \
    <<'EOF'
#!/bin/bash
### Keep the TFX_DMSO overlaps whose motif cluster (column 8) is exactly AP1/1
### (cluster 54). An exact column match is used instead of a substring grep
### so that other columns or longer names containing "AP1/1" cannot match.
FD_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_DMSO
zcat "${FD_OUT}/chr17.bed.gz" | awk -F'\t' '$8 == "AP1/1"' > "${FD_OUT}/cluster054.bed"

EOF

Submitted batch job 11061715


In [23]:
%%bash
### Write one file per motif cluster of interest for TFX_Dex.
### IDS[k] is the cluster number used in the output file name and NAMES[k]
### the cluster name that column 8 must equal exactly (a substring grep would
### also match other columns or longer names containing the pattern).
FD_OUT=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex
IDS=(248 049 050 054)
NAMES=("NR/20" "CREB/ATF/1" "CREB/ATF/2" "AP1/1")

for k in "${!IDS[@]}"; do
    zcat "${FD_OUT}/chr17.bed.gz" |
        awk -F'\t' -v name="${NAMES[k]}" '$8 == name' > "${FD_OUT}/cluster${IDS[k]}.bed"
done

In [19]:
%%bash
### preview the TFX_Dex overlaps whose motif cluster (column 8) is exactly
### CREB/ATF/1; an exact column match avoids accidental substring hits
FP_INP=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
zcat "${FP_INP}" | awk -F'\t' '$8 == "CREB/ATF/1"' | head

chr17	10000013	10001044	1	chr17	10000504	10000513	CREB/ATF/1	7.9592	+	CREB1_MA0018.3	2	9
chr17	10000013	10001044	1	chr17	10000505	10000514	CREB/ATF/1	8.3398	-	ATF2_HUMAN.H11MO.0.B	3	9
chr17	10000013	10001045	2	chr17	10000504	10000513	CREB/ATF/1	7.9592	+	CREB1_MA0018.3	2	9
chr17	10000013	10001045	2	chr17	10000505	10000514	CREB/ATF/1	8.3398	-	ATF2_HUMAN.H11MO.0.B	3	9
chr17	10000014	10001044	1	chr17	10000504	10000513	CREB/ATF/1	7.9592	+	CREB1_MA0018.3	2	9
chr17	10000014	10001044	1	chr17	10000505	10000514	CREB/ATF/1	8.3398	-	ATF2_HUMAN.H11MO.0.B	3	9
chr17	10000014	10001045	1	chr17	10000504	10000513	CREB/ATF/1	7.9592	+	CREB1_MA0018.3	2	9
chr17	10000014	10001045	1	chr17	10000505	10000514	CREB/ATF/1	8.3398	-	ATF2_HUMAN.H11MO.0.B	3	9
chr17	10000026	10001090	1	chr17	10000504	10000513	CREB/ATF/1	7.9592	+	CREB1_MA0018.3	2	9
chr17	10000026	10001090	1	chr17	10000505	10000514	CREB/ATF/1	8.3398	-	ATF2_HUMAN.H11MO.0.B	3	9


In [20]:
%%bash
### preview the TFX_Dex overlaps whose motif cluster (column 8) is exactly
### CREB/ATF/2; an exact column match avoids accidental substring hits
FP_INP=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
zcat "${FP_INP}" | awk -F'\t' '$8 == "CREB/ATF/2"' | head

chr17	10000013	10001044	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000013	10001045	2	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000014	10001044	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000014	10001045	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000026	10001090	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000090	10001098	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000091	10001100	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000103	10001181	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000140	10001107	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10
chr17	10000140	10001110	1	chr17	10000504	10000514	CREB/ATF/2	8.4737	-	JUN_MA0488.1	6	10


In [21]:
%%bash
### preview the TFX_Dex overlaps whose motif cluster (column 8) is exactly
### AP1/1; an exact column match avoids accidental substring hits
FP_INP=/work/kk319/out/CombEffect_STARR/annotation_fragment/TFX_Dex/chr17.bed.gz
zcat "${FP_INP}" | awk -F'\t' '$8 == "AP1/1"' | head

chr17	10000013	10001044	1	chr17	10000569	10000577	AP1/1	6.7321	+	NF2L1_HUMAN.H11MO.0.C	2	8
chr17	10000013	10001044	1	chr17	10000913	10000921	AP1/1	7.2081	+	JUN_HUMAN.H11MO.0.A	4	8
chr17	10000013	10001045	2	chr17	10000569	10000577	AP1/1	6.7321	+	NF2L1_HUMAN.H11MO.0.C	2	8
chr17	10000013	10001045	2	chr17	10000913	10000921	AP1/1	7.2081	+	JUN_HUMAN.H11MO.0.A	4	8
chr17	10000014	10001044	1	chr17	10000569	10000577	AP1/1	6.7321	+	NF2L1_HUMAN.H11MO.0.C	2	8
chr17	10000014	10001044	1	chr17	10000913	10000921	AP1/1	7.2081	+	JUN_HUMAN.H11MO.0.A	4	8
chr17	10000014	10001045	1	chr17	10000569	10000577	AP1/1	6.7321	+	NF2L1_HUMAN.H11MO.0.C	2	8
chr17	10000014	10001045	1	chr17	10000913	10000921	AP1/1	7.2081	+	JUN_HUMAN.H11MO.0.A	4	8
chr17	10000026	10001090	1	chr17	10000569	10000577	AP1/1	6.7321	+	NF2L1_HUMAN.H11MO.0.C	2	8
chr17	10000026	10001090	1	chr17	10000913	10000921	AP1/1	7.2081	+	JUN_HUMAN.H11MO.0.A	4	8


In [1]:
%%bash
### directories and selection
### FD_DAT is assigned directly: parsing `ls -d` into an array left it empty
### when the directory was missing and produced "Input//..." paths otherwise
FD_ANN=/data/reddylab/Kuei/out/annotation
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_DAT=${FD_WRK}/data/Input
CHROM=chr17
ANNOT=GCR

### file names: fragment counts (input) and annotated fragments (output)
FN_DAT=cnt_${CHROM}.bed
FN_OUT=ann_${CHROM}_${ANNOT}.bed

### full paths: A = fragment counts, B = cistrome annotation, O = output
FP_BED_A=${FD_DAT}/${FN_DAT}
FP_BED_B=${FD_ANN}/cistrome/hg38_cistrome/${ANNOT}_HUMAN.A.bed
FP_BED_O=${FD_DAT}/${FN_OUT}

### preview both inputs
head "${FP_BED_A}"
head "${FP_BED_B}"

chr17	8148989	8149875	1
chr17	8148991	8149950	1
chr17	8148991	8149951	2
chr17	8148992	8149951	1
chr17	8148999	8149919	1
chr17	8149006	8149923	1
chr17	8149008	8150003	1
chr17	8149008	8150057	1
chr17	8149012	8150052	1
chr17	8149022	8149951	1
chr1	10022	10338
chr1	180626	181057
chr1	629777	630092
chr1	818870	819186
chr1	905200	905664
chr1	906700	907241
chr1	916591	917225
chr1	917373	917730
chr1	933094	933441
chr1	940241	940578


In [2]:
%%bash
### list the cistrome annotation files whose name starts with ${ANNOT}
### (the working-directory variables of the other cells are not needed here)
FD_ANN=/data/reddylab/Kuei/out/annotation
ANNOT=GCR

ls "${FD_ANN}/cistrome/hg38_cistrome/${ANNOT}"*

/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/GCR_HUMAN.A.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/GCR_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/GCR_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/GCR_HUMAN.D.bed


In [3]:
%%bash
### list the cistrome annotation files whose name starts with ${ANNOT}
### (prefix match, so JUNB_* and JUND_* are listed together with JUN_*)
FD_ANN=/data/reddylab/Kuei/out/annotation
ANNOT=JUN

ls "${FD_ANN}/cistrome/hg38_cistrome/${ANNOT}"*

/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUNB_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUNB_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUND_HUMAN.A.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUND_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUND_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUND_HUMAN.D.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUN_HUMAN.A.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUN_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUN_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUN_HUMAN.D.bed


In [4]:
%%bash
### list the cistrome annotation files whose name starts with ${ANNOT}
### (prefix match, so FOSB_*, FOSL1_* and FOSL2_* are listed with FOS_*)
FD_ANN=/data/reddylab/Kuei/out/annotation
ANNOT=FOS

ls "${FD_ANN}/cistrome/hg38_cistrome/${ANNOT}"*

/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOSB_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOS_HUMAN.A.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOS_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOS_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOS_HUMAN.D.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOSL1_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOSL1_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOSL2_HUMAN.A.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOSL2_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOSL2_HUMAN.C.bed


In [21]:
%%bash
### list the cistrome annotation files whose name starts with ${ANNOT}
### (the working-directory variables of the other cells are not needed here)
FD_ANN=/data/reddylab/Kuei/out/annotation
ANNOT=CREB1

ls "${FD_ANN}/cistrome/hg38_cistrome/${ANNOT}"*

/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/CREB1_HUMAN.A.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/CREB1_HUMAN.B.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/CREB1_HUMAN.C.bed
/data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/CREB1_HUMAN.D.bed


In [8]:
%%bash
### demo: bash arrays are 0-indexed and can be indexed by a variable
i=2
ANNOTS=(GCR JUN FOS)

### print every element in order, then the element selected by ${i}
printf '%s\n' "${ANNOTS[@]}" "${ANNOTS[i]}"

GCR
JUN
FOS
FOS


In [22]:
%%bash
### set environment
module load perl
module load gcc
module load bedtools2
source /data/reddylab/software/miniconda2/bin/activate alex_dev
export PATH=/data/reddylab/software/homer/bin/:$PATH

### set log file directory and make sure it exists for the sbatch logs (-o)
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
mkdir -p "${FD_LOG}"

### run script using sbatch
### one array task per entry of ANNOTS in the job script below; keep the
### --array range in sync with that list (%a in -o is the array index)
sbatch -pnew,all \
    --array=0-3 \
    --mem 16G \
    -o "${FD_LOG}/prep_annot_input.%a.txt" \
    <<'EOF'
#!/bin/bash
### Intersect the Input fragment counts of one chromosome with the cistrome
### annotation selected by the Slurm array index.

### set directories
### FD_DAT is assigned directly: parsing `ls -d` into an array left it empty
### when the directory was missing and produced "Input//..." paths otherwise
FD_ANN=/data/reddylab/Kuei/out/annotation
FD_WRK=/data/reddylab/Kuei/out/CombEffect_STARR
FD_DAT=${FD_WRK}/data/Input
CHROM=chr17
ANNOTS=(GCR JUN FOS CREB1)

### pick the annotation of this array task; refuse indices outside ANNOTS
IDX=${SLURM_ARRAY_TASK_ID:?must be submitted as a Slurm array job}
if (( IDX < 0 || IDX >= ${#ANNOTS[@]} )); then
    echo "ERROR: array index ${IDX} has no entry in ANNOTS" >&2
    exit 1
fi
ANNOT=${ANNOTS[IDX]}
FN_DAT=cnt_${CHROM}.bed
FN_OUT=ann_${CHROM}_${ANNOT}.bed

FP_BED_A=${FD_DAT}/${FN_DAT}
FP_BED_B=${FD_ANN}/cistrome/hg38_cistrome/${ANNOT}_HUMAN.A.bed
FP_BED_O=${FD_DAT}/${FN_OUT}

### print start message
echo "Slurm Array Index: " "${IDX}"
echo "Input  file: " "${FP_BED_A}"
echo "Input  file: " "${FP_BED_B}"
echo "Output file: " "${FP_BED_O}"

### stop early if an input is missing instead of writing an empty output
for FP in "${FP_BED_A}" "${FP_BED_B}"; do
    if [[ ! -r "${FP}" ]]; then
        echo "ERROR: cannot read input file: ${FP}" >&2
        exit 1
    fi
done

echo
echo "Show the first few lines of the input file"
head "${FP_BED_A}"
head "${FP_BED_B}"

### RUN: one output line per (fragment, annotation region) overlap
if ! bedtools intersect -a "${FP_BED_A}" -b "${FP_BED_B}" -wo > "${FP_BED_O}"; then
    echo "ERROR: bedtools intersect failed; removing incomplete ${FP_BED_O}" >&2
    rm -f -- "${FP_BED_O}"
    exit 1
fi

### print end message
echo
echo "Show the first few lines of the output file"
head "${FP_BED_O}"

EOF

Submitted batch job 25382480


In [23]:
%%bash
### show the log files whose names start with "prep_annot_input."
FD_LOG=/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log
ls "${FD_LOG}"/prep_annot_input.*

/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.0.txt
/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.1.txt
/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.2.txt
/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.3.txt
/gpfs/fs1/data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.txt


In [15]:
cat /data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.0.txt

Slurm Array Index:  0
Input  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//cnt_chr17.bed
Input  file:  /data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/GCR_HUMAN.A.bed
Output file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_GCR.bed

Show the first few lines of the input file
chr17	8148989	8149875	1
chr17	8148991	8149950	1
chr17	8148991	8149951	2
chr17	8148992	8149951	1
chr17	8148999	8149919	1
chr17	8149006	8149923	1
chr17	8149008	8150003	1
chr17	8149008	8150057	1
chr17	8149012	8150052	1
chr17	8149022	8149951	1
chr1	10022	10338
chr1	180626	181057
chr1	629777	630092
chr1	818870	819186
chr1	905200	905664
chr1	906700	907241
chr1	916591	917225
chr1	917373	917730
chr1	933094	933441
chr1	940241	940578

Show the first few lines of the output file
chr17	8150135	8151183	1	chr17	8151182	8151493	1
chr17	8150184	8151257	1	chr17	8151182	8151493	75
chr17	8150185	8151257	1	chr17	8151182	8151493	75
chr17	8150205	8151230	1	chr17	8151182	8151493	48
chr17	815021

In [16]:
cat /data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.1.txt

Slurm Array Index:  1
Input  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//cnt_chr17.bed
Input  file:  /data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/JUN_HUMAN.A.bed
Output file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_JUN.bed

Show the first few lines of the input file
chr17	8148989	8149875	1
chr17	8148991	8149950	1
chr17	8148991	8149951	2
chr17	8148992	8149951	1
chr17	8148999	8149919	1
chr17	8149006	8149923	1
chr17	8149008	8150003	1
chr17	8149008	8150057	1
chr17	8149012	8150052	1
chr17	8149022	8149951	1
chr1	629781	630104
chr1	633872	634190
chr1	778476	778918
chr1	940163	940617
chr1	966053	966502
chr1	966844	967153
chr1	1000103	1001116
chr1	1021878	1022218
chr1	1064157	1064553
chr1	1079393	1079956

Show the first few lines of the output file
chr17	8153288	8154373	1	chr17	8154208	8154628	165
chr17	8153290	8154266	1	chr17	8154208	8154628	58
chr17	8153305	8154323	1	chr17	8154208	8154628	115
chr17	8153361	8154250	1	chr17	8154208	8154628	42

In [17]:
cat /data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.2.txt

Slurm Array Index:  2
Input  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//cnt_chr17.bed
Input  file:  /data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/FOS_HUMAN.A.bed
Output file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_FOS.bed

Show the first few lines of the input file
chr17	8148989	8149875	1
chr17	8148991	8149950	1
chr17	8148991	8149951	2
chr17	8148992	8149951	1
chr17	8148999	8149919	1
chr17	8149006	8149923	1
chr17	8149008	8150003	1
chr17	8149008	8150057	1
chr17	8149012	8150052	1
chr17	8149022	8149951	1
chr1	629774	630091
chr1	633865	634180
chr1	831655	831996
chr1	966032	966528
chr1	966828	967213
chr1	1000731	1001098
chr1	1064188	1064547
chr1	1207913	1208304
chr1	1231957	1232282
chr1	1305395	1305905

Show the first few lines of the output file
chr17	8155456	8156430	1	chr17	8156347	8156825	83
chr17	8155557	8156560	1	chr17	8156347	8156825	213
chr17	8155560	8156375	1	chr17	8156347	8156825	28
chr17	8155586	8156491	1	chr17	8156347	8156825	1

In [24]:
cat /data/reddylab/Kuei/out/CombEffect_STARR/log/prep_annot_input.3.txt

Slurm Array Index:  3
Input  file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//cnt_chr17.bed
Input  file:  /data/reddylab/Kuei/out/annotation/cistrome/hg38_cistrome/CREB1_HUMAN.A.bed
Output file:  /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_CREB1.bed

Show the first few lines of the input file
chr17	8148989	8149875	1
chr17	8148991	8149950	1
chr17	8148991	8149951	2
chr17	8148992	8149951	1
chr17	8148999	8149919	1
chr17	8149006	8149923	1
chr17	8149008	8150003	1
chr17	8149008	8150057	1
chr17	8149012	8150052	1
chr17	8149022	8149951	1
chr1	629783	630099
chr1	633851	634188
chr1	778530	778930
chr1	826879	827864
chr1	869681	870123
chr1	904499	904933
chr1	921045	921420
chr1	923604	924102
chr1	959023	959524
chr1	960400	960823

Show the first few lines of the output file
chr17	8151204	8152259	1	chr17	8152256	8153070	3
chr17	8151233	8152266	2	chr17	8152256	8153070	10
chr17	8151239	8152260	1	chr17	8152256	8153070	4
chr17	8151239	8152261	1	chr17	8152256	8153070	5
chr17	81

In [25]:
ls /data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_*.bed

/data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_CREB1.bed
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_FOS.bed
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_GCR.bed
/data/reddylab/Kuei/out/CombEffect_STARR/data/Input//ann_chr17_JUN.bed
