## Input

In [21]:
### set environment on DCC
module load Bedtools
source config.sh
FD_LOG=${FD_WRK}/log

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-5 \
    --mem 8G \
    -o ${FD_LOG}/bootstrap_input.%a.txt \
    <<'EOF'
#!/bin/bash
### set directories and global parameters
source config.sh
FD_BEDS=($(ls -d ${FD_WRK}/data/Input*/))
CHROM=chr17
SEED=123

### set input
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]}
FN_BED=${CHROM}.bed
N=$(cat ${FD_BED}/${FN_BED} | wc -l)

### print start message
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file:       " ${FD_BED}/${FN_BED}
echo "Total #Lines:      " ${N}
echo
echo "Show the first few lines of the input file"
echo ${FD_BED}/${FN_BED}
head ${FD_BED}/${FN_BED}

### init: seed function
get_seeded_random()
{
  seed="$1";
  openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt </dev/zero 2>/dev/null;
}

### init: output directory
FD_OUT=${FD_WRK}/bootstrap/$(basename $FD_BED)
mkdir -p ${FD_OUT}

### loop: bootstrapping lines in input bed files
for i in {1..10}; do
    ### set output & print message
    FN_OUT=${CHROM}_B${i}.bed
    seed=$(( $SEED + $i ))
    echo
    echo "Output file: " ${FD_OUT}/${FN_OUT}
    echo "Parameters:  SEED=${seed}" 
    
    ### random shuffle with replacement and sort
    shuf -rn ${N} ${FD_BED}/${FN_BED} --random-source=<(get_seeded_random $seed) > ${FD_OUT}/tmp.bed
    bedtools sort -i ${FD_OUT}/tmp.bed > ${FD_OUT}/${FN_OUT}
    
    ### print end message
    echo
    echo "Show the first few lines of the output file"
    head -n 3 ${FD_OUT}/${FN_OUT}
done

echo
echo "Done"

EOF

Submitted batch job 11869503


In [22]:
source config.sh
FD_LOG=${FD_WRK}/log
cat ${FD_LOG}/bootstrap_input.1.txt

Slurm Array Index:  1
Input  file:        /work/kk319/out/CombEffect_STARR/data/Input1//chr17.bed
Total #Lines:       630820

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/Input1//chr17.bed
chr17	201123	201925
chr17	201160	201968
chr17	201202	202074
chr17	201259	202052
chr17	201366	202130
chr17	206174	207307
chr17	206269	207286
chr17	206283	207194
chr17	206324	207359
chr17	206326	207274

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/Input1/chr17_B1.bed
Parameters:  SEED=124

Show the first few lines of the output file
chr17	201123	201925
chr17	201123	201925
chr17	201202	202074

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/Input1/chr17_B2.bed
Parameters:  SEED=125

Show the first few lines of the output file
chr17	201160	201968
chr17	201160	201968
chr17	201160	201968

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/Input1/chr17_B3.bed
Parameters:  SEED=126

Show the first few lines of the output file
chr17	201202	202074


## Output (DMSO)

In [23]:
### set environment on DCC
module load Bedtools
source config.sh
FD_LOG=${FD_WRK}/log

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-4 \
    --mem 8G \
    -o ${FD_LOG}/bootstrap_output_dmso.%a.txt \
    <<'EOF'
#!/bin/bash
### set directories and global parameters
source config.sh
FD_BEDS=($(ls -d ${FD_WRK}/data/TFX*_DMSO/))
CHROM=chr17
SEED=123

### set input
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]}
FN_BED=${CHROM}.bed
N=$(cat ${FD_BED}/${FN_BED} | wc -l)

### print start message
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file:       " ${FD_BED}/${FN_BED}
echo "Total #Lines:      " ${N}
echo
echo "Show the first few lines of the input file"
echo ${FD_BED}/${FN_BED}
head ${FD_BED}/${FN_BED}

### init: seed function
get_seeded_random()
{
  seed="$1";
  openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt </dev/zero 2>/dev/null;
}

### init: output directory
FD_OUT=${FD_WRK}/bootstrap/$(basename $FD_BED)
mkdir -p ${FD_OUT}

### loop: bootstrapping lines in input bed files
for i in {1..10}; do
    ### set output & print message
    FN_OUT=${CHROM}_B${i}.bed
    seed=$(( $SEED + $i ))
    echo
    echo "Output file: " ${FD_OUT}/${FN_OUT}
    echo "Parameters:  SEED=${seed}" 
    
    ### random shuffle with replacement and sort
    shuf -rn ${N} ${FD_BED}/${FN_BED} --random-source=<(get_seeded_random $seed) > ${FD_OUT}/tmp.bed
    bedtools sort -i ${FD_OUT}/tmp.bed > ${FD_OUT}/${FN_OUT}
    
    ### print end message
    echo
    echo "Show the first few lines of the output file"
    head -n 3 ${FD_OUT}/${FN_OUT}
done

echo
echo "Done"

EOF

Submitted batch job 11869632


In [26]:
source config.sh
FD_LOG=${FD_WRK}/log
cat ${FD_LOG}/bootstrap_output_dmso.1.txt

Slurm Array Index:  1
Input  file:        /work/kk319/out/CombEffect_STARR/data/TFX3_DMSO//chr17.bed
Total #Lines:       1088938

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/TFX3_DMSO//chr17.bed
chr17	159962	160793
chr17	185415	186343
chr17	201202	202074
chr17	201202	202073
chr17	201202	202074
chr17	201205	202074
chr17	201287	202078
chr17	205401	206360
chr17	206145	207313
chr17	206145	207313

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX3_DMSO/chr17_B1.bed
Parameters:  SEED=124

Show the first few lines of the output file
chr17	201202	202074
chr17	201202	202074
chr17	201202	202074

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX3_DMSO/chr17_B2.bed
Parameters:  SEED=125

Show the first few lines of the output file
chr17	159962	160793
chr17	185415	186343
chr17	185415	186343

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX3_DMSO/chr17_B3.bed
Parameters:  SEED=126

Show the first few lines of the output file
chr1

## Output (Dex)

In [24]:
### set environment on DCC
module load Bedtools
source config.sh
FD_LOG=${FD_WRK}/log

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-4 \
    --mem 8G \
    -o ${FD_LOG}/bootstrap_output_dex.%a.txt \
    <<'EOF'
#!/bin/bash
### set directories and global parameters
source config.sh
FD_BEDS=($(ls -d ${FD_WRK}/data/TFX*_Dex/))
CHROM=chr17
SEED=123

### set input
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]}
FN_BED=${CHROM}.bed
N=$(cat ${FD_BED}/${FN_BED} | wc -l)

### print start message
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file:       " ${FD_BED}/${FN_BED}
echo "Total #Lines:      " ${N}
echo
echo "Show the first few lines of the input file"
echo ${FD_BED}/${FN_BED}
head ${FD_BED}/${FN_BED}

### init: seed function
get_seeded_random()
{
  seed="$1";
  openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt </dev/zero 2>/dev/null;
}

### init: output directory
FD_OUT=${FD_WRK}/bootstrap/$(basename $FD_BED)
mkdir -p ${FD_OUT}

### loop: bootstrapping lines in input bed files
for i in {1..10}; do
    ### set output & print message
    FN_OUT=${CHROM}_B${i}.bed
    seed=$(( $SEED + $i ))
    echo
    echo "Output file: " ${FD_OUT}/${FN_OUT}
    echo "Parameters:  SEED=${seed}" 
    
    ### random shuffle with replacement and sort
    shuf -rn ${N} ${FD_BED}/${FN_BED} --random-source=<(get_seeded_random $seed) > ${FD_OUT}/tmp.bed
    bedtools sort -i ${FD_OUT}/tmp.bed > ${FD_OUT}/${FN_OUT}
    
    ### print end message
    echo
    echo "Show the first few lines of the output file"
    head -n 3 ${FD_OUT}/${FN_OUT}
done

echo
echo "Done"

EOF

Submitted batch job 11869674


In [29]:
source config.sh
FD_LOG=${FD_WRK}/log
cat ${FD_LOG}/bootstrap_output_dex.1.txt

Slurm Array Index:  1
Input  file:        /work/kk319/out/CombEffect_STARR/data/TFX3_Dex//chr17.bed
Total #Lines:       1102681

Show the first few lines of the input file
/work/kk319/out/CombEffect_STARR/data/TFX3_Dex//chr17.bed
chr17	120738	123102
chr17	169749	171139
chr17	182083	183038
chr17	197577	198557
chr17	205592	206544
chr17	205592	206545
chr17	205592	206545
chr17	205594	206545
chr17	205594	206544
chr17	205646	206631

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX3_Dex/chr17_B1.bed
Parameters:  SEED=124

Show the first few lines of the output file
chr17	120738	123102
chr17	120738	123102
chr17	169749	171139

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX3_Dex/chr17_B2.bed
Parameters:  SEED=125

Show the first few lines of the output file
chr17	169749	171139
chr17	169749	171139
chr17	169749	171139

Output file:  /work/kk319/out/CombEffect_STARR/bootstrap/TFX3_Dex/chr17_B3.bed
Parameters:  SEED=126

Show the first few lines of the output file
chr17	120

In [6]:
head /work/kk319/out/CombEffect_STARR/bootstrap/Input1/chr17_B1.bed

chr17	8232434	8233435
chr17	42935759	42936631
chr17	79599365	79600407
chr17	15433887	15434759
chr17	53233301	53234247
chr17	10736377	10737276
chr17	42191341	42192170
chr17	41433508	41434389
chr17	49683106	49684032
chr17	58511444	58512366


In [12]:
head /work/kk319/out/CombEffect_STARR/bootstrap/Input1/chr17_B1.bed > test_unsort.bed

In [13]:
cat test_unsort.bed

chr17	8232434	8233435
chr17	42935759	42936631
chr17	79599365	79600407
chr17	15433887	15434759
chr17	53233301	53234247
chr17	10736377	10737276
chr17	42191341	42192170
chr17	41433508	41434389
chr17	49683106	49684032
chr17	58511444	58512366


In [14]:
bedtools sort -i test_unsort.bed

chr17	8232434	8233435
chr17	10736377	10737276
chr17	15433887	15434759
chr17	41433508	41434389
chr17	42191341	42192170
chr17	42935759	42936631
chr17	49683106	49684032
chr17	53233301	53234247
chr17	58511444	58512366
chr17	79599365	79600407


In [15]:
bedtools sort -i test_unsort.bed > test_sort.bed

In [16]:
cat test_sort.bed

chr17	8232434	8233435
chr17	10736377	10737276
chr17	15433887	15434759
chr17	41433508	41434389
chr17	42191341	42192170
chr17	42935759	42936631
chr17	49683106	49684032
chr17	53233301	53234247
chr17	58511444	58512366
chr17	79599365	79600407


In [3]:
SEED=123
for i in {1..5}; do
    seed=$(( $SEED + $i ))
    echo $seed
done

124
125
126
127
128


In [None]:
### set environment on DCC
module load Bedtools
source config.sh
FD_LOG=${FD_WRK}/log

### run script using sbatch
#sbatch -pnew,all \
sbatch -p scavenger \
    --array=0-5 \
    --mem 8G \
    -o ${FD_LOG}/kmer_count_per1_input_chr17.%a.txt \
    <<'EOF'
#!/bin/bash
### set directories and global parameters
source config.sh
FD_BEDS=($(ls -d ${FD_WRK}/data/Input*/))
CHROM=chr17
TARGET=PER1

### set input files
FD_BED=${FD_BEDS[${SLURM_ARRAY_TASK_ID}]}
FN_BED=${CHROM}.bed

FD_KMR=${FD_WRK}/kmer
FN_KMR=kmer_${TARGET}.bed

### set output files
FD_OUT=${FD_KMR}/$(basename ${FD_BED})
FN_OUT=kmer_PER1.bedpe

FD_CNT=${FD_KMR}/$(basename ${FD_BED})
FN_CNT=kmer_PER1_count.bed

### print start message
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Input  file:       " ${FD_KMR}/${FN_KMR}
echo "Input  file:       " ${FD_BED}/${FN_BED}
echo "Output file:       " ${FD_OUT}/${FN_OUT}
echo "Output file:       " ${FD_CNT}/${FN_CNT}
echo
echo "Show the first few lines of the input file"
echo ${FD_BED}/${FN_BED}
head ${FD_BED}/${FN_BED}
echo
echo "Show the first few lines of the input file"
echo ${FD_KMR}/${FN_KMR}
head ${FD_KMR}/${FN_KMR}

### init: create folder
mkdir -p ${FD_OUT}

### intersect
FP_BED_A=${FD_KMR}/${FN_KMR}
FP_BED_B=${FD_BED}/${FN_BED}
FP_BED_O=${FD_OUT}/${FN_OUT}
bedtools intersect -a ${FP_BED_A} -b ${FP_BED_B} -wo > ${FP_BED_O}

### count the kmers
cat ${FP_BED_O} |\
    awk -F $'\t' '($7 == 750)' |\
    cut -f1-3 |\
    uniq -c   |\
    awk '{$(NF+1)=$1;$1=""}1'   |\
    sed -e 's/^[[:space:]]*//'  |\
    sed -e 's/[[:space:]]/\t/g' > ${FD_CNT}/${FN_CNT}

### print end message
echo
echo "Show the first few lines of the output file"
echo ${FD_OUT}/${FN_OUT}
head ${FD_OUT}/${FN_OUT}
echo
echo "Show the first few lines of the output file"
echo ${FD_CNT}/${FN_CNT}
head ${FD_CNT}/${FN_CNT}

In [1]:
%%bash
cat > shuf_with_seed.sh << EOF
#!/bin/bash

# seeding adopted from https://stackoverflow.com/a/41962458/7820599
get_seeded_random()
{
  seed="$1";
  openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt \
    </dev/zero 2>/dev/null;
}

seed=0;

# Option parsing adopted from https://stackoverflow.com/a/14203146
REST=""
while [[ $# -gt 0 ]]
do
    key="$1"
    case $key in
    -s)
        seed="$2"
        shift
        shift
        ;;
    *)    # unknown option
        REST="$REST $1"
        shift # past argument
        ;;
    esac
done

shuf --random-source=<(get_seeded_random $seed) $REST
EOF

In [2]:
cat shuf_with_seed.sh

#!/bin/bash

# seeding adopted from https://stackoverflow.com/a/41962458/7820599
get_seeded_random()
{
  seed="";
  openssl enc -aes-256-ctr -pass pass:"" -nosalt     </dev/zero 2>/dev/null;
}

seed=0;

# Option parsing adopted from https://stackoverflow.com/a/14203146
REST=""
while [[ 0 -gt 0 ]]
do
    key=""
    case  in
    -s)
        seed=""
        shift
        shift
        ;;
    *)    # unknown option
        REST=" "
        shift # past argument
        ;;
    esac
done

shuf --random-source=<(get_seeded_random ) 


In [5]:
%%bash
cat > test_shuf.txt << EOF
1
2
3
4
5
EOF

cat test_shuf.txt

1
2
3
4
5


In [13]:
sort test_shuf.txt

1
2
3
4
5


In [11]:
%%bash
shuf -r -n 10 test_shuf.txt

4
2
4
4
2
1
1
2
1
3


In [18]:
%%bash
cat test_shuf.txt | wc -l

5


In [22]:
%%bash
FPATH=test_shuf.txt
N=$(cat ${FPATH} | wc -l)
echo $N
echo +++++++++++++++++++++
shuf -rn $N $FPATH

5
+++++++++++++++++++++
4
3
3
1
2


In [36]:
%%bash
for i in {1..3}; do
echo $i
done

1
2
3


In [40]:
%%bash
get_seeded_random()
{
  seed="$1";
  openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt </dev/zero 2>/dev/null;
}

FPATH=test_shuf.txt
N=$(cat ${FPATH} | wc -l)
echo $N

for i in {1..3}; do 
    echo +++++++++++ Shuf No Seed  ++++++++++
    shuf -rn $N $FPATH
    echo +++++++++++ Shuf Seed 123 ++++++++++
    SEED=123
    shuf -rn $N $FPATH --random-source=<(get_seeded_random $SEED)
done

5
+++++++++++ Shuf No Seed ++++++++++
3
2
1
2
5
+++++++++++ Shuf Seed 123 ++++++++++
1
5
1
3
3
+++++++++++ Shuf No Seed ++++++++++
4
2
2
5
2
+++++++++++ Shuf Seed 123 ++++++++++
1
5
1
3
3
+++++++++++ Shuf No Seed ++++++++++
1
4
5
3
3
+++++++++++ Shuf Seed 123 ++++++++++
1
5
1
3
3


In [42]:
%%bash
cat > test_lines.txt << EOF
A
B
C
D
EOF

cat test_lines.txt

A
B
C
D


In [49]:
%%bash
ARRAY_IDX=4
idx=$(sed -n "${ARRAY_IDX}p" test_lines); echo $idx

D


In [19]:
%%bash

seq 10 > input
shuf -rn $(cat input | wc -l) input

1
7
6
1
3
1
4
5
9
10


In [None]:
# seeding adopted from https://stackoverflow.com/a/41962458/7820599
get_seeded_random()
{
  seed="$1";
  openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt \
    </dev/zero 2>/dev/null;
}


In [8]:
%%bash
chmod +x shuf_with_seed.sh
SEED=123
./shuf_with_seed.sh $SEED -r -n 10 test_shuf.txt

./shuf_with_seed.sh: line 18: syntax error near unexpected token `-s'
./shuf_with_seed.sh: line 18: `    -s)'


CalledProcessError: Command 'b'chmod +x shuf_with_seed.sh\nSEED=123\n./shuf_with_seed.sh $SEED -r -n 10 test_shuf.txt\n'' returned non-zero exit status 2.