In [None]:
%%bash
# Timestamp marking the start of the run.
date

########################################
##### User settings: edit before running
########################################

# Thread count handed to bowtie2-build and bowtie2.
cores=4

# Directory that receives the downloaded E. coli FASTA; the bowtie2 index
# is created in a bowtie2Index/ subdirectory of it.
ecoli_dir="/path/to/save/ecoli/fasta/"

# Project root; trimmed reads are read from, and spike-in results written
# to, processed_data/CUT_RUN/ below this directory.
projPath="/path/to/save/project/"

# Sample names. Each name is used to glob the trimmed FASTQ files of that
# sample and to name the per-sample output files.
histNames=(
    C_H3K27me3
    C_H3K4me3
    C_IgG
    M_H3K27me3
    M_H3K4me3
    M_IgG
    P_H3K27me3
    P_H3K4me3
    P_IgG
)

########################################
##### End of user settings
########################################


##################################################################
## Part 1. Download the E. coli genome reference
##################################################################
# Fetches the FASTA record U00096.3 from ENA into ${ecoli_dir} and creates
# the directory that will hold the bowtie2 index. The wget output goes to
# ${ecoli_dir}/download.log.
ecoli_bowtie2Index=${ecoli_dir}/bowtie2Index/

mkdir -p "${ecoli_bowtie2Index}"

# https://www.ebi.ac.uk/ena/browser/view/U00096
ecoli_fasta=${ecoli_dir}/U00096.3

if [[ -s "${ecoli_fasta}" ]]; then
    # Re-running wget -P would store a second copy as U00096.3.1 while the
    # old file keeps being used, so an existing download is simply reused.
    echo "E. coli reference already present at ${ecoli_fasta}; skipping download."
else
    # Download to a temporary name and rename on success so that a failed or
    # interrupted transfer never leaves a truncated FASTA behind.
    if ! wget https://www.ebi.ac.uk/ena/browser/api/fasta/U00096.3 \
            -O "${ecoli_fasta}.part" > "${ecoli_dir}/download.log" 2>&1; then
        rm -f -- "${ecoli_fasta}.part"
        echo "ERROR: download of U00096.3 failed; see ${ecoli_dir}/download.log" >&2
        exit 1
    fi
    mv -- "${ecoli_fasta}.part" "${ecoli_fasta}"
fi

##################################################################
## Part 2. Build Bowtie2 index for U00096.3 / K12-MG1655
##################################################################
# Builds the index ${ecoli_bowtie2Index}/ecoli_U00096.3 from the downloaded
# FASTA; bowtie2-build output is captured in ${ecoli_bowtie2Index}/build.log.

# Stop early when the reference is missing or empty instead of letting
# bowtie2-build fail with a less obvious message in the log.
if [[ ! -s "${ecoli_dir}/U00096.3" ]]; then
    echo "ERROR: reference FASTA ${ecoli_dir}/U00096.3 is missing or empty" >&2
    exit 1
fi

# --threads is the documented spelling of the option.
if ! bowtie2-build --threads "${cores}" \
        "${ecoli_dir}/U00096.3" \
        "${ecoli_bowtie2Index}/ecoli_U00096.3" \
        > "${ecoli_bowtie2Index}/build.log" 2>&1; then
    echo "ERROR: bowtie2-build failed; see ${ecoli_bowtie2Index}/build.log" >&2
    exit 1
fi

##################################################################
## Part 3. Alignment to spike-in genome
##################################################################
# Aligns the trimmed paired-end reads of every sample in histNames to the
# E. coli index. Per sample this writes
#   ${outPath}/alignment/sam/<sample>_bowtie2_spikeIn.sam
#   ${outPath}/alignment/sam/bowtie2_summary/<sample>_bowtie2_spikeIn.txt
# where the .txt file receives everything bowtie2 prints (stdout + stderr).
trimmedPath=${projPath}/processed_data/CUT_RUN/Part1_alignment/
outPath=${projPath}/processed_data/CUT_RUN/Part2_ecoli_alignment/

mkdir -p "${outPath}/alignment/sam/bowtie2_summary" \
         "${outPath}/alignment/bam" \
         "${outPath}/alignment/bed" \
         "${outPath}/alignment/bedgraph"

# Loop through each histone marker name
for histName in "${histNames[@]}"
do
    # Resolve the FASTQ globs up front. Passing the globs straight to
    # bowtie2 breaks when they match nothing (the literal pattern is passed)
    # or several files (the extras become stray positional arguments).
    r1Files=( "${trimmedPath}"/alignment/trimming/*"${histName}"*_R1*.fq.gz )
    r2Files=( "${trimmedPath}"/alignment/trimming/*"${histName}"*_R2*.fq.gz )

    if [[ ${#r1Files[@]} -ne 1 || ${#r2Files[@]} -ne 1 \
          || ! -e "${r1Files[0]}" || ! -e "${r2Files[0]}" ]]; then
        echo "WARNING: expected exactly one R1 and one R2 trimmed FASTQ for ${histName} in ${trimmedPath}/alignment/trimming/; skipping" >&2
        continue
    fi

    # NOTE(review): --very-sensitive and --very-sensitive-local are both
    # given together with --local; this looks redundant but is kept as is.
    if ! bowtie2 --very-sensitive --local --very-sensitive-local --no-unal --no-mixed --no-discordant \
            --phred33 \
            -I 10 -X 700 -p "${cores}" -x "${ecoli_bowtie2Index}/ecoli_U00096.3" \
            -1 "${r1Files[0]}" \
            -2 "${r2Files[0]}" \
            -S "${outPath}/alignment/sam/${histName}_bowtie2_spikeIn.sam" \
            &> "${outPath}/alignment/sam/bowtie2_summary/${histName}_bowtie2_spikeIn.txt"; then
        echo "WARNING: bowtie2 failed for ${histName}; see ${outPath}/alignment/sam/bowtie2_summary/${histName}_bowtie2_spikeIn.txt" >&2
    fi
done

##################################################################
## Part 4. Calculate scale factor
##################################################################
# For every sample the bowtie2 spike-in summary is parsed and a three-line
# file <sample>_bowtie2_scale_factor.txt is written next to it:
#   line 1  count from the "were paired" summary line
#   line 2  pairs aligned concordantly (exactly 1 time + >1 times)
#   line 3  line 1 / line 2 / 100, computed by bc with scale=4,
#           or NA when line 2 is zero
ecoliPath=${projPath}/processed_data/CUT_RUN/Part2_ecoli_alignment/

#######################################
# Extract the read-pair counts from a bowtie2 paired-end summary.
# The relevant lines are found by their text rather than by line number,
# because the summary file also captures bowtie2's stderr and any warning
# printed there would shift fixed line positions.
# Arguments: $1 - path to the bowtie2 summary file
# Outputs:   "<paired_count> <concordantly_aligned_count>" on stdout
# Returns:   0 on success, non-zero if the file is unreadable or a
#            required summary line is missing
#######################################
spikein_read_counts() {
    local summary_file=$1

    [[ -r "${summary_file}" ]] || return 1

    awk '
        / were paired; of these:/             { total = $1;    have_total  = 1 }
        /aligned concordantly exactly 1 time/ { aligned += $1; have_unique = 1 }
        /aligned concordantly >1 times/       { aligned += $1; have_multi  = 1 }
        END {
            if (!(have_total && have_unique && have_multi)) exit 1
            printf "%.0f %.0f\n", total, aligned
        }
    ' "${summary_file}"
}

#######################################
# Compute total / aligned / 100 with bc (scale=4).
# Arguments: $1 - paired count, $2 - concordantly aligned count
# Outputs:   the scale factor on stdout, or NA when $2 is zero or not a
#            non-negative integer (avoids a bc divide-by-zero error)
#######################################
spikein_scale_factor() {
    local total=$1 aligned=$2

    if ! [[ ${aligned} =~ ^[0-9]+$ ]] || (( 10#${aligned} == 0 )); then
        printf 'NA\n'
        return 0
    fi
    bc <<< "scale=4; ${total} / ${aligned} / 100"
}

#######################################
# Write <sample>_bowtie2_scale_factor.txt for each sample.
# Globals:   histName, totalNum, ecoliNum, scale_factor (written; left
#            global, as they were in the original top-level loop)
# Arguments: $1 - spike-in alignment root; remaining args - sample names
# Outputs:   warnings on stderr for samples that cannot be processed
#######################################
write_spikein_scale_factors() {
    local base_dir=$1
    shift
    local summary_dir=${base_dir}/alignment/sam/bowtie2_summary
    local counts

    for histName in "$@"
    do
        if ! counts=$(spikein_read_counts "${summary_dir}/${histName}_bowtie2_spikeIn.txt"); then
            echo "WARNING: cannot parse bowtie2 summary for ${histName}; no scale factor written" >&2
            continue
        fi
        read -r totalNum ecoliNum <<< "${counts}"

        scale_factor=$(spikein_scale_factor "${totalNum}" "${ecoliNum}")
        if [[ ${scale_factor} == "NA" ]]; then
            echo "WARNING: no spike-in pairs aligned for ${histName}; scale factor set to NA" >&2
        fi

        printf '%s\n' "${totalNum}" "${ecoliNum}" "${scale_factor}" \
            > "${summary_dir}/${histName}_bowtie2_scale_factor.txt"
    done
}

write_spikein_scale_factors "${ecoliPath}" "${histNames[@]}"

# Timestamp marking the end of the run.
date