## Align reads to genome using `HISAT2`

In [None]:
# Shared settings read by the cells below; run this cell first.
# All paths are relative to the working directory printed by `pwd`.

# Directory whose entries are the samples; each sample entry is expected to
# hold R1.fastq.gz and R2.fastq.gz.
BASE_DIR="../data"
# HISAT2 index basename (passed to `hisat2 -x`).
INDEX="../ref/genome/genome"
# Thread count handed to hisat2/stringtie (-p) and samtools (-@).
NUMPROC=250
# grep pattern selecting which entries of BASE_DIR are processed.
PATTERN="da"
pwd

In [None]:
# Align each paired-end sample found in $BASE_DIR with HISAT2 and write the
# result as ../results/<sample>/<sample>.bam.
# Reads BASE_DIR, PATTERN, INDEX and NUMPROC from the configuration cell.
hisat_exe="$HOME/bin/hisat2-2.2.1/hisat2"

# Glob instead of parsing `ls`, so names containing whitespace stay intact.
# The :? guard keeps the glob from expanding to /* when BASE_DIR is unset.
for sample_dir in "${BASE_DIR:?run the configuration cell first}"/*; do
    # Only directories can hold the FASTQ pair; this also skips an unmatched glob.
    [[ -d "$sample_dir" ]] || continue
    sample="${sample_dir##*/}"
    # Same selection rule as `ls | grep "$PATTERN"`: the name must match PATTERN.
    grep -q -- "$PATTERN" <<<"$sample" || continue

    result_dir="../results/$sample"
    mkdir -p -- "$result_dir"

    echo "Aligning $sample ..."
    # --dta asks HISAT2 for alignments suited to transcript assemblers.
    "$hisat_exe" -p "$NUMPROC" --dta \
        -x "$INDEX" \
        -1 "$sample_dir/R1.fastq.gz" -2 "$sample_dir/R2.fastq.gz" \
        | samtools view -bhS - > "$result_dir/$sample.bam"

    # Without pipefail only samtools' status is visible, so a crashed hisat2
    # would leave a truncated BAM unnoticed. Snapshot both statuses at once.
    stage_status=("${PIPESTATUS[@]}")
    if (( stage_status[0] != 0 || stage_status[1] != 0 )); then
        echo "ERROR: alignment failed for $sample (hisat2 exit ${stage_status[0]}, samtools exit ${stage_status[1]}); $result_dir/$sample.bam is incomplete" >&2
    fi
done

In [None]:
# Sort and index the BAM of every selected sample, producing
# ../results/<sample>/<sample>.sorted.bam plus its index.
# Reads BASE_DIR, PATTERN and NUMPROC from the configuration cell.
# NOTE(review): samtools sort allocates its -m memory budget per thread, so
# -@ $NUMPROC multiplies peak memory — confirm the host can afford it.

# Glob instead of parsing `ls`, so names containing whitespace stay intact.
# The :? guard keeps the glob from expanding to /* when BASE_DIR is unset.
for sample_dir in "${BASE_DIR:?run the configuration cell first}"/*; do
    [[ -d "$sample_dir" ]] || continue
    sample="${sample_dir##*/}"
    # Same selection rule as `ls | grep "$PATTERN"`: the name must match PATTERN.
    grep -q -- "$PATTERN" <<<"$sample" || continue

    result_dir="../results/$sample"
    if [[ ! -f "$result_dir/$sample.bam" ]]; then
        echo "WARNING: $result_dir/$sample.bam not found, skipping $sample" >&2
        continue
    fi

    if ! samtools sort -@ "$NUMPROC" -o "$result_dir/$sample.sorted.bam" "$result_dir/$sample.bam"; then
        echo "ERROR: samtools sort failed for $sample" >&2
        # Do not index a missing or partial sorted BAM.
        continue
    fi
    samtools index -@ "$NUMPROC" "$result_dir/$sample.sorted.bam" \
        || echo "ERROR: samtools index failed for $sample" >&2
done

## Assemble transcripts

In [None]:
# Assemble transcripts for every selected sample with StringTie, guided by the
# reference annotation, writing ../results/<sample>/<sample>.gtf.
# Reads BASE_DIR, PATTERN and NUMPROC from the configuration cell.
stringtie_exe="/home/ilya/bin/stringtie-2.1.6.Linux_x86_64/stringtie"

# Glob instead of parsing `ls`, so names containing whitespace stay intact.
# The :? guard keeps the glob from expanding to /* when BASE_DIR is unset.
for sample_dir in "${BASE_DIR:?run the configuration cell first}"/*; do
    [[ -d "$sample_dir" ]] || continue
    sample="${sample_dir##*/}"
    # Same selection rule as `ls | grep "$PATTERN"`: the name must match PATTERN.
    grep -q -- "$PATTERN" <<<"$sample" || continue

    result_dir="../results/$sample"
    "$stringtie_exe" -p "$NUMPROC" -G "../ref/genes/genes.gtf" \
        -o "$result_dir/${sample}.gtf" \
        "$result_dir/${sample}.sorted.bam" \
        || echo "ERROR: StringTie assembly failed for $sample" >&2
done

In [None]:
# Inspect the output: long listing (with hidden files, human-readable sizes)
# of every entry under ../results.
ls -lah ../results/*

## Merge `.gtf` annotations

In [None]:
# Inspect ../ref, the directory the merge step below scans for mergelist_* files.
ls -lah ../ref

In [None]:
# Merge per-sample assemblies into one annotation per tissue.
# Every entry in $REF_DIR whose name contains "mergelist_" is handed to
# `stringtie --merge`; the result is written to ../ref/merged_<tissue>.gtf.
# Reads NUMPROC from the configuration cell.
REF_DIR="../ref/"
# Reuse the path set by the assembly cell, or fall back to the same default so
# this cell also works right after a kernel restart.
stringtie_exe="${stringtie_exe:-/home/ilya/bin/stringtie-2.1.6.Linux_x86_64/stringtie}"

# Glob instead of parsing `ls`; ${REF_DIR%/} avoids a doubled slash.
for mergelist_path in "${REF_DIR%/}"/*mergelist_*; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$mergelist_path" ]] || continue
    mergelist="${mergelist_path##*/}"

    # Tissue = second "_"-separated field, cut at the first "."
    # (e.g. mergelist_DMD.txt -> DMD), derived without forking cut.
    tissue="${mergelist#*_}"
    tissue="${tissue%%_*}"
    tissue="${tissue%%.*}"

    echo "Generating mergelist for: $tissue ..."
    "$stringtie_exe" --merge -p "$NUMPROC" -G "../ref/genes/genes.gtf" \
        -o "../ref/merged_${tissue}.gtf" \
        "$mergelist_path" \
        || echo "ERROR: StringTie merge failed for $tissue" >&2
done

## Estimate abundances

In [None]:
# Print the DMD merge list; the abundance step below reads one path per entry
# from files like this and takes the sample name from each path.
cat ../ref/mergelist_DMD.txt

In [None]:
# Estimate transcript abundances per sample against the merged annotation of
# its tissue. For every mergelist_* entry in $REF_DIR, each path listed in
# mergelist_<tissue>.txt names a sample (third "/"-separated field), whose
# sorted BAM is re-processed with `stringtie -e -B`.
# Reads NUMPROC from the configuration cell.
REF_DIR="../ref/"
stringtie_exe="/home/ilya/bin/stringtie-2.1.6.Linux_x86_64/stringtie"

# Glob instead of parsing `ls`; ${REF_DIR%/} avoids a doubled slash.
for mergelist_path in "${REF_DIR%/}"/*mergelist_*; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$mergelist_path" ]] || continue
    mergelist="${mergelist_path##*/}"

    # Tissue = second "_"-separated field, cut at the first "."
    # (e.g. mergelist_DMD.txt -> DMD), derived without forking cut.
    tissue="${mergelist#*_}"
    tissue="${tissue%%_*}"
    tissue="${tissue%%.*}"
    echo "Processing: $tissue"

    sample_list="${REF_DIR%/}/mergelist_${tissue}.txt"
    if [[ ! -r "$sample_list" ]]; then
        echo "WARNING: cannot read $sample_list, skipping $tissue" >&2
        continue
    fi

    # Split the list on whitespace like `$(cat ...)` did, but without glob
    # expansion of the entries. read returns non-zero at EOF; that is expected.
    sample_paths=()
    read -r -d '' -a sample_paths < "$sample_list" || true

    for sampledir in "${sample_paths[@]}"; do
        # Sample name is the third path component, e.g. ../results/<sample>/...
        IFS=/ read -r _ _ sample _ <<<"$sampledir"
        if [[ -z "$sample" ]]; then
            echo "WARNING: cannot derive a sample name from '$sampledir', skipping" >&2
            continue
        fi

        result_dir="../results/$sample"
        # -e limits estimation to the transcripts given with -G; -B also emits
        # Ballgown tables. -o reuses the path the assembly step wrote to, so
        # the assembled per-sample GTF is replaced by this output.
        "$stringtie_exe" -e -B -p "$NUMPROC" -G "../ref/merged_${tissue}.gtf" \
            -o "$result_dir/${sample}.gtf" \
            "$result_dir/$sample.sorted.bam" \
            || echo "ERROR: abundance estimation failed for $sample ($tissue)" >&2
    done
done

# Inspect the files produced for one sample.
ls -lah ../results/da01