<a href="https://colab.research.google.com/github/Deepthivarughese/NGS-Data-Analysis/blob/main/Alignment_using_BWA_and_Mutation_calling_using_Mutect2_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!apt-get install openjdk-11-jre -y

In [None]:
!wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4.3.0.0.zip


In [None]:
!unzip gatk-4.3.0.0.zip

In [None]:
import os

# Expose the unpacked GATK directory via GATK_HOME and append it to PATH so
# the `gatk` launcher can be invoked by name from `!` shell commands.
os.environ.update({
    'GATK_HOME': '/content/gatk-4.3.0.0',
    'PATH': os.environ['PATH'] + ':/content/gatk-4.3.0.0',
})

In [None]:
# Sanity check: print the GATK version through the `gatk` launcher.
!gatk --version

In [None]:
# Install BWA, SAMtools
!apt-get update
!apt-get install -y bwa samtools

In [None]:
# Open Colab's file-upload dialog; the value returned by files.upload() is kept in `uploaded`.
# NOTE(review): presumably this supplies the Norm_*/Tumor_* FASTQ files (and dbsnp.vcf)
# that later cells read from the working directory — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
!wget ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
!gunzip hg38.fa.gz

In [None]:
!bwa index hg38.fa

In [None]:
!bwa mem hg38.fa Norm_R1.fastq Norm_R2.fastq > normal_aligned.sam
!bwa mem hg38.fa Tumor_R1.fastq Tumor_R2.fastq > tumor_aligned.sam

In [None]:
!samtools view -Sb normal_aligned.sam > normal_aligned.bam
!samtools view -Sb tumor_aligned.sam > tumor_aligned.bam

In [None]:
!samtools sort normal_aligned.bam -o normal_sorted.bam
!samtools sort tumor_aligned.bam -o tumor_sorted.bam

In [None]:
!samtools index normal_sorted.bam
!samtools index tumor_sorted.bam

In [None]:
!gatk MarkDuplicates -I normal_sorted.bam -O normal_marked.bam -M normal_metrics.txt
!gatk MarkDuplicates -I tumor_sorted.bam -O tumor_marked.bam -M tumor_metrics.txt

In [None]:
# Download the duplicate-marked BAM files written by MarkDuplicates
from google.colab import files
files.download('normal_marked.bam')
files.download('tumor_marked.bam')

In [None]:
# Download the MarkDuplicates metrics files
from google.colab import files
files.download('normal_metrics.txt')
files.download('tumor_metrics.txt')

In [None]:
# Base quality score recalibration (this requires known variant sites like dbSNP)
!gatk BaseRecalibrator -I normal_marked.bam -R hg38.fa --known-sites dbsnp.vcf -O normal_recal_data.table
!gatk BaseRecalibrator -I tumor_marked.bam -R hg38.fa --known-sites dbsnp.vcf -O tumor_recal_data.table

# Apply recalibration
!gatk ApplyBQSR -I normal_marked.bam -R hg38.fa --bqsr-recal-file normal_recal_data.table -O normal_recalibrated.bam
!gatk ApplyBQSR -I tumor_marked.bam -R hg38.fa --bqsr-recal-file tumor_recal_data.table -O tumor_recalibrated.bam


In [None]:
!gatk HaplotypeCaller -I normal_recalibrated.bam -R hg38.fa -O normal_variants.vcf
!gatk HaplotypeCaller -I tumor_recalibrated.bam -R hg38.fa -O tumor_variants.vcf

In [None]:
# Generate the index file (.fai)
!samtools faidx /content/hg38.fa

In [None]:
# Inputs for somatic calling. Use the fully pre-processed BAMs (duplicates marked,
# base qualities recalibrated) written by ApplyBQSR rather than the raw sorted
# alignments, so the earlier pre-processing steps actually feed into Mutect2.
# ApplyBQSR also writes a BAM index alongside its output by default.
normal_bam = "/content/normal_recalibrated.bam"
tumor_bam = "/content/tumor_recalibrated.bam"
reference = "/content/hg38.fa"

In [None]:
# For Somatic mutation detection
!gatk Mutect2 -R {reference} -I {normal_bam} -I {tumor_bam} -O output_mutect2.vcf

In [None]:
# Filter the Mutect2 VCF results
!gatk Mutect2 -V output_mutect2.vcf -R {reference} -O filtered_output.vcf

In [None]:
# Download the filtered somatic calls (the filename must match the -O of the filtering step).
from google.colab import files
files.download('filtered_output.vcf')