In [None]:
# Ch05-4 - Variant Calling with FreeBayes

In [None]:
# Install FreeBayes via Homebrew (the cells below also call samtools, which must be on PATH)
! brew install freebayes

In [None]:
# Import Libraries
import os
import shlex
import subprocess

In [None]:
def run_command(cmd):
    """
    Run an external command and raise if it exits with an error.

    The command is echoed before it runs. Each argument is shell-quoted in
    the echoed line, so the line stays unambiguous (and can be pasted into a
    terminal) even when an argument contains spaces or shell metacharacters.

    Args:
        cmd: Sequence holding the program name followed by its arguments.
            Items may be strings or path-like objects. The sequence is
            passed to ``subprocess.run`` unmodified; no shell is involved.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    # str() makes path-like arguments printable; a bare ' '.join(cmd) would
    # raise TypeError on them before the command even started.
    printable = " ".join(shlex.quote(str(arg)) for arg in cmd)
    print(f"Running: {printable}")
    subprocess.run(cmd, check=True)

In [None]:
def index_reference(reference_fasta):
    """
    Build the FASTA index of the reference genome with ``samtools faidx``.

    Args:
        reference_fasta: Path to the reference genome FASTA file.
    """
    # The index is produced by samtools and used by the later steps
    faidx_cmd = ["samtools", "faidx", reference_fasta]

    print("Indexing the reference genome...")
    run_command(faidx_cmd)
    print("Reference indexing complete.\n")

In [None]:
def sort_and_index_bam(input_bam, output_sorted_bam):
    """
    Produce a sorted alignment file and its index with Samtools.

    Args:
        input_bam: Path of the alignment file to sort.
        output_sorted_bam: Path the sorted file is written to; this file is
            indexed afterwards.
    """
    print("Sorting and indexing the BAM file...")
    samtools_steps = (
        # 1) sort the alignments into the output file
        ["samtools", "sort", "-o", output_sorted_bam, input_bam],
        # 2) index the freshly sorted file
        ["samtools", "index", output_sorted_bam],
    )
    for step in samtools_steps:
        run_command(step)
    print(f"Sorted BAM file: {output_sorted_bam}\n")

In [None]:
def call_variants_with_freebayes(reference_fasta, input_bam, output_vcf):
    """
    Call variants with FreeBayes and write them to a VCF file.

    The standard output of ``freebayes`` is captured into a temporary file
    next to ``output_vcf``. That file is moved to ``output_vcf`` only after
    FreeBayes exits successfully, so a failed or interrupted run never leaves
    an empty or truncated VCF at the final path.

    Args:
        reference_fasta: Path to the reference genome FASTA file (``-f``).
        input_bam: Path to the sorted BAM file to call variants from.
        output_vcf: Path the resulting VCF is written to.

    Raises:
        subprocess.CalledProcessError: If FreeBayes exits with a non-zero
            status.
        OSError: If the output file cannot be created or the ``freebayes``
            executable cannot be started.
    """
    print("Calling variants with FreeBayes...")
    cmd = [
        "freebayes",
        "-f", reference_fasta,  # Reference genome
        input_bam,              # Sorted BAM file
    ]
    # Stage the output so the final path only ever holds a complete result
    partial_vcf = f"{output_vcf}.partial"
    try:
        with open(partial_vcf, "w") as vcf_file:
            subprocess.run(cmd, stdout=vcf_file, check=True)
        os.replace(partial_vcf, output_vcf)
    except BaseException:
        # Also covers a kernel interrupt: drop the incomplete file, then
        # let the original error propagate unchanged.
        if os.path.exists(partial_vcf):
            os.remove(partial_vcf)
        raise
    print(f"Variants called successfully. Output VCF: {output_vcf}\n")

In [None]:
def main(
    reference_fasta="data/ecoli_genome/ecoli_reference.fasta",
    input_alignment="data/output/aligned_reads.sam",
    output_sorted_bam="output/aligned_reads_sorted.bam",
    output_vcf="output/variants.vcf",
):
    """
    Run the FreeBayes variant calling workflow end to end.

    Steps: index the reference, sort and index the alignments, then call
    variants. Calling ``main()`` with no arguments uses the default paths.

    Args:
        reference_fasta: Path to the reference genome FASTA file.
        input_alignment: Path to the unsorted alignment file. The default
            points at a SAM file; it is handed directly to ``samtools sort``.
        output_sorted_bam: Path the sorted BAM file is written to.
        output_vcf: Path the variant calls (VCF) are written to.

    Failures inside the workflow steps are reported by printing a message;
    they are not re-raised.
    """
    # Create the directories the outputs are actually written to, rather
    # than a directory name hard-coded separately from the output paths.
    for out_path in (output_sorted_bam, output_vcf):
        out_dir = os.path.dirname(out_path)
        if out_dir:  # a bare filename has no directory part to create
            os.makedirs(out_dir, exist_ok=True)
    try:
        # Step 1: Index the reference genome
        index_reference(reference_fasta)
        # Step 2: Sort and index the alignment file
        sort_and_index_bam(input_alignment, output_sorted_bam)
        # Step 3: Call variants using FreeBayes
        call_variants_with_freebayes(reference_fasta, output_sorted_bam, output_vcf)
    except subprocess.CalledProcessError as e:
        print(f"Error occurred while running a command: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")

if __name__ == "__main__":
    main()