In [1]:
import os
import shlex
import subprocess
import time

def run_command(command):
    """Run a shell command, echo its captured output, and time it.

    The command is executed through the shell (``shell=True``), so pipes and
    redirections inside ``command`` are honoured. stdout and stderr are
    captured as text and printed once the command has finished.

    Args:
        command: Shell command line to execute.

    Returns:
        A ``(result, elapsed_time)`` tuple: the ``subprocess.CompletedProcess``
        of the command and its duration in seconds. The exit status is
        available as ``result.returncode``; this function does not raise on
        failure.
    """
    print(f"Running command: {command}")
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted while the command runs.
    start_time = time.perf_counter()
    result = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    elapsed_time = time.perf_counter() - start_time
    print(f"Command '{command}' took {elapsed_time:.2f} seconds to run.")
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        # stderr is echoed whatever the exit status was; the return-code
        # message below is what actually signals a failed command.
        print("Error:", result.stderr)
    if result.returncode != 0:
        print(f"Command '{command}' exited with non-zero status {result.returncode}.")
    return result, elapsed_time

def _run_pipeline_step(announcement, command, label):
    """Run one pipeline stage through run_command and return its duration.

    Args:
        announcement: Message printed before the command starts.
        command: Shell command line for the stage.
        label: Stage name used in the "... took N seconds." summary line.

    Returns:
        Duration of the stage in seconds, as reported by run_command.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status, so later stages never run on missing or partial output.
    """
    print(announcement)
    result, step_time = run_command(command)
    if result.returncode != 0:
        raise subprocess.CalledProcessError(
            result.returncode, command, output=result.stdout, stderr=result.stderr
        )
    print(f"\n{label} took {step_time:.2f} seconds.")
    return step_time


def main(input_fastq_filename="rbcL_Qiagen_tomato_5000.fastq",
         input_dir="assets/input",
         output_base_dir="assets/output"):
    """Filter a FASTQ file, align its reads all-vs-all, and build a consensus.

    Runs three external tools in sequence (NanoFilt, minimap2, racon), writing
    every intermediate and final file to ``<output_base_dir>/<base_name>/``,
    where ``base_name`` is the input file name without its extension.

    Args:
        input_fastq_filename: Name of the FASTQ file inside ``input_dir``.
        input_dir: Directory containing the input FASTQ file.
        output_base_dir: Directory under which the per-input output folder
            is created.

    Raises:
        FileNotFoundError: If the input FASTQ file does not exist.
        subprocess.CalledProcessError: If any pipeline command fails.
    """
    input_fastq_path = os.path.join(input_dir, input_fastq_filename)
    # Check up front: in "cat file | NanoFilt" the shell reports only the exit
    # status of the last command in the pipe, so a missing input would not be
    # detected through the return code alone.
    if not os.path.isfile(input_fastq_path):
        raise FileNotFoundError(f"Input FASTQ not found: {input_fastq_path}")

    base_name = os.path.splitext(input_fastq_filename)[0]
    output_dir = os.path.join(output_base_dir, base_name)
    os.makedirs(output_dir, exist_ok=True)

    filtered_fastq_path = os.path.join(output_dir, f"{base_name}_filtered.fastq")
    paf_alignment_path = os.path.join(output_dir, f"{base_name}_reads.paf")
    consensus_fasta_path = os.path.join(output_dir, f"consensus_{base_name}.fasta")

    # The commands go through a shell, so quote every path; names containing
    # spaces or shell metacharacters would otherwise break or alter the command.
    # Paths made only of safe characters are left unchanged by shlex.quote.
    input_arg = shlex.quote(input_fastq_path)
    filtered_arg = shlex.quote(filtered_fastq_path)
    paf_arg = shlex.quote(paf_alignment_path)
    consensus_arg = shlex.quote(consensus_fasta_path)

    total_time_taken = 0

    # Step 0: Quality Control with NanoFilt
    total_time_taken += _run_pipeline_step(
        "Running Quality Control with NanoFilt...",
        f"cat {input_arg} | NanoFilt -q 10 -l 800 > {filtered_arg}",
        "NanoFilt quality control",
    )

    # Step 1: Align the reads to each other with minimap2
    total_time_taken += _run_pipeline_step(
        "\n\nRunning read alignment with minimap2...",
        f"minimap2 -x ava-ont {filtered_arg} {filtered_arg} > {paf_arg}",
        "Minimap2 alignment",
    )

    # Step 2: Generate the consensus sequence with racon
    total_time_taken += _run_pipeline_step(
        "\n\nGenerating consensus sequence with racon...",
        f"racon -m 8 -x -8 -g -16 -w 250 {filtered_arg} {paf_arg} {filtered_arg} > {consensus_arg}",
        "Racon consensus calling",
    )

    # Print out the total time for the pipeline
    print(f"Total time taken for the pipeline: {total_time_taken:.2f} seconds.")

    print(f"Pipeline completed. Find outputs in {output_dir}.")

# A notebook kernel executes cells with __name__ set to "__main__", so running
# this cell starts the pipeline; the guard keeps main() from running if the
# code is imported as a module instead.
if __name__ == "__main__":
    main()

Running Quality Control with NanoFilt...
Running command: cat assets/input/rbcL_Qiagen_tomato_5000.fastq | NanoFilt -q 10 -l 800 > assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_filtered.fastq
Command 'cat assets/input/rbcL_Qiagen_tomato_5000.fastq | NanoFilt -q 10 -l 800 > assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_filtered.fastq' took 0.75 seconds to run.

NanoFilt quality control took 0.75 seconds.


Running read alignment with minimap2...
Running command: minimap2 -x ava-ont assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_filtered.fastq assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_filtered.fastq > assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_reads.paf
Command 'minimap2 -x ava-ont assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_filtered.fastq assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_filtered.fastq > assets/output/rbcL_Qiagen_tomato_5000/rbcL_Qiagen_tomato_5000_reads.p