In [1]:
import subprocess
import time
import os

def run_command(command, check=False):
    """Run a shell command, echo its captured output, and time it.

    The command is executed through the shell (``shell=True``), so shell
    features such as ``>`` redirection inside ``command`` are honoured.
    Anything the command redirects itself will not appear in the captured
    stdout.

    Args:
        command: Shell command line to execute, as a single string.
        check: When True, raise ``subprocess.CalledProcessError`` if the
            command exits with a non-zero status. Defaults to False, which
            only reports the failure and returns normally.

    Returns:
        A ``(result, elapsed_time)`` tuple where ``result`` is the
        ``subprocess.CompletedProcess`` (with text ``stdout``/``stderr``) and
        ``elapsed_time`` is the run duration in seconds.

    Raises:
        subprocess.CalledProcessError: Only when ``check`` is True and the
            command returned a non-zero exit status.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    result = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    elapsed_time = time.perf_counter() - start_time

    print(f"Command '{command}' took {elapsed_time:.2f} seconds to run.")
    print(result.stdout)
    print(result.stderr)

    if result.returncode != 0:
        # Make failures visible instead of leaving them buried in stderr.
        print(f"Command '{command}' exited with non-zero status {result.returncode}.")
        if check:
            raise subprocess.CalledProcessError(
                result.returncode, command, output=result.stdout, stderr=result.stderr
            )

    return result, elapsed_time

# Input reads: a FASTQ file expected under assets/input/
input_fastq_filename = "rbcL_Qiagen_tomato_5000.fastq"
input_fastq_path = f"assets/input/{input_fastq_filename}"
base_name = os.path.splitext(input_fastq_filename)[0]  # Extract base name without extension

# Every output of this run goes into a per-input subdirectory
output_base_dir = "assets/output"
output_dir = os.path.join(output_base_dir, base_name)

# Make sure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Step 1: Align the reads to each other
paf_alignment_filename = f"reads_{base_name}.paf"
paf_alignment_path = os.path.join(output_dir, paf_alignment_filename)
minimap2_command = f"minimap2 -x ava-ont {input_fastq_path} {input_fastq_path} > {paf_alignment_path}"
minimap2_result, minimap2_time = run_command(minimap2_command)
if minimap2_result.returncode != 0:
    # Stop here: the PAF file would be empty or partial, so running Racon on
    # it would only produce a misleading consensus (or a second, less clear error).
    raise RuntimeError(
        f"minimap2 failed with exit code {minimap2_result.returncode}; "
        f"not running Racon on {paf_alignment_path}"
    )

# Step 2: Use Racon to generate the consensus sequence from the alignment
consensus_fasta_filename = f"consensus_{base_name}.fasta"
consensus_fasta_path = os.path.join(output_dir, consensus_fasta_filename)
racon_command = f"racon -m 8 -x -6 -g -8 -w 500 {input_fastq_path} {paf_alignment_path} {input_fastq_path} > {consensus_fasta_path}"
racon_result, racon_time = run_command(racon_command)
if racon_result.returncode != 0:
    # The redirected FASTA cannot be trusted when Racon did not exit cleanly.
    raise RuntimeError(
        f"racon failed with exit code {racon_result.returncode}; "
        f"{consensus_fasta_path} is not a valid consensus"
    )

# Print out the total time for each step
print(f"Minimap2 alignment took {minimap2_time:.2f} seconds.")
print(f"Racon consensus calling took {racon_time:.2f} seconds.")


Command 'minimap2 -x ava-ont assets/input/rbcL_Qiagen_tomato_5000.fastq assets/input/rbcL_Qiagen_tomato_5000.fastq > assets/output/rbcL_Qiagen_tomato_5000/reads_rbcL_Qiagen_tomato_5000.paf' took 37.68 seconds to run.

[M::mm_idx_gen::0.078*1.03] collected minimizers
[M::mm_idx_gen::0.100*1.45] sorted minimizers
[M::main::0.100*1.45] loaded/built the index for 5000 target sequence(s)
[M::mm_mapopt_update::0.103*1.43] mid_occ = 1478
[M::mm_idx_stat] kmer size: 15; skip: 5; is_hpc: 0; #seq: 5000
[M::mm_idx_stat::0.106*1.42] distinct minimizers: 171791 (79.28% are singletons); average occurrences: 4.250; average spacing: 3.020; total length: 2204496
[M::worker_pipeline::37.620*1.64] mapped 5000 sequences
[M::main] Version: 2.22-r1101
[M::main] CMD: minimap2 -x ava-ont assets/input/rbcL_Qiagen_tomato_5000.fastq assets/input/rbcL_Qiagen_tomato_5000.fastq
[M::main] Real time: 37.627 sec; CPU: 61.676 sec; Peak RSS: 0.390 GB

Command 'racon -m 8 -x -6 -g -8 -w 500 assets/input/rbcL_Qiagen_tomat