# In [None]:
import pandas as pd
from Bio import SeqIO

# Read the viral identification report. Its 'contig_id' column is what the
# per-tool selections below are ultimately used for.
report = pd.read_csv(str(snakemake.input.viral_report))

# Zero-row frame with the report's columns. A disabled tool gets this as its
# result so that `<tool>_report['contig_id']` remains a valid lookup; a plain
# list placeholder would raise TypeError when indexed by column name.
no_hits = report.iloc[0:0]

# Filter based on config parameters: each enabled tool keeps the rows that
# pass its own criterion, each disabled tool contributes no rows.

# MGV: rows flagged as 'Viral'
if snakemake.params.run_mgv:
    mgv_report = report[report['MGV_viral'] == 'Viral']
else:
    mgv_report = no_hits

# VirFinder: score at or above the configured minimum
if snakemake.params.run_vf:
    vf_report = report[report['VirFinder_score'] >= snakemake.params.vf_score]
else:
    vf_report = no_hits

# VirSorter: 'complete_phage' rows keep Category_number, every other row is
# shifted by 3 (presumably so non-complete categories do not collide with the
# complete-phage ones -- confirm against the values expected in vs_cat).
# Computed column-wise; equivalent to the row-wise rule and also well-defined
# when the report has no rows.
if snakemake.params.run_vs:
    report['VirSorter_cat'] = report['Category_number'].where(
        report['Category_text'] == 'complete_phage',
        report['Category_number'] + 3,
    )
    vs_report = report[report['VirSorter_cat'].isin(snakemake.params.vs_cat)]
else:
    vs_report = no_hits

# VirSorter2: max score at or above the configured minimum
if snakemake.params.run_vs2:
    vs2_report = report[report['VirSorter2_max_score'] >= snakemake.params.vs2_score]
else:
    vs2_report = no_hits

# DeepVirFinder: score at or above the configured minimum
if snakemake.params.run_dvf:
    dvf_report = report[report['DeepVirFinder_score'] >= snakemake.params.dvf_score]
else:
    dvf_report = no_hits

# VIBRANT: any row with a non-null 'VIBRANT_viruses' entry
if snakemake.params.run_vb:
    vb_report = report[report['VIBRANT_viruses'].notnull()]
else:
    vb_report = no_hits

# geNomad: virus score at or above the configured minimum
if snakemake.params.run_genomad:
    genomad_report = report[report['virus_score'] >= snakemake.params.genomad_score]
else:
    genomad_report = no_hits

# Union of contig ids selected by any tool. Built once, before the FASTA
# loop, so each record costs a single set lookup instead of rebuilding one
# set per tool per record.
viral_ids = set()
for tool_report in (
    mgv_report,
    vf_report,
    vs_report,
    vs2_report,
    dvf_report,
    vb_report,
    genomad_report,  # previously never consulted; the loop named an undefined report instead
):
    # Skip tools with no hits. The length check also tolerates a disabled
    # tool being represented by an empty list rather than a DataFrame.
    if len(tool_report):
        viral_ids.update(tool_report['contig_id'])

# list to store all combined sequences
combined_sequences = []

# extract viral contigs
for record in SeqIO.parse(str(snakemake.input.contigs), "fasta"):
    # Prefix the id with the assembly name before comparing it with the
    # report's contig ids; the prefixed id is also what gets written out.
    record.id = snakemake.params.assembly + '_' + record.id
    if record.id in viral_ids:
        combined_sequences.append(record)

SeqIO.write(combined_sequences, str(snakemake.output), "fasta")