In [1]:
import os

def filter_zero_abundance(input_file, output_file):
    """Copy ``input_file`` to ``output_file`` without zero-abundance rows.

    A row is dropped only when its first whitespace-delimited field is
    exactly ``'0.00'``. Rows whose first field merely begins with those
    characters (for example ``'0.001'``) are kept, as are blank lines.

    Args:
        input_file: Path of the text report to read.
        output_file: Path of the filtered report to write (overwritten if
            it already exists).
    """
    # Read everything before opening the output so that passing the same
    # path for input and output does not truncate the data mid-read.
    with open(input_file, 'r') as source:
        kept_lines = [
            line for line in source
            # split() with no separator ignores leading padding, so the
            # first token is the first column's value; [:1] is empty for
            # blank lines, which therefore never match.
            if line.split(maxsplit=1)[:1] != ['0.00']
        ]

    with open(output_file, 'w') as target:
        target.writelines(kept_lines)

def process_files(file_list):
    """Run ``filter_zero_abundance`` on every existing path in ``file_list``.

    For each path that exists, the filtered copy is written next to it under
    the name ``<base>_filtered<extension>`` and a "Processed" line is printed.
    Paths that do not exist are reported with a "File not found" line and
    skipped.
    """
    for source_path in file_list:
        # Guard clause: report missing inputs and move on to the next path.
        if not os.path.exists(source_path):
            print(f"File not found: {source_path}")
            continue

        # Insert the "_filtered" marker between the stem and the extension.
        stem, suffix = os.path.splitext(source_path)
        filtered_path = stem + "_filtered" + suffix

        filter_zero_abundance(source_path, filtered_path)
        print(f"Processed {source_path} -> {filtered_path}")

# Reports to filter -- update with your actual file names.
input_files = [
    "Bracken_PlusPF_Capped_8_Kraken_style_report.tabular",
    "Bracken_PlusPF_Capped_16_Kraken_style_report.tabular",
    "Bracken_PlusPF_Kraken_style_report.tabular",
    "Kraken2_PlusPF_Report.tabular",
    "Kraken2_PlusPF_Capped16_Report.tabular",
    "Kraken2_PlusPF_Capped8_Report.tabular",
]

# Filter every listed report.
process_files(input_files)


File not found: Bracken_PlusPF_Capped_8_Kraken_style_report
File not found: Bracken_PlusPF_Capped_16_Kraken_style_report
Processed Bracken_PlusPF_Kraken_style_report.tabular -> Bracken_PlusPF_Kraken_style_report_filtered.tabular
Processed Kraken2_PlusPF_Report.tabular -> Kraken2_PlusPF_Report_filtered.tabular
Processed Kraken2_PlusPF_Capped16_Report.tabular -> Kraken2_PlusPF_Capped16_Report_filtered.tabular
Processed Kraken2_PlusPF_Capped8_Report.tabular -> Kraken2_PlusPF_Capped8_Report_filtered.tabular
