In [None]:
import os

# Directory holding the KO scan result files (*.txt) and the subdirectory
# that receives one filtered, tab-separated table per input file.
input_dir = "/storage/jufengLab/luogaoyang/metagenome_project/DSR/4_annotation/KO_scan_out"
output_dir = os.path.join(input_dir, "filtered_results_tsv")

# Hits whose E-value is at or above this cutoff are discarded.
E_VALUE_CUTOFF = 1e-3

# Header row of every output table.
OUTPUT_HEADER = "Gene_Name\tKO\tThreshold\tScore\tE-value\tKO_Definition\n"


def _best_hits(lines, source="<input>"):
    """Return the highest-scoring significant hit per gene.

    Each data line is whitespace-separated: gene name, KO id, threshold,
    score, E-value, then the KO definition (which may contain spaces).
    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.

    Args:
        lines: Iterable of text lines (e.g. an open file).
        source: Label used in error messages, typically the file path.

    Returns:
        Dict mapping gene name to a record with the keys ``gene_name``,
        ``ko``, ``threshold``, ``score``, ``e_value`` and ``ko_definition``.
        Insertion order follows the first significant appearance of each gene.

    Raises:
        ValueError: If a data line has fewer than five columns or a
            non-numeric score / E-value; the message names the source
            and the line number.
    """
    best = {}
    for lineno, line in enumerate(lines, start=1):
        text = line.strip()
        if not text or text.startswith("#"):
            continue

        columns = text.split()
        # NOTE(review): assumes the input may be KofamScan "detail" output,
        # where above-threshold hits carry a leading "*" marker column --
        # confirm against the actual files. Without dropping it, every field
        # shifts by one and the hit is filtered out because the real score is
        # read as the E-value. Input without the marker is unaffected.
        if columns[0] == "*":
            del columns[0]

        try:
            gene_name, ko, threshold = columns[:3]
            score = float(columns[3])
            e_value = float(columns[4])
        except (IndexError, ValueError) as err:
            raise ValueError(
                f"{source}:{lineno}: cannot parse hit line: {text!r}"
            ) from err

        if e_value >= E_VALUE_CUTOFF:
            continue

        # Strict '>' keeps the first-seen hit when scores tie.
        current = best.get(gene_name)
        if current is None or score > current["score"]:
            best[gene_name] = {
                "gene_name": gene_name,
                "ko": ko,
                "threshold": threshold,  # kept as text, written back verbatim
                "score": score,
                "e_value": e_value,
                "ko_definition": " ".join(columns[5:]),
            }
    return best


def _write_best_hits(path, hits):
    """Write the per-gene best hits to *path* as a tab-separated table."""
    field_order = ("gene_name", "ko", "threshold", "score", "e_value", "ko_definition")
    with open(path, "w") as outfile:
        outfile.write(OUTPUT_HEADER)
        outfile.writelines(
            "\t".join(str(record[field]) for field in field_order) + "\n"
            for record in hits.values()
        )


# Create the output directory if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)

# Process every regular *.txt file directly inside input_dir. The listing is
# sorted because os.listdir returns entries in arbitrary order, which would
# make the progress log non-reproducible.
for filename in sorted(os.listdir(input_dir)):
    input_file = os.path.join(input_dir, filename)
    if not filename.endswith(".txt") or not os.path.isfile(input_file):
        continue

    # The output keeps the input's file name (including the .txt extension).
    output_file = os.path.join(output_dir, filename)

    with open(input_file, "r") as infile:
        gene_best_match = _best_hits(infile, source=input_file)

    _write_best_hits(output_file, gene_best_match)

    print(f"处理完成：{filename} -> {output_file}")

处理完成：S1WOOD.contigs_5M_contigs_fna2faa_koScan_out.txt -> /storage/jufengLab/luogaoyang/metagenome_project/DSR/4_annotation/KO_scan_out/filtered_results_tsv/S1WOOD.contigs_5M_contigs_fna2faa_koScan_out.txt
