In [1]:
import sys
import pathlib
from IPython.core.getipython import get_ipython

# Add the parent of the notebook's working directory to sys.path
# (presumably so the local `cdr3pairsearch` package imported below resolves).
# Nothing is added when this code is not running under IPython/Jupyter.
ipython = get_ipython()
if ipython is not None:
    # `%pwd` yields the kernel's current working directory.
    notebook_path = ipython.run_line_magic('pwd', '')
    project_root = pathlib.Path(notebook_path).resolve().parent
    # Re-running this cell must not pile up duplicate sys.path entries.
    if str(project_root) not in sys.path:
        sys.path.append(str(project_root))

from cdr3pairsearch.search import search_paired_chains 


In [3]:
import os
import pandas as pd
from cdr3pairsearch import search_paired_chains
from cdr3pairsearch.distance import hamming_distance, edit_distance
from cdr3pairsearch.gene import process_gene

def test_distance_functions():
    """Sanity-check the Hamming and edit distance helpers on known string pairs."""
    print("测试距离计算函数...")

    # Hamming distance: the two strings differ at exactly one position.
    first, second = "ABCDE", "ABXDE"
    assert hamming_distance(first, second) == 1, "汉明距离计算错误"

    # Edit distance cases: (left, right, expected distance).
    edit_cases = (
        ("kitten", "sitting", 3),
        ("abc", "abc", 0),
        ("abc", "def", 3),
    )
    for left, right, expected in edit_cases:
        assert edit_distance(left, right) == expected, "编辑距离计算错误"

    print("距离计算函数测试通过\n")

def test_gene_processing():
    """Check that process_gene returns each sample name without its ``*NN`` suffix."""
    print("测试基因名称处理函数...")

    # Input gene call -> name expected back from process_gene.
    expected_by_call = {
        "IGHV2-5*02": "IGHV2-5",
        "IGLV1-51*01": "IGLV1-51",
        "IGHJ5*02": "IGHJ5",
        "IGLJ3*02": "IGLJ3",
    }
    for gene_call, gene_name in expected_by_call.items():
        assert process_gene(gene_call) == gene_name, "基因处理错误"

    print("基因名称处理函数测试通过\n")

def _run_search_scenario(number, **search_kwargs):
    """Run one search scenario and print its outcome.

    Args:
        number: Scenario number used in the printed messages.
        **search_kwargs: Keyword arguments forwarded unchanged to
            ``search_paired_chains``.

    Returns:
        Whatever ``search_paired_chains`` returned, or ``None`` when the
        search (or counting its result) raised.
    """
    try:
        matches = search_paired_chains(**search_kwargs)
        # len() stays inside the try so a result without a length is reported
        # as a scenario failure instead of aborting the whole run.
        print(f"场景{number}找到 {len(matches)} 条匹配结果")
    except Exception as exc:
        print(f"场景{number}测试失败: {exc}")
        return None
    return matches


def test_search_scenarios(database_dir=r"../database", output_dir="./test_output"):
    """Exercise ``search_paired_chains`` with five argument combinations.

    Scenarios 1-4 run a search and print the number of matches; scenario 5
    passes both ``cdr3_aa`` and ``cdr3_aa_heavy`` and expects a ``ValueError``.
    A failing scenario is reported on stdout and does not stop the remaining
    scenarios.

    Args:
        database_dir: Directory passed to ``search_paired_chains`` as
            ``database_dir``.
        output_dir: Directory (created if missing) used to build the
            ``output_file`` path of scenarios 1-4.
    """
    os.makedirs(output_dir, exist_ok=True)

    heavy_cdr3 = "VHRSSLWYGGAYNWFDP"
    heavy_genes = {"v_call_heavy": "IGHV2-5*02", "j_call_heavy": "IGHJ5*02"}
    light_genes = {"v_call_light": "IGLV1-51*01", "j_call_light": "IGLJ3*02"}

    # Scenario 1: heavy-chain CDR3 only.
    print("测试场景1：仅搜索重链CDR3...")
    _run_search_scenario(
        1,
        database_dir=database_dir,
        cdr3_aa_heavy=heavy_cdr3,
        threshold=2,
        distance_method="edit",
        output_file=os.path.join(output_dir, "result_heavy_chain.csv"),
        **heavy_genes,
    )

    # Scenario 2: light-chain CDR3 only.
    print("\n测试场景2：仅搜索轻链CDR3...")
    _run_search_scenario(
        2,
        database_dir=database_dir,
        cdr3_aa_light="GTWHSSLSAWV",
        threshold=1,
        distance_method="hamming",
        output_file=os.path.join(output_dir, "result_light_chain.csv"),
        **light_genes,
    )

    # Scenario 3: generic CDR3 argument (no chain-specific CDR3 or gene calls).
    print("\n测试场景3：使用通用CDR3搜索...")
    _run_search_scenario(
        3,
        database_dir=database_dir,
        cdr3_aa=heavy_cdr3,
        threshold=2,
        distance_method="edit",
        output_file=os.path.join(output_dir, "result_general.csv"),
    )

    # Scenario 4: heavy-chain CDR3 together with heavy and light gene calls.
    print("\n测试场景4：同时使用基因和CDR3搜索...")
    _run_search_scenario(
        4,
        database_dir=database_dir,
        cdr3_aa_heavy=heavy_cdr3,
        threshold=1,
        distance_method="edit",
        output_file=os.path.join(output_dir, "result_combined.csv"),
        **heavy_genes,
        **light_genes,
    )

    # Scenario 5: error path - supplying both cdr3_aa and cdr3_aa_heavy is
    # expected to raise ValueError.
    print("\n测试场景5：测试错误情况处理...")
    try:
        search_paired_chains(
            database_dir=database_dir,
            cdr3_aa=heavy_cdr3,
            cdr3_aa_heavy=heavy_cdr3,
            threshold=1,
            distance_method="edit",
        )
        print("场景5测试失败：未捕获到参数冲突错误")
    except ValueError as exc:
        print(f"场景5测试通过：正确捕获到错误 - {exc}")
    except Exception as exc:
        print(f"场景5测试失败：捕获到意外错误 - {exc}")

def main():
    """Run the unit checks first, then the search scenarios, and print a final notice."""
    steps = (
        test_distance_functions,  # unit check
        test_gene_processing,     # unit check
        test_search_scenarios,    # search scenario checks
    )
    for step in steps:
        step()

    print("\n所有测试完成")

if __name__ == "__main__":
    # Entry point: run the whole suite.
    main()


测试距离计算函数...
距离计算函数测试通过

测试基因名称处理函数...
基因名称处理函数测试通过

测试场景1：仅搜索重链CDR3...
处理文件: ../database\1279058_1_Paired_All.csv
当前数据块列名: ['sequence_id_heavy', 'sequence_heavy', 'locus_heavy', 'stop_codon_heavy', 'vj_in_frame_heavy', 'v_frameshift_heavy', 'productive_heavy', 'rev_comp_heavy', 'complete_vdj_heavy', 'v_call_heavy', 'd_call_heavy', 'j_call_heavy', 'sequence_alignment_heavy', 'germline_alignment_heavy', 'sequence_alignment_aa_heavy', 'germline_alignment_aa_heavy', 'v_alignment_start_heavy', 'v_alignment_end_heavy', 'd_alignment_start_heavy', 'd_alignment_end_heavy', 'j_alignment_start_heavy', 'j_alignment_end_heavy', 'v_sequence_alignment_heavy', 'v_sequence_alignment_aa_heavy', 'v_germline_alignment_heavy', 'v_germline_alignment_aa_heavy', 'd_sequence_alignment_heavy', 'd_sequence_alignment_aa_heavy', 'd_germline_alignment_heavy', 'd_germline_alignment_aa_heavy', 'j_sequence_alignment_heavy', 'j_sequence_alignment_aa_heavy', 'j_germline_alignment_heavy', 'j_germline_alignment_aa_heavy',