In [4]:
from Bio.PDB import PDBParser
from Bio.SeqUtils import seq1
import subprocess
import os
from scipy.spatial.distance import squareform

def calculate_tm_score(pdb_file1, pdb_file2):
    """Run the external ``TMscore`` program on two PDB files and return the score.

    The program is invoked as ``TMscore pdb_file1 pdb_file2`` and the value is
    read from the first output line of the form ``TM-score = <number>``.
    Matching on that label instead of a fixed line/column position keeps the
    parser working when the banner length changes or when the line carries
    trailing text such as ``(d0= 4.06)``.

    Earlier revisions also parsed both structures with Bio.PDB and wrote their
    sequences to ``temp_seq1.fasta`` / ``temp_seq2.fasta``. Nothing consumed
    those results (TMscore reads the PDB files itself), so that work was
    removed.

    Args:
        pdb_file1: Path to the first PDB file, passed as TMscore's first argument.
        pdb_file2: Path to the second PDB file, passed as TMscore's second argument.
            NOTE(review): TMscore presumably normalises by one of the two
            structures, so argument order may matter - confirm against the
            TMscore documentation.

    Returns:
        The TM-score as a float, or ``None`` if TMscore could not be run or its
        output contained no TM-score line. In the failure case a message is
        printed, so a single bad pair does not abort a long batch.
    """
    import re  # local import: only this function needs it

    try:
        result = subprocess.run(
            ["TMscore", pdb_file1, pdb_file2], capture_output=True, text=True
        )

        # Anchor on the start of a line so only the dedicated "TM-score = ..."
        # result line can match.
        match = re.search(
            r"^\s*TM-score\s*=\s*([0-9]*\.?[0-9]+)", result.stdout, re.MULTILINE
        )
        if match is None:
            # Surface whatever the program reported to make the failure debuggable.
            detail = (result.stderr or "").strip() or "no 'TM-score =' line in output"
            raise ValueError(f"could not parse TMscore output ({detail})")

        return float(match.group(1))
    except Exception as e:
        print(f"Error calculating TM-score for files {pdb_file1} and {pdb_file2}: {e}")
        return None

In [5]:
# Build the list of PDB file paths.
# Sorted so the row/column order is reproducible: os.listdir returns entries in
# arbitrary order and the matrix written below carries no labels.
pdb_directory = "/data/home/liuzw/NR/pdb/structural"
pdb_files = sorted(
    os.path.join(pdb_directory, file)
    for file in os.listdir(pdb_directory)
    if file.endswith(".pdb")
)
n_structures = len(pdb_files)

# Score every ordered pair of distinct structures.
# A failed comparison (calculate_tm_score returned None) is recorded as a
# TM-score of 0, i.e. "no detectable similarity". The diagonal is 1 because a
# structure is identical to itself.
tm_score_matrix = [[1.0] * n_structures for _ in range(n_structures)]
for i, pdb_file1 in enumerate(pdb_files):
    for j, pdb_file2 in enumerate(pdb_files):
        if i == j:
            continue
        tm_score = calculate_tm_score(pdb_file1, pdb_file2)
        tm_score_matrix[i][j] = tm_score if tm_score is not None else 0.0

# Convert similarities into a proper distance matrix.
# TM-score is a similarity (higher = more alike), so the distance is 1 - score.
# The (i, j) and (j, i) runs are independent and need not agree, while
# squareform rejects asymmetric input; the two directions are therefore
# averaged and the same value is stored on both sides of the diagonal.
distance_matrix = [[0.0] * n_structures for _ in range(n_structures)]
for i in range(n_structures):
    for j in range(i + 1, n_structures):
        mean_tm_score = (tm_score_matrix[i][j] + tm_score_matrix[j][i]) / 2
        pair_distance = max(0.0, 1.0 - mean_tm_score)  # guard against scores > 1
        distance_matrix[i][j] = pair_distance
        distance_matrix[j][i] = pair_distance

# Write the distance matrix to disk, one space-separated row per structure
# (rows follow the sorted order of pdb_files).
output_file_path = "distance_matrix.txt"
with open(output_file_path, "w") as output_file:
    for row in distance_matrix:
        output_file.write(" ".join(map(str, row)) + "\n")

# Convert the symmetric square matrix into SciPy's condensed (1-D) form.
compressed_distance_matrix = squareform(distance_matrix)


print(f"结果已保存到 {output_file_path}")


结果已保存到 distance_matrix.txt
