In [6]:
import math
import numpy as np
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import dssp_dict_from_pdb_file

1. Lire la structure PDB et extraire les atomes de la chaîne principale (N, CA, C, O)

In [8]:
class ReadPdbStructure:
    """Represent and extract the main chain atoms (N, CA, C, O) in the PDB structure."""

    def __init__(self, pdb_filename):
        self.pdb_filename = pdb_filename
        self.structure = self._read_pdb_structure()
        self.main_chain_atoms = self._get_main_chain_atoms()

    def _read_pdb_structure(self):
        """Lire le fichier PDB et renvoyer la structure"""
        parser = PDBParser(QUIET=True)
        structure = parser.get_structure("protein", self.pdb_filename)
        return structure
    
    def _get_main_chain_atoms(self):

        main_chain_atoms = []
        for model in self.structure:
            for chain in model:
                for residue in chain:
                    if residue.has_id('N') and residue.has_id('CA') and residue.has_id('C') and residue.has_id('O'):
                        main_chain_atoms.append({
                            'residue': residue.get_resname(),  
                            'res_id': residue.get_id()[1],    
                            'chain': chain.get_id(),          
                            'N': residue['N'].get_coord(),    
                            'CA': residue['CA'].get_coord(),  
                            'C': residue['C'].get_coord(),    
                            'O': residue['O'].get_coord()     
                        })
        return main_chain_atoms
    
    def get_main_chain_atoms(self):
        """Obtenir des informations atomiques sur la chaîne principale"""
        return self.main_chain_atoms
        

2. Calculer des hydrogènes 

In [9]:
class HydrogenBondDetector:
    """Identifier les liaisons hydrogène en fonction de l'énergie"""

    def __init__(self, main_chain_atoms):
        self.main_chain_atoms = main_chain_atoms
    
    def calculate_hydrogen_bonds(self, energy_threshold = -0.5):
        hbonds = []
        num_residues = len(self.main_chain_atoms)
        
        for i in range(num_residues):  # Iterate over donor residues
            donor_residue = self.main_chain_atoms[i]
            for j in range(i + 1, num_residues):  # Iterate over acceptor residues
                acceptor_residue = self.main_chain_atoms[j]
                
                # Calculate hydrogen bond energy
                energy = self._calculate_hydrogen_bond_energy(donor_residue, acceptor_residue)
                
                # Only consider valid hydrogen bonds (energy < threshold)
                if energy < energy_threshold:
                    hbonds.append({
                        'donor': donor_residue,  
                        'acceptor': acceptor_residue,  
                        'energy': energy
                    })

        return hbonds


    def _calculate_hydrogen_bond_energy(self, donor_residue, acceptor_residue):
        """Calculer l'énergie d'une liaison hydrogène à partir de l'équation énergétique"""
        # Ensure that necessary atoms are present in both residues
        if 'O' not in acceptor_residue or 'N' not in donor_residue or 'C' not in donor_residue:
            return float('inf')
        
        # Get atomic coordinates
        O = acceptor_residue['O']
        N = donor_residue['N']
        C = donor_residue['C']
        
        # Calculate distances between atoms
        r_ON = np.linalg.norm(O - N)
        r_CN = np.linalg.norm(C - N)
        
        # Simplified energy calculation (hydrogen atoms are not used)
        energy = 0.084 * (1/r_ON - 1/r_CN) * 332
        
        return energy


3. Attribution des structures secondaires

In [14]:
class SecondaryStructureAssigner:
    """Secondary Structure Assigner, detecting α-Helix and β-Folding based on hydrogen bonds"""

    def __init__(self, main_chain_atoms, hbonds):
        self.hbonds = hbonds
        self.main_chain_atoms = main_chain_atoms

    def assign_secondary_structure(self):
        helix = set()
        beta = set()
        beta_parallel = set()
        beta_antiparallel = set()

        for bond in self.hbonds:
            donor = bond['donor']
            acceptor = bond['acceptor']

            # Directly use 'res_id' from the donor and acceptor dictionaries
            donor_residue_id = donor['res_id']
            acceptor_residue_id = acceptor['res_id']

            # 调试信息：打印 donor 和 acceptor 的残基编号
            print(f"Processing bond: donor_residue_id={donor_residue_id}, acceptor_residue_id={acceptor_residue_id}, diff={abs(donor_residue_id - acceptor_residue_id)}")

            # Assign secondary structure
            if abs(donor_residue_id - acceptor_residue_id) == 4:
                print(f"Assigning helix: {donor_residue_id} <-> {acceptor_residue_id}")
                helix.add(donor_residue_id)
                helix.add(acceptor_residue_id)
            elif abs(donor_residue_id - acceptor_residue_id) > 4:
                beta.add(donor_residue_id)
                beta.add(acceptor_residue_id)
            """if self._is_parallel(donor, acceptor):
                    print(f"Assigning parallel beta: {donor_residue_id} <-> {acceptor_residue_id}")
                    beta_parallel.add(donor_residue_id)
                    beta_parallel.add(acceptor_residue_id)
                else:
                    print(f"Assigning antiparallel beta: {donor_residue_id} <-> {acceptor_residue_id}")
                    beta_antiparallel.add(donor_residue_id)
                    beta_antiparallel.add(acceptor_residue_id)"""

        # Initialize all residues with 'C' for coil
        secondary_structure = ['C'] * len(self.main_chain_atoms)
        for index in helix:
            secondary_structure[index] = 'H'
        for index in beta:
            secondary_structure[index] = "E"

        """ for index in beta_parallel:
            secondary_structure[index] = "PE"
        for index in beta_antiparallel:
            secondary_structure[index] = "AE" """
            
        return secondary_structure

    def _is_parallel(self, donor, acceptor):    
        """Check if the two residues are in parallel beta-sheets"""
        donor_chain = donor['chain']
        acceptor_chain = acceptor['chain']
        return donor_chain == acceptor_chain


4. Classe pour comparer la structure secondaire prédite avec DSSP 

In [11]:
class Compare:
    """Class for comparing custom secondary structure assignments with DSSP"""
    
    def __init__(self, pdb_filename, predicted_structure):
        self.pdb_filename = pdb_filename
        self.predicted_structure = predicted_structure
        self.true_structure = self._dssp_structure()

    def _dssp_structure(self):
        """Extracts the DSSP-assigned secondary structure from the PDB file.
        
        Returns:
            list: A list of DSSP-assigned secondary structure ('H' for helix, 'E' for beta sheet, 'C' for coil).
        """
        
        dssp = dssp_dict_from_pdb_file(self.pdb_filename)

        dssp_structure = []
        for key in dssp[0].keys():
            dssp_code = dssp[0][key][2]
            if dssp_code in ('H', 'G', 'I'):  # Helices in DSSP
                dssp_structure.append('H')
            elif dssp_code in ('E', 'B'):  # Beta sheets in DSSP
                dssp_structure.append('E')
            else:
                dssp_structure.append('C')  # Coils
        return dssp_structure

    def compare(self):
        """Compares predicted secondary structure with DSSP and calculates accuracy and sensitivity.
        
        Returns:
            tuple: Accuracy, helix sensitivity, and beta sheet sensitivity.
        """
        print(f"Predicted structure: {self.predicted_structure}")
        print(f"True structure (DSSP): {self.true_structure}")

        # Calculate accuracy (true positives / total residues)
        tp = sum([1 for i in range(len(self.predicted_structure)) if self.predicted_structure[i] == self.true_structure[i]])
        accuracy = tp / len(self.predicted_structure)

        # Calculate helix sensitivity (correctly identified helices / total helices in DSSP)
        total_helices_in_dssp = sum([1 for i in range(len(self.true_structure)) if self.true_structure[i] == 'H'])
        if total_helices_in_dssp == 0:
            helix_sensitivity = 0  # 如果没有螺旋，灵敏度直接为 0
        else:
            helix_sensitivity = sum([1 for i in range(len(self.predicted_structure)) if self.predicted_structure[i] == 'H' and self.true_structure[i] == 'H']) / total_helices_in_dssp
        
        # Calculate beta sheet sensitivity (correctly identified beta sheets / total beta sheets in DSSP)
        total_beta_in_dssp = sum([1 for i in range(len(self.true_structure)) if self.true_structure[i] == 'E'])
        if total_beta_in_dssp == 0:
            beta_sensitivity = 0  # 如果没有 β 折叠，灵敏度直接为 0
        else:
            beta_sensitivity = sum([1 for i in range(len(self.predicted_structure)) if self.predicted_structure[i] == 'E' and self.true_structure[i] == 'E']) / total_beta_in_dssp

        print(f"Helix Sensitivity: {helix_sensitivity}")
        print(f"Beta Sensitivity: {beta_sensitivity}")

        return accuracy, helix_sensitivity, beta_sensitivity


Main code

In [12]:
# Main program to run the secondary structure assignment and evaluation
def main(pdb_filename):
    """Main function to run the secondary structure prediction and comparison with DSSP.
    
    Args:
        pdb_filename (str): The path to the PDB file.
    """
    # 1. Parse the PDB structure and extract main chain atoms
    pdb_structure = ReadPdbStructure(pdb_filename)
    main_chain_atoms = pdb_structure.get_main_chain_atoms()

    # Debug: print the number of atoms extracted
    print(f"Number of main chain atoms extracted: {len(main_chain_atoms)}")

    # 2. Detect hydrogen bonds
    hb_detector = HydrogenBondDetector(main_chain_atoms)
    hbonds = hb_detector.calculate_hydrogen_bonds()

    # Debug: print the number of hydrogen bonds detected
    print(f"Number of hydrogen bonds detected: {len(hbonds)}")

    # 3. Assign secondary structures based on hydrogen bonds
    # Correct parameter order: main_chain_atoms first, then hbonds
    ss_assigner = SecondaryStructureAssigner(main_chain_atoms, hbonds)
    predicted_structure = ss_assigner.assign_secondary_structure()

    # 4. Compare with DSSP results and calculate accuracy and sensitivity
    evaluator = Compare(pdb_filename, predicted_structure)
    accuracy, helix_sensitivity, beta_sensitivity = evaluator.compare()

    # Print the results
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Helix Sensitivity: {helix_sensitivity:.2f}")
    print(f"Beta Sensitivity: {beta_sensitivity:.2f}")


In [15]:
# Ensure hb_detector is properly instantiated

# 1. Parse the PDB structure and extract main chain atoms
pdb_structure = ReadPdbStructure("3zxe.pdb")  # Assuming pdb_filename is defined
main_chain_atoms = pdb_structure.get_main_chain_atoms()

# 2. Detect hydrogen bonds
hb_detector = HydrogenBondDetector(main_chain_atoms)  # Instantiate hb_detector here
hbonds = hb_detector.calculate_hydrogen_bonds()  # Now you can call calculate_hydrogen_bonds()

ss_assigner = SecondaryStructureAssigner(main_chain_atoms, hbonds)
predicted_structure = ss_assigner.assign_secondary_structure()



Processing bond: donor_residue_id=4, acceptor_residue_id=5, diff=1
Processing bond: donor_residue_id=4, acceptor_residue_id=6, diff=2
Processing bond: donor_residue_id=4, acceptor_residue_id=7, diff=3
Processing bond: donor_residue_id=4, acceptor_residue_id=8, diff=4
Assigning helix: 4 <-> 8
Processing bond: donor_residue_id=4, acceptor_residue_id=9, diff=5
Processing bond: donor_residue_id=4, acceptor_residue_id=10, diff=6
Processing bond: donor_residue_id=4, acceptor_residue_id=11, diff=7
Processing bond: donor_residue_id=4, acceptor_residue_id=12, diff=8
Processing bond: donor_residue_id=4, acceptor_residue_id=13, diff=9
Processing bond: donor_residue_id=4, acceptor_residue_id=14, diff=10
Processing bond: donor_residue_id=4, acceptor_residue_id=15, diff=11
Processing bond: donor_residue_id=4, acceptor_residue_id=16, diff=12
Processing bond: donor_residue_id=4, acceptor_residue_id=17, diff=13
Processing bond: donor_residue_id=4, acceptor_residue_id=18, diff=14
Processing bond: donor_

TypeError: 'set' object is not subscriptable

In [21]:
if __name__ == "__main__":
    pdb_filename = "3zxe.pdb"  # Replace with your actual PDB file path
    main(pdb_filename)

Number of main chain atoms extracted: 265
Number of hydrogen bonds detected: 34980
Processing bond: donor_residue_id=4, acceptor_residue_id=5, diff=1
Processing bond: donor_residue_id=4, acceptor_residue_id=6, diff=2
Processing bond: donor_residue_id=4, acceptor_residue_id=7, diff=3
Processing bond: donor_residue_id=4, acceptor_residue_id=8, diff=4
Assigning helix: 4 <-> 8
Processing bond: donor_residue_id=4, acceptor_residue_id=9, diff=5
Assigning parallel beta: 4 <-> 9
Processing bond: donor_residue_id=4, acceptor_residue_id=10, diff=6
Assigning parallel beta: 4 <-> 10
Processing bond: donor_residue_id=4, acceptor_residue_id=11, diff=7
Assigning parallel beta: 4 <-> 11
Processing bond: donor_residue_id=4, acceptor_residue_id=12, diff=8
Assigning parallel beta: 4 <-> 12
Processing bond: donor_residue_id=4, acceptor_residue_id=13, diff=9
Assigning parallel beta: 4 <-> 13
Processing bond: donor_residue_id=4, acceptor_residue_id=14, diff=10
Assigning parallel beta: 4 <-> 14
Processing bo