In [14]:
from Bio.PDB import PDBParser, NeighborSearch, Vector
from Bio.PDB.PDBExceptions import PDBConstructionWarning
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
from pathlib import Path
import seaborn as sns
from typing import List, Dict, Optional, Tuple

# Configure warnings
warnings.filterwarnings('ignore', category=PDBConstructionWarning)

class HydrogenBondAnalyzer:
    """Analyze hydrogen bonds in protein structures."""
    
    def __init__(self, 
                 pdb_path: str,
                 distance_cutoff: float = 3.5,
                 angle_cutoff: float = 120.0,
                 donor_elements: tuple = ('N', 'O'),
                 acceptor_elements: tuple = ('N', 'O'),
                 h_bond_distance: float = 1.5):
        """
        Initialize the analyzer with customizable parameters.
        
        Args:
            pdb_path: Path to PDB file
            distance_cutoff: Maximum distance for H-bond detection (Å)
            angle_cutoff: Minimum angle for H-bond detection (degrees)
            donor_elements: Elements that can act as donors
            acceptor_elements: Elements that can act as acceptors
            h_bond_distance: Maximum distance between donor and H atom
        """
        self.pdb_path = Path(pdb_path)
        self.distance_cutoff = distance_cutoff
        self.angle_cutoff = angle_cutoff
        self.donor_elements = donor_elements
        self.acceptor_elements = acceptor_elements
        self.h_bond_distance = h_bond_distance
        
        # Initialize structure
        parser = PDBParser(QUIET=True)
        self.structure = parser.get_structure("structure", self.pdb_path)
        
    def is_donor(self, atom) -> bool:
        """Check if an atom can act as a hydrogen bond donor."""
        if atom.element in self.donor_elements:
            parent_residue = atom.get_parent()
            for other_atom in parent_residue:
                if (other_atom.element == 'H' and 
                    atom - other_atom < self.h_bond_distance):
                    return True
        return False
    
    def is_acceptor(self, atom) -> bool:
        """Check if an atom can act as a hydrogen bond acceptor."""
        return (atom.element in self.acceptor_elements and 
                atom.get_name() not in ['NH1', 'NH2'])
    
    def get_bonded_hydrogen(self, donor_atom) -> Optional[object]:
        """Find hydrogen atom bonded to donor atom."""
        parent_residue = donor_atom.get_parent()
        return next((atom for atom in parent_residue 
                    if atom.element == 'H' and 
                    donor_atom - atom < self.h_bond_distance), None)
    
    def calculate_angle(self, atom1, atom2, atom3) -> float:
        """Calculate angle between three atoms."""
        vector1 = Vector(atom1.get_coord() - atom2.get_coord())
        vector2 = Vector(atom3.get_coord() - atom2.get_coord())
        return np.degrees(vector1.angle(vector2))
    
    def get_residue(self, residue_number: int, chain_id: str) -> object:
        """Get specific residue from structure."""
        for model in self.structure:
            for chain in model:
                if chain.id == chain_id:
                    for residue in chain:
                        if residue.id[1] == residue_number:
                            return residue
        raise ValueError(f"Residue {residue_number} in chain {chain_id} not found.")
    
    def analyze_hbonds(self, residue_number: int, chain_id: str) -> pd.DataFrame:
        """
        Analyze hydrogen bonds for specific residue.
        
        Returns:
            DataFrame with hydrogen bond information
        """
        target_residue = self.get_residue(residue_number, chain_id)
        
        # Setup neighbor search
        atoms = [atom for atom in self.structure.get_atoms() 
                if atom.element in self.donor_elements + ('H',)]
        ns = NeighborSearch(atoms)
        
        hbond_data = []
        
        # Analyze donor and acceptor capabilities
        for atom in target_residue:
            if self.is_donor(atom):
                self._analyze_donor(atom, ns, hbond_data)
            elif self.is_acceptor(atom):
                self._analyze_acceptor(atom, ns, hbond_data)
        
        return pd.DataFrame(hbond_data)
    
    def _analyze_donor(self, atom, ns, hbond_data: List[Dict]):
        """Analyze atom as potential donor."""
        h_atom = self.get_bonded_hydrogen(atom)
        if h_atom:
            for close_atom in ns.search(atom.coord, self.distance_cutoff):
                if (close_atom.get_parent() != atom.get_parent() and 
                    self.is_acceptor(close_atom)):
                    angle = self.calculate_angle(h_atom, atom, close_atom)
                    if angle >= self.angle_cutoff:
                        self._add_hbond_data(hbond_data, atom, h_atom, 
                                           close_atom, angle, "Residue as donor")
    
    def _analyze_acceptor(self, atom, ns, hbond_data: List[Dict]):
        """Analyze atom as potential acceptor."""
        for close_atom in ns.search(atom.coord, self.distance_cutoff):
            if (close_atom.get_parent() != atom.get_parent() and 
                self.is_donor(close_atom)):
                h_atom = self.get_bonded_hydrogen(close_atom)
                if h_atom:
                    angle = self.calculate_angle(h_atom, close_atom, atom)
                    if angle >= self.angle_cutoff:
                        self._add_hbond_data(hbond_data, close_atom, h_atom, 
                                           atom, angle, "Residue as acceptor")
    
    def _add_hbond_data(self, hbond_data: List[Dict], donor, h_atom, 
                        acceptor, angle: float, bond_type: str):
        """Add hydrogen bond information to data list."""
        hbond_data.append({
            "Donor": f"{donor.get_parent().get_resname()}{donor.get_parent().id[1]}:{donor.get_name()}",
            "Hydrogen": f"{h_atom.get_parent().get_resname()}{h_atom.get_parent().id[1]}:{h_atom.get_name()}",
            "Acceptor": f"{acceptor.get_parent().get_resname()}{acceptor.get_parent().id[1]}:{acceptor.get_name()}",
            "Distance (Å)": donor - acceptor,
            "Angle (degrees)": angle,
            "Type": bond_type
        })
    
    def plot_hbonds(self, df_hbonds: pd.DataFrame, 
                    residue_number: int, chain_id: str,
                    output_path: Optional[str] = None) -> None:
        """
        Plot hydrogen bond analysis results with detailed donor/acceptor labels.
        
        Args:
            df_hbonds: DataFrame with H-bond data
            residue_number: Residue number analyzed
            chain_id: Chain ID analyzed
            output_path: Optional path to save plot
        """
        # Set style
        plt.style.use('seaborn')
        fig = plt.figure(figsize=(15, 10))  # Increased figure height for labels
        
        # Create custom x-axis labels with donor/acceptor information
        def create_label(row):
            if row['Type'] == 'Residue as donor':
                return f"Donor: {row['Donor']}\n→\nAcceptor: {row['Acceptor']}"
            else:
                return f"Acceptor: {row['Acceptor']}\n←\nDonor: {row['Donor']}"

        x_labels = [create_label(row) for _, row in df_hbonds.iterrows()]
        
        # Plot distances
        plt.subplot(1, 2, 1)
        bars1 = sns.barplot(data=df_hbonds, x=range(len(df_hbonds)), 
                          y="Distance (Å)", color='skyblue')
        plt.xlabel("")  # Remove x-label as it's redundant with the detailed labels
        plt.ylabel("Distance (Å)")
        plt.title("H-bond Distances")
        
        # Add value labels
        for i, v in enumerate(df_hbonds["Distance (Å)"]):
            plt.text(i, v, f'{v:.2f}Å', ha='center', va='bottom')
        
        # Add type indicators with colors
        for i, row in df_hbonds.iterrows():
            color = 'green' if row['Type'] == 'Residue as donor' else 'purple'
            plt.text(i, -0.15, row['Type'], ha='center', va='top',
                    color=color, fontsize=8, rotation=45)
        
        plt.xticks(range(len(df_hbonds)), x_labels,
                  rotation=45, ha='right', va='top')
        
        # Plot angles
        plt.subplot(1, 2, 2)
        bars2 = sns.barplot(data=df_hbonds, x=range(len(df_hbonds)), 
                          y="Angle (degrees)", color='lightgreen')
        plt.xlabel("")  # Remove x-label as it's redundant with the detailed labels
        plt.ylabel("Angle (degrees)")
        plt.title("H-bond Angles (D-H···A)")
        
        # Add value labels
        for i, v in enumerate(df_hbonds["Angle (degrees)"]):
            plt.text(i, v, f'{v:.1f}°', ha='center', va='bottom')
        
        # Add type indicators with colors
        for i, row in df_hbonds.iterrows():
            color = 'green' if row['Type'] == 'Residue as donor' else 'purple'
            plt.text(i, -5, row['Type'], ha='center', va='top',
                    color=color, fontsize=8, rotation=45)
        
        plt.xticks(range(len(df_hbonds)), x_labels,
                  rotation=45, ha='right', va='top')
        
        # Add legend for bond types
        legend_elements = [
            plt.Line2D([0], [0], color='green', lw=0, marker='s',
                      label='Residue as donor', markersize=10),
            plt.Line2D([0], [0], color='purple', lw=0, marker='s',
                      label='Residue as acceptor', markersize=10)
        ]
        plt.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1),
                  loc='upper left')
        
        plt.suptitle(
            f"Hydrogen Bonds Analysis for Residue {residue_number} Chain {chain_id}\n"
            f"Total H-bonds: {len(df_hbonds)} "
            f"(Donor: {sum(df_hbonds['Type'] == 'Residue as donor')}, "
            f"Acceptor: {sum(df_hbonds['Type'] == 'Residue as acceptor')})",
            y=1.05
        )
        
        # Adjust layout
        plt.tight_layout()
        
        if output_path:
            plt.savefig(output_path, dpi=300, bbox_inches='tight')
            plt.close()

def main():
    # Configuration
    pdb_path = "/home/hp/nayanika/github/GPX6/prep_structures/original_humansec.pdb"
    residue_number = 48
    chain_id = 'X'
    output_dir = Path("/home/hp/nayanika/github/GPX6")
    
    # Initialize analyzer
    analyzer = HydrogenBondAnalyzer(pdb_path)
    
    try:
        # Analyze H-bonds
        df_hbonds = analyzer.analyze_hbonds(residue_number, chain_id)
        
        # Save results
        csv_path = output_dir / "table" / "hbond_data.csv"
        plot_path = output_dir / "figures" / "hbond_analysis.png"
        
        # Create directories if they don't exist
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        plot_path.parent.mkdir(parents=True, exist_ok=True)
        
        # Save data and create plot
        df_hbonds.to_csv(csv_path, index=False)
        analyzer.plot_hbonds(df_hbonds, residue_number, chain_id, plot_path)
        
        # Print summary
        print("\nHydrogen Bonds Summary:")
        print(df_hbonds.to_string(index=False))
        print(f"\nResults saved to:")
        print(f"CSV: {csv_path}")
        print(f"Plot: {plot_path}")
        
    except Exception as e:
        print(f"Error during analysis: {str(e)}")

if __name__ == "__main__":
    main()

  plt.style.use('seaborn')



Hydrogen Bonds Summary:
  Donor  Hydrogen Acceptor  Distance (Å)  Angle (degrees)                Type
TYR48:N TYR48:H05  ALA47:O      2.221613       146.669007    Residue as donor
SEC49:N SEC49:H10  TYR48:O      2.208623       140.940814 Residue as acceptor

Results saved to:
CSV: /home/hp/nayanika/github/GPX6/table/hbond_data.csv
Plot: /home/hp/nayanika/github/GPX6/figures/hbond_analysis.png
