In [1]:
# Absolute Windows path of the input PDB file (1FT5.pdb) used by the analysis
# cell below; edit this to point at your own copy of the structure.
pdb_path=r'D:\PythonProj\Auto-EC\pdb_files\1FT5.pdb'

In [None]:
import os
import numpy as np
from Bio.PDB import PDBParser, PDBIO, Select
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.Draw import rdMolDraw2D
from scipy.spatial.distance import cdist
import tempfile
import py3Dmol
from IPython.display import display, HTML
import pandas as pd
def save_selected_pdb(structure, ligand_info, interacting_residues, output_pdb):
    """Write only the ligand and its interacting residues to a PDB file.

    Args:
        structure: Structure object handed to ``PDBIO.set_structure``.
        ligand_info: Dictionary providing the ligand's 'name', 'id' and 'chain'.
        interacting_residues: Dictionaries providing 'residue', 'id' and
            'chain' for every residue that should be kept.
        output_pdb: Destination passed to ``PDBIO.save``.
    """
    class LigandAndInteractingResiduesSelect(Select):
        """Residue filter: accept the ligand and the listed partners only."""

        def accept_residue(self, residue):
            chain_id = residue.parent.id

            # The ligand is matched on its full residue id tuple.
            is_ligand = (
                residue.resname == ligand_info['name']
                and residue.id == ligand_info['id']
                and chain_id == ligand_info['chain']
            )
            if is_ligand:
                return True

            # Partner residues are matched on the sequence number only.
            return any(
                residue.resname == partner['residue']
                and residue.id[1] == partner['id']
                and chain_id == partner['chain']
                for partner in interacting_residues
            )

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_pdb, LigandAndInteractingResiduesSelect())
    
def extract_ligand_mol(ligand_info):
    """
    Build an approximate RDKit molecule from the ligand's PDB atom names.

    The element of every atom is guessed from the first character of its PDB
    atom name and consecutive atoms are joined with single bonds, so the
    result is a placeholder topology rather than the ligand's real chemistry.

    Args:
        ligand_info: Extracted ligand information; only the 'atoms_list'
            entry (atom names in PDB order) is read.

    Returns:
        mol: RDKit RWMol holding one atom per recognised atom name, with 2D
            coordinates computed.
    """
    periodic_table = Chem.GetPeriodicTable()
    mol = Chem.RWMol()

    # Indices of the atoms that were actually added, in PDB order. Names whose
    # first character is not an element symbol (e.g. a leading digit) are
    # skipped, so this list can be shorter than 'atoms_list'.
    added_indices = []
    for atom_name in ligand_info['atoms_list']:
        element = atom_name[0]  # Approximate element from atom name (could be improved)
        try:
            atom = Chem.Atom(periodic_table.GetAtomicNumber(element))
        except Exception:
            # NOTE(review): RDKit appears to raise RuntimeError for unknown
            # symbols; the broad catch keeps the original skip-on-failure
            # behaviour without swallowing KeyboardInterrupt/SystemExit.
            continue
        added_indices.append(mol.AddAtom(atom))

    # Chain the added atoms together. Bonding consecutive *added* atoms (not
    # consecutive input positions) avoids a KeyError when an atom was skipped.
    for first, second in zip(added_indices, added_indices[1:]):
        mol.AddBond(first, second, Chem.BondType.SINGLE)

    # Generate 2D coordinates for ligand
    AllChem.Compute2DCoords(mol)

    return mol
 
# Load the protein structure
def get_ligand_and_interactions(pdb_path, cutoff=4.0):
    """
    Identify the first ligand in PDB file and residues interacting with it.

    The ligand is the first hetero residue of the first model whose residue
    name is neither 'HOH' nor 'PO4'. A standard residue counts as interacting
    when at least one of its atoms lies within ``cutoff`` of a ligand atom.

    Args:
        pdb_path: Path to PDB file
        cutoff: Distance cutoff for interactions in Angstroms

    Returns:
        structure: Parsed structure
        ligand_info: Dictionary with ligand information, or None when the
            file contains no ligand
        interacting_residues: List of residues interacting with ligand
            (empty when no ligand was found)
    """
    # Parse PDB file
    parser = PDBParser(QUIET=True)
    structure_id = os.path.basename(pdb_path).split('.')[0]
    structure = parser.get_structure(structure_id, pdb_path)
    model = structure[0]

    # Find the first ligand (hetero residue that's not water or phosphate)
    ligand = next(
        (
            residue
            for chain in model
            for residue in chain
            if residue.id[0] != ' ' and residue.resname not in ('HOH', 'PO4')
        ),
        None,
    )

    # Always return three values so callers can unpack the result uniformly.
    if ligand is None:
        return structure, None, []

    ligand_atoms = list(ligand)
    ligand_info = {
        'chain': ligand.parent.id,
        'id': ligand.id,
        'name': ligand.resname,
        'atoms': len(ligand_atoms),
        'atoms_list': [atom.name for atom in ligand_atoms],
        'coordinates': np.array([atom.coord for atom in ligand_atoms])
    }

    interacting_residues = []
    if not ligand_atoms:
        # No coordinates to measure against.
        return structure, ligand_info, interacting_residues

    for chain in model:
        for residue in chain:
            # Only standard residues are considered. The ligand itself is a
            # hetero residue, so this test also skips it.
            if residue.id[0] != ' ':
                continue

            res_atoms = list(residue)
            if not res_atoms:
                continue

            # Pairwise distances: rows are residue atoms, columns ligand atoms
            res_coords = np.array([atom.coord for atom in res_atoms])
            distances = cdist(res_coords, ligand_info['coordinates'])

            # Every (residue atom, ligand atom) pair within the cutoff
            close_pairs = np.argwhere(distances <= cutoff)
            if close_pairs.size == 0:
                continue

            interaction_info = []
            for res_idx, lig_idx in close_pairs:
                res_atom = res_atoms[res_idx]
                lig_atom = ligand_atoms[lig_idx]
                # Reuse the distance already computed; store a plain float so
                # downstream consumers do not receive numpy scalars.
                dist = float(distances[res_idx, lig_idx])

                interaction_info.append({
                    'res_atom': res_atom.name,
                    'lig_atom': lig_atom.name,
                    'distance': dist,
                    'type': categorize_interaction(res_atom, lig_atom, dist)
                })

            interacting_residues.append({
                'chain': chain.id,
                'residue': residue.resname,
                'id': residue.id[1],
                'interactions': interaction_info
            })

    return structure, ligand_info, interacting_residues

def categorize_interaction(res_atom, lig_atom, distance):
    """Simple heuristic to categorize the type of interaction.

    Args:
        res_atom: Protein atom; must expose ``element``, ``name`` and
            ``parent.resname``.
        lig_atom: Ligand atom; must expose ``element``, ``name`` and
            ``parent.resname``.
        distance: Distance between the two atoms in Angstroms.

    Returns:
        One of "H-Bond", "Ionic", "π-π", "Hydrophobic" or "Other".
    """
    res_element = res_atom.element
    lig_element = lig_atom.element

    # Potential hydrogen bond acceptor/donor atoms
    hbond_atoms = ('O', 'N', 'F')
    if res_element in hbond_atoms and lig_element in hbond_atoms and distance <= 3.5:
        return "H-Bond"

    # O/N pairs that are too far apart for an H-bond but within 4.0 Å
    polar_atoms = ('O', 'N')
    if res_element in polar_atoms and lig_element in polar_atoms and distance <= 4.0:
        return "Ionic"

    if res_element == 'C' and lig_element == 'C':
        aromatic_residues = ('PHE', 'TYR', 'TRP', 'HIS')
        ring_carbons = ('CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ')
        # The ligand's residue name is read from the atom's own parent rather
        # than from a module-level variable, so the function works no matter
        # where it is called from. Only heme (porphyrin ring) is treated as
        # an aromatic ligand.
        is_pi_stack = (
            distance <= 4.0
            and res_atom.parent.resname in aromatic_residues
            and res_atom.name in ring_carbons
            and lig_atom.parent.resname == 'HEM'
            and lig_atom.name[0] == 'C'
        )
        if is_pi_stack:
            return "π-π"

        # Default to hydrophobic for carbon-carbon
        return "Hydrophobic"

    # Default to other
    return "Other"



def get_atom_properties(mol):
    """
    Calculate molecular properties for each atom in the molecule.

    Args:
        mol: RDKit Mol object (or None)

    Returns:
        properties: Dictionary mapping atom index to a dictionary of atom
            properties; empty when ``mol`` is None. 'gasteiger_charge' is
            None for atoms without a computed charge.
    """
    if mol is None:
        return {}

    # Molecules assembled by hand are never sanitised, so their valence /
    # implicit-hydrogen cache may be missing. Fill it in non-strict mode so
    # the hydrogen and valence queries below have data to work with.
    mol.UpdatePropertyCache(strict=False)

    # Compute Gasteiger charges; if that fails, continue without them
    try:
        AllChem.ComputeGasteigerCharges(mol)
    except Exception:
        print("Error: Could not compute Gasteiger charges.")

    # Calculate properties for each atom
    properties = {}
    for atom in mol.GetAtoms():
        symbol = atom.GetSymbol()
        total_hs = atom.GetTotalNumHs(includeNeighbors=True)
        if atom.HasProp('_GasteigerCharge'):
            gasteiger_charge = atom.GetDoubleProp('_GasteigerCharge')
        else:
            gasteiger_charge = None

        properties[atom.GetIdx()] = {
            'element': symbol,
            'formal_charge': atom.GetFormalCharge(),
            'gasteiger_charge': gasteiger_charge,
            'hybridization': str(atom.GetHybridization()),
            'is_aromatic': atom.GetIsAromatic(),
            'is_in_ring': atom.IsInRing(),
            'num_h': total_hs,
            'atomic_num': atom.GetAtomicNum(),
            'degree': atom.GetDegree(),
            'implicit_valence': atom.GetImplicitValence(),
            # Carbon carrying at least one hydrogen
            'is_hydrophobic': symbol == 'C' and total_hs > 0
        }

    return properties

def analyze_interaction_compatibility(ligand_mol, interacting_residues):
    """
    Annotate every recorded contact with distance-compatibility information.

    Args:
        ligand_mol: RDKit Mol object of the ligand
        interacting_residues: List of interacting residues, each carrying
            'residue', 'id' and an 'interactions' list

    Returns:
        interaction_analysis: List with one dictionary per contact, giving the
            residue label, interaction type, distance, whether the distance
            lies in the expected range, explanatory notes and both atom names
    """
    # Per-atom ligand properties are computed here but not consulted below.
    _ligand_properties = get_atom_properties(ligand_mol)

    # Expected property ranges per interaction type (distances in Angstroms)
    interaction_ranges = {
        "H-Bond": {
            "distance": (2.5, 3.5),
            "donor_elements": ["N", "O"],
            "acceptor_elements": ["N", "O", "F", "S"]
        },
        "Ionic": {
            "distance": (2.5, 4.0),
            "charge_product": "negative"  # opposite charges attract
        },
        "π-π": {
            "distance": (3.0, 5.0),
            "aromaticity": True
        },
        "Hydrophobic": {
            "distance": (3.0, 4.5),
            "hydrophobicity": True
        }
    }

    # Notes that follow the "Expected distance" line for each known type
    hbond_rules = interaction_ranges["H-Bond"]
    extra_notes = {
        "H-Bond": [
            f"Optimal donors: {hbond_rules['donor_elements']}",
            f"Optimal acceptors: {hbond_rules['acceptor_elements']}",
        ],
        "Ionic": ["Opposite charges attract"],
        "π-π": ["Both interacting groups should be aromatic"],
        "Hydrophobic": ["Optimal between hydrophobic groups (e.g., alkyl chains)"],
    }

    analysis = []
    for entry in interacting_residues:
        label = f"{entry['residue']} {entry['id']}"

        for contact in entry['interactions']:
            kind = contact['type']
            separation = contact['distance']

            # Types without a configured range accept any distance in 0-100
            low, high = interaction_ranges.get(kind, {}).get("distance", (0, 100))

            notes = []
            if kind in extra_notes:
                notes.append(f"Expected distance: {low}-{high}Å")
                notes.extend(extra_notes[kind])

            analysis.append({
                'residue': label,
                'interaction_type': kind,
                'distance': separation,
                'is_distance_optimal': low <= separation <= high,
                'notes': notes,
                'res_atom': contact['res_atom'],
                'lig_atom': contact['lig_atom']
            })

    return analysis

def create_3d_density_map(ligand_info, interaction_analysis):
    """
    Create a 3D density map around the ligand for different interaction properties.

    Each ligand atom is drawn as a small white sphere. For every analysed
    contact a translucent sphere, coloured by interaction type, is added on
    the ligand atom involved; shorter contacts produce larger spheres.

    Args:
        ligand_info: Dictionary with ligand information; 'coordinates' and
            'atoms_list' are read
        interaction_analysis: List of interaction analysis dictionaries with
            'lig_atom', 'interaction_type' and 'distance'

    Returns:
        Whatever ``viewer.show()`` returns.
    """
    def sphere_center(coord):
        # Plain Python floats, as the original code passes for coordinates.
        return {'x': float(coord[0]), 'y': float(coord[1]), 'z': float(coord[2])}

    # Initialize py3Dmol viewer
    viewer = py3Dmol.view(width=800, height=600)

    ligand_coords = ligand_info['coordinates']
    ligand_atoms = ligand_info['atoms_list']

    # Define color schemes for different interaction types
    color_scheme = {
        "H-Bond": "blue",
        "Ionic": "red",
        "π-π": "purple",
        "Hydrophobic": "green",
        "Other": "gray"
    }

    # First, add the ligand model
    viewer.addModel()

    # Add a sphere for each ligand atom
    for atom_coord in ligand_coords:
        viewer.addSphere({
            'center': sphere_center(atom_coord),
            'radius': 0.5,
            'color': 'white',
            'alpha': 0.8
        })

    # Index of the first occurrence of every atom name, built once instead of
    # scanning the name list for each interaction.
    first_index = {}
    for idx, name in enumerate(ligand_atoms):
        first_index.setdefault(name, idx)

    # Add spheres for interaction points
    for interaction in interaction_analysis:
        atom_idx = first_index.get(interaction['lig_atom'])
        if atom_idx is None or atom_idx >= len(ligand_coords):
            continue  # Skip if atom not found

        # Smaller distances create larger spheres to indicate stronger
        # interactions. The distance may be a numpy scalar, so the radius is
        # cast to a plain float just like the coordinates.
        # NOTE(review): py3Dmol presumably JSON-encodes these arguments, which
        # rejects numpy float32 values - confirm against the py3Dmol source.
        radius = float(max(0.5, 4.5 - interaction['distance']))

        # Add a transparent sphere at the interaction site
        viewer.addSphere({
            'center': sphere_center(ligand_coords[atom_idx]),
            'radius': radius,
            'color': color_scheme.get(interaction['interaction_type'], "gray"),
            'alpha': 0.3  # Transparency
        })

    # Create a legend for interaction types
    legend_html = """
    <div style="position: absolute; top: 10px; left: 10px; background-color: rgba(255,255,255,0.7); padding: 10px; border-radius: 5px;">
        <h4>Interaction Types</h4>
        <div><span style="display: inline-block; width: 20px; height: 20px; background-color: blue;"></span> H-Bond</div>
        <div><span style="display: inline-block; width: 20px; height: 20px; background-color: red;"></span> Ionic</div>
        <div><span style="display: inline-block; width: 20px; height: 20px; background-color: purple;"></span> π-π</div>
        <div><span style="display: inline-block; width: 20px; height: 20px; background-color: green;"></span> Hydrophobic</div>
        <div><span style="display: inline-block; width: 20px; height: 20px; background-color: gray;"></span> Other</div>
        <h4>Sphere Size</h4>
        <div>Larger spheres = Stronger interactions (shorter distances)</div>
    </div>
    """

    # Set up the view
    viewer.zoomTo()

    # Display the legend, then the viewer
    display(HTML(legend_html))
    return viewer.show()


def visualize_pdb(pdb_path, stick_resnames=None):
    """
    Render a PDB file with py3Dmol.

    The whole model is styled as a spectrum-coloured cartoon, hetero atoms as
    green-carbon sticks and selected protein residues as yellow-carbon sticks,
    under a translucent white VDW surface.

    Args:
        pdb_path: Path of the PDB file to display
        stick_resnames: Residue names (three-letter codes) of non-hetero
            residues to draw as sticks. Defaults to the list that was
            previously hard-coded in this function.

    Returns:
        Whatever ``viewer.show()`` returns.
    """
    if stick_resnames is None:
        stick_resnames = ["LYS", "CYS", "HIS", "GLY", "PHE", "ARG", "GLN", "LEU", "GLU", "VAL"]

    with open(pdb_path, "r") as f:
        pdb_data = f.read()

    viewer = py3Dmol.view(width=800, height=600)
    viewer.addModel(pdb_data, "pdb")

    # Show protein in cartoon representation
    viewer.setStyle({"cartoon": {"color": "spectrum"}})

    # Show ligand in stick representation
    viewer.setStyle({"hetflag": True}, {"stick": {"colorscheme": "greenCarbon", "radius": 0.2}})

    # Customize interacting residues with sticks
    viewer.setStyle({"resn": list(stick_resnames),
                     "hetflag": False},
                    {"stick": {"colorscheme": "yellowCarbon", "radius": 0.15}})

    # Add a transparent surface
    viewer.addSurface(py3Dmol.VDW, {"opacity": 0.3, "color": "white"})

    # Final setup
    viewer.zoomTo()
    return viewer.show()
 
# Main execution: locate the ligand, save it with its neighbouring residues,
# analyse the contacts and show both 3D views.
structure, ligand_info, interacting_residues = get_ligand_and_interactions(pdb_path)

if ligand_info is None:
    print(f"No ligand found in {pdb_path}; nothing to analyse.")
else:
    # extract_ligand_mol takes the ligand dictionary as its only argument.
    ligand_mol = extract_ligand_mol(ligand_info)

    output_pdb = "selected_ligand_interactions.pdb"
    save_selected_pdb(structure, ligand_info, interacting_residues, output_pdb)

    # Run the analysis
    interaction_analysis = analyze_interaction_compatibility(ligand_mol, interacting_residues)

    create_3d_density_map(ligand_info, interaction_analysis)

    # Visualize the saved ligand environment with py3Dmol
    visualize_pdb(output_pdb)

In [2]:
import os
import numpy as np
from matplotlib.patches import Rectangle, Circle, Arrow
from Bio.PDB import PDBParser, DSSP
from collections import defaultdict

import matplotlib.pyplot as plt

def load_protein_structure(pdb_path):
    """Parse a PDB file and return the resulting structure.

    The structure id is the file's base name up to its first '.'.
    """
    file_name = os.path.basename(pdb_path)
    structure_id = file_name.partition('.')[0]
    return PDBParser(QUIET=True).get_structure(structure_id, pdb_path)

def analyze_secondary_structure(structure, pdb_path=None):
    """Identify secondary structure elements using DSSP.

    Args:
        structure: Parsed structure; only its first model is analysed.
        pdb_path: Path of the PDB file DSSP should read. When omitted, the
            path falls back to ``structure.id + ".pdb"``, which only resolves
            if that file sits in the current working directory.

    Returns:
        A list with one dictionary per chain: 'chain_id' plus 'residues', a
        list of {'id', 'ss', 'resname'} for every standard residue. Residues
        DSSP has no entry for get the code '-'.
    """
    model = structure[0]  # First model
    dssp_input = pdb_path if pdb_path is not None else structure.id + ".pdb"
    dssp = DSSP(model, dssp_input, dssp='mkdssp')

    # Extract secondary structure information
    sec_struct = []
    for chain in model:
        chain_id = chain.id
        residues = []

        for res in chain:
            if res.id[0] != ' ':  # Hetero residue or water
                continue
            try:
                ss = dssp[(chain_id, res.id)][2]  # DSSP code for secondary structure
            except KeyError:
                # Residue not in DSSP (possibly due to missing atoms)
                ss = '-'
            residues.append({
                'id': res.id,
                'ss': ss,
                'resname': res.resname
            })

        sec_struct.append({
            'chain_id': chain_id,
            'residues': residues
        })

    return sec_struct

def identify_secondary_structure_elements(sec_struct):
    """Group residues into secondary structure elements.

    Consecutive residues of a chain that map to the same element type are
    merged into one element. Chains without residues contribute nothing, and
    DSSP codes missing from the mapping are treated as 'loop'.

    Args:
        sec_struct: List of {'chain_id', 'residues'} dictionaries, where each
            residue is a dictionary with 'id' (residue id tuple) and 'ss'
            (one-letter DSSP code).

    Returns:
        List of element dictionaries with 'type', 'start', 'end', 'length',
        'chain' and 'symbol', in chain and sequence order.
    """
    # Mapping from DSSP codes to element types
    ss_mapping = {
        'H': 'alpha',   # Alpha helix
        'B': 'beta',    # Beta bridge
        'E': 'beta',    # Extended strand, participates in beta ladder
        'G': '3-10',    # 3-10 helix
        'I': 'pi',      # Pi helix
        'T': 'turn',    # Turn
        'S': 'bend',    # Bend
        '-': 'loop',    # Loop or irregular
        ' ': 'loop'     # Loop or irregular
    }

    # Symbol assigned to each element type
    symbols = {
        'alpha': 'Circle',
        'beta': 'Rectangle',
        '3-10': 'Triangle',
        'pi': 'Diamond',
        'turn': 'Square',
        'bend': 'Pentagon',
        'loop': 'Line'
    }

    elements = []
    for chain in sec_struct:
        chain_id = chain['chain_id']

        # Element currently being extended; reset per chain so elements never
        # span two chains. An empty residue list simply yields no elements.
        current_element = None
        for residue in chain['residues']:
            # Unknown codes fall back to 'loop' instead of raising KeyError.
            element_type = ss_mapping.get(residue['ss'], 'loop')
            number = residue['id'][1]

            if current_element is not None and current_element['type'] == element_type:
                current_element['end'] = number
                current_element['length'] += 1
            else:
                current_element = {
                    'type': element_type,
                    'start': number,
                    'end': number,
                    'length': 1,
                    'chain': chain_id,
                    'symbol': symbols[element_type]
                }
                elements.append(current_element)

    return elements

def create_tops_visualization(elements, output_file=None):
    """Draw the secondary structure elements as a horizontal TOPS-like diagram.

    Helices are drawn as circles, every other structured element as a
    rectangle whose height scales with its length, and loops as the line
    segments joining their neighbours.

    Args:
        elements: Element dictionaries with 'type', 'start', 'end' and 'length'.
        output_file: Optional path; when given the figure is saved there at 300 dpi.

    Returns:
        The matplotlib (figure, axes) pair.
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Colors for different types of elements
    colors = {
        'alpha': 'red',
        'beta': 'blue',
        '3-10': 'green',
        'pi': 'purple',
        'turn': 'orange',
        'bend': 'yellow',
        'loop': 'gray'
    }

    baseline = 0.5
    box_width = 2
    # Rectangle types whose label also names the element type
    type_labelled = ('3-10', 'pi', 'turn', 'bend')

    cursor = 1      # running x position of the next element
    anchors = []    # one connection point per element, in order

    for element in elements:
        kind = element['type']
        n_res = element['length']

        if kind == 'loop':
            # Loops draw nothing themselves; they only advance the cursor
            anchors.append((cursor, baseline))
            cursor += n_res / 10
            continue

        box_height = n_res / 10  # Scale height by length
        mid_x = cursor + box_width/2

        if kind == 'alpha':
            ax.add_patch(plt.Circle((mid_x, baseline), box_width/2,
                                    color=colors[kind], alpha=0.7))
            ax.text(mid_x, baseline, f"{element['start']}-{element['end']}\n{n_res} res",
                    ha='center', va='center', color='white')
        elif kind == 'beta' or kind in type_labelled:
            ax.add_patch(plt.Rectangle((cursor, baseline - box_height/2), box_width, box_height,
                                       color=colors[kind], alpha=0.7))
            if kind == 'beta':
                caption = f"{element['start']}-{element['end']}\n{n_res} res"
            else:
                caption = f"{element['type']}\n{element['start']}-{element['end']}\n{n_res} res"
            ax.text(mid_x, baseline, caption, ha='center', va='center', color='white')

        anchors.append((mid_x, baseline))
        cursor += box_width + 1  # Space between elements

    # Connect neighbouring elements with lines (representing loops)
    neighbours = zip(anchors, anchors[1:], elements, elements[1:])
    for (x1, y1), (x2, y2), left, right in neighbours:
        if left['type'] == 'loop' or right['type'] == 'loop':
            plt.plot([x1, x2], [y1, y2], 'k-', linewidth=1)
            continue

        # Residues skipped between two directly adjacent structured elements
        gap = right['start'] - left['end'] - 1
        plt.plot([x1, x2], [y1, y2], 'k-', linewidth=gap/5 if gap > 0 else 1)
        if gap > 0:
            plt.text((x1 + x2)/2, (y1 + y2)/2, f"{gap} res", ha='center', va='bottom')

    # Add legend
    legend_handles = []
    for type_name in colors:
        legend_handles.append(
            plt.Rectangle((0, 0), 1, 1, color=colors[type_name], alpha=0.7,
                          label=type_name.capitalize())
        )
    ax.legend(handles=legend_handles, loc='upper left', bbox_to_anchor=(1, 1))

    ax.set_xlim(0, cursor)
    ax.set_ylim(0, 1)
    ax.axis('off')
    ax.set_title("TOPS Model for Protein Structure")

    plt.tight_layout()

    if output_file:
        plt.savefig(output_file, dpi=300, bbox_inches='tight')

    return fig, ax

def create_tops_model(pdb_path, output_file=None):
    """Run the full PDB -> TOPS-like diagram pipeline.

    Loads the structure, assigns secondary structure, groups it into
    elements, prints a numbered summary and draws the diagram.

    Args:
        pdb_path: Path of the PDB file to analyse.
        output_file: Optional image path forwarded to the plotting step.

    Returns:
        The list of elements and the matplotlib figure.
    """
    # NOTE(review): analyze_secondary_structure is called with the structure
    # only; by default it hands DSSP the path structure.id + ".pdb", which
    # matches pdb_path only when the file sits in the working directory.
    elements = identify_secondary_structure_elements(
        analyze_secondary_structure(load_protein_structure(pdb_path))
    )

    # Display summary of elements
    print(f"Found {len(elements)} structural elements:")
    for number, elem in enumerate(elements, start=1):
        print(f"{number}. {elem['type']} ({elem['symbol']}): residues {elem['start']}-{elem['end']} ({elem['length']} residues)")

    fig, _ = create_tops_visualization(elements, output_file)
    return elements, fig

# Example usage: build the TOPS-like diagram for one structure, save it as
# tops_model.png and show it.
# Replace with your PDB file path. Note that this rebinds the pdb_path
# defined in the first cell to a path relative to the working directory.
pdb_path = "pdb_files/1FT5.pdb"  
elements, fig = create_tops_model(pdb_path, "tops_model.png")
plt.show()

FileNotFoundError: [WinError 2] The system cannot find the file specified