In [16]:
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
import requests
import tempfile
import os
import subprocess
import numpy as np
import matplotlib.pyplot as plt
import py3Dmol
import xml.etree.ElementTree as ET

In [17]:
# Intermediate files shared by the cells below. All paths are relative, so
# they are created in the notebook's current working directory.
temp_drugpdb = "temp_drug.pdb"  # 3D ligand structure generated from the SMILES input
temp_protpdb = "temp_prot.pdb"  # receptor structure downloaded for the UniProt ID
temp_drugpdbqt = "temp_drug.pdbqt"  # ligand converted to PDBQT for docking
temp_protpdbqt = "temp_prot.pdbqt"  # receptor converted to PDBQT for docking
docking_output = "docking_output.pdbqt"  # file that receives the docking result

In [18]:
# Helper that renames the placeholder residue "UNL" to "LIG" in a PDB file
def clean_pdb(pdb_filename):
    """Rewrite ``pdb_filename`` in place with every "UNL" replaced by "LIG".

    The replacement is a plain substring substitution applied to the whole
    file content; nothing else in the file is modified.
    """
    with open(pdb_filename, "r") as source:
        original_text = source.read()

    # "UNL" cannot span a line break, so substituting on the full text is
    # equivalent to substituting line by line.
    renamed_text = original_text.replace("UNL", "LIG")

    with open(pdb_filename, "w") as target:
        target.write(renamed_text)


def process_smiles(smiles: str):
    """Generate a 3D structure for a SMILES string and save it as a PDB file.

    The SMILES is first validated with RDKit, then Open Babel (``obabel``)
    builds 3D coordinates with hydrogens and writes them to ``temp_drugpdb``.

    Args:
        smiles: SMILES representation of the ligand.

    Returns:
        The content of the generated PDB file as a string.

    Raises:
        ValueError: if ``smiles`` is empty or cannot be parsed by RDKit.
        RuntimeError: if Open Babel is missing, fails, or produces no output.
    """
    if not smiles or not smiles.strip():
        raise ValueError("Invalid SMILES string")

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles returns None on a parse error; an atom-less molecule is
    # just as unusable for docking.
    if mol is None or mol.GetNumAtoms() == 0:
        raise ValueError("Invalid SMILES string")

    # Remove any output left over from a previous run so that a silent
    # Open Babel failure cannot be mistaken for a fresh result.
    if os.path.exists(temp_drugpdb):
        os.remove(temp_drugpdb)

    # Pass the arguments as a list (no shell) so quotes or shell
    # metacharacters inside the SMILES cannot break or hijack the command.
    command = [
        "obabel", f"-:{smiles}",
        "-O", temp_drugpdb,
        "-xr", "--gen3d", "--addhydrogens", "-p", "-h",
    ]
    try:
        completed = subprocess.run(command, check=False)
    except FileNotFoundError as exc:
        raise RuntimeError("Open Babel executable 'obabel' was not found on PATH.") from exc
    if completed.returncode != 0:
        raise RuntimeError("Error converting SMILES to a 3D PDB structure. Command failed.")

    # Open Babel can exit with status 0 even when nothing was converted.
    if not os.path.exists(temp_drugpdb) or os.path.getsize(temp_drugpdb) == 0:
        raise RuntimeError("Open Babel produced no PDB output for the given SMILES.")

    with open(temp_drugpdb, "r") as pdb_file:
        return pdb_file.read()

# Download the UniProt ".fasta" resource for an accession
def fetch_protein_structure(uniprot_id: str):
    """Return the body of the UniProt ``.fasta`` resource for ``uniprot_id``.

    Raises:
        ValueError: if the server does not answer with HTTP 200.
    """
    fasta_reply = requests.get(f"https://www.uniprot.org/uniprot/{uniprot_id}.fasta")
    if fasta_reply.status_code == 200:
        return fasta_reply.text
    raise ValueError("Protein not found")

def fetch_pdb_file(uniprot_id: str, timeout: float = 30):
    """Download a structure for a UniProt ID, preferring RCSB over AlphaFold.

    The UniProt XML entry is queried for PDB cross-references. The first
    listed PDB entry is downloaded from RCSB; if there is no cross-reference
    or the download fails, the AlphaFold model for the accession is used
    instead. The structure is saved to ``temp_protpdb``.

    Args:
        uniprot_id: UniProt accession; a version suffix such as ".1" is
            stripped before the AlphaFold lookup.
        timeout: seconds to wait for each HTTP request.

    Returns:
        The downloaded PDB file content as a string.

    Raises:
        Exception: if the UniProt entry cannot be fetched, or if neither
            RCSB nor AlphaFold provides a structure.
    """
    url = f"https://www.uniprot.org/uniprot/{uniprot_id}.xml"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error fetching UniProt entry for {uniprot_id}")

    # Collect the ids of all PDB cross-references in the UniProt entry.
    root = ET.fromstring(response.content)
    pdb_ids = [
        cross_ref.attrib.get('id')
        for cross_ref in root.findall(".//{http://uniprot.org/uniprot}dbReference")
        if cross_ref.attrib.get('type') == 'PDB'
    ]

    # Only query RCSB when a cross-reference exists; indexing an empty list
    # would raise IndexError and skip the AlphaFold fallback entirely.
    if pdb_ids:
        pdb_id = pdb_ids[0]
        url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            with open(temp_protpdb, "w") as f:
                f.write(response.text)
            return response.text
    else:
        print(f"No PDB IDs found for UniProt ID {uniprot_id}.")

    # Fallback: predicted model from the AlphaFold database.
    print("Checking in AlphaFold database:....")
    # Strip version suffix (e.g., ".1") if present
    clean_uniprot_id = uniprot_id.split('.')[0]

    alphafold_url = f"https://alphafold.ebi.ac.uk/files/AF-{clean_uniprot_id}-F1-model_v4.pdb"
    response = requests.get(alphafold_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(
            f"Error fetching PDB file for {uniprot_id}: "
            "no RCSB PDB entry or AlphaFold model could be downloaded"
        )

    with open(temp_protpdb, "w") as f:
        f.write(response.text)
    return response.text
        
def fetch_prot_pdb(uniprot_id:str):
    """
    Download a structure for ``uniprot_id``, trying AlphaFold before RCSB.

    The AlphaFold model for the accession is requested first. If that request
    does not return HTTP 200, the UniProt XML entry is read for PDB
    cross-references and the first one is downloaded from RCSB. Whichever
    structure is obtained is written to ``temp_protpdb`` and returned as text.

    Raises:
        Exception: if the UniProt entry cannot be fetched, lists no PDB
            cross-reference, or the RCSB download fails.
    """
    def _store_and_return(pdb_text):
        # Persist the structure for the later conversion step.
        with open(temp_protpdb,"w") as out_file:
            out_file.write(pdb_text)
        return pdb_text

    print("Checking in AlphaFold database:....")
    # AlphaFold file names use the bare accession, without a ".N" version suffix
    accession = uniprot_id.split('.')[0]
    af_reply = requests.get(
        f"https://alphafold.ebi.ac.uk/files/AF-{accession}-F1-model_v4.pdb"
    )
    if af_reply.status_code == 200:
        return _store_and_return(af_reply.text)

    # No AlphaFold model: look up experimental structures through UniProt
    entry_reply = requests.get(f"https://www.uniprot.org/uniprot/{uniprot_id}.xml")
    if entry_reply.status_code != 200:
        raise Exception(f"Error fetching UniProt entry for {uniprot_id}")

    entry_root = ET.fromstring(entry_reply.content)
    structure_ids = [
        ref.attrib.get('id')
        for ref in entry_root.findall(".//{http://uniprot.org/uniprot}dbReference")
        if ref.attrib.get('type') == 'PDB'
    ]
    if len(structure_ids) == 0:
        missing_message = f"No PDB IDs found for UniProt ID {uniprot_id}."
        print(missing_message)
        raise Exception(missing_message)

    # Use the first cross-referenced entry
    pdb_id = structure_ids[0]
    rcsb_reply = requests.get(f"https://files.rcsb.org/download/{pdb_id}.pdb")
    if rcsb_reply.status_code != 200:
        raise Exception(f"Error fetching PDB file for {pdb_id}")
    return _store_and_return(rcsb_reply.text)

In [19]:
import os
import tempfile
from vina import Vina
from prepare_ligands_vina import prepare_ligand_for_vina

def _receptor_search_box(structure_path, padding=5.0):
    """Return (center, box_size) of a box enclosing all atoms of a PDB/PDBQT file."""
    coordinates = []
    with open(structure_path, "r") as handle:
        for line in handle:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            try:
                # Fixed-width x, y, z columns shared by the PDB and PDBQT formats.
                coordinates.append(
                    (float(line[30:38]), float(line[38:46]), float(line[46:54]))
                )
            except ValueError:
                continue  # skip malformed coordinate records
    if not coordinates:
        raise RuntimeError(f"No atom coordinates found in {structure_path}.")

    lows = [min(axis) for axis in zip(*coordinates)]
    highs = [max(axis) for axis in zip(*coordinates)]
    center = [(low + high) / 2.0 for low, high in zip(lows, highs)]
    box_size = [(high - low) + 2.0 * padding for low, high in zip(lows, highs)]
    return center, box_size


def perform_docking(drug_pdb, protein_pdb, center=None, box_size=None,
                    exhaustiveness=8, n_poses=9):
    """Dock the prepared ligand against the prepared receptor with AutoDock Vina.

    The function works from the files on disk: ``temp_protpdb`` is converted
    to ``temp_protpdbqt`` with Open Babel, ``temp_drugpdb`` is converted to
    ``temp_drugpdbqt`` with ``prepare_ligand_for_vina``, and the docked poses
    are written to ``docking_output``.

    Args:
        drug_pdb: unused; kept so existing callers keep working.
        protein_pdb: unused; kept so existing callers keep working.
        center: optional [x, y, z] centre of the search box in Angstrom.
        box_size: optional [x, y, z] edge lengths of the search box in Angstrom.
            When ``center`` or ``box_size`` is omitted, the missing value is
            derived from a box around the whole receptor (blind docking).
            Supply both for a known binding site, since a whole-protein box
            is slow and memory-hungry for large receptors.
        exhaustiveness: Vina search exhaustiveness.
        n_poses: maximum number of poses to generate and save.

    Returns:
        The content of the docking output PDBQT file as a string.

    Raises:
        RuntimeError: if a conversion fails, an expected file is missing, or
            Vina cannot load the receptor or ligand.
    """
    conversion_result = os.system(f"obabel -i pdb {temp_protpdb} -o pdbqt -O {temp_protpdbqt} -xr -p")
    if conversion_result != 0:
        raise RuntimeError("Error converting protein PDB to PDBQT. Command failed.")

    ligand_prepared = prepare_ligand_for_vina(input_pdb=temp_drugpdb, output_pdbqt=temp_drugpdbqt)
    if not ligand_prepared:
        raise RuntimeError("Failed to prepare ligand")

    print("Conversion to pdbqt successful. Moving on.")

    # Check if PDBQT files are created
    if not os.path.exists(temp_protpdbqt) or not os.path.exists(temp_drugpdbqt):
        raise RuntimeError("PDBQT files were not created. Check Open Babel conversion.")

    # Create Vina object with the default 'vina' scoring function
    v = Vina(sf_name='vina')

    # Load receptor (protein) and ligand (drug) in PDBQT format
    try:
        v.set_receptor(temp_protpdbqt)
        v.set_ligand_from_file(temp_drugpdbqt)
    except Exception as e:
        raise RuntimeError(f"Error setting receptor or ligand: {e}")

    # Vina has no default search box: the affinity maps must be computed
    # before dock() can run.
    if center is None or box_size is None:
        auto_center, auto_box_size = _receptor_search_box(temp_protpdbqt)
        if center is None:
            center = auto_center
        if box_size is None:
            box_size = auto_box_size
    v.compute_vina_maps(center=list(center), box_size=list(box_size))

    print("Starting the docking process....")
    v.dock(exhaustiveness=exhaustiveness, n_poses=n_poses)

    # write_poses saves the ranked docking results; write_pose would only
    # save the ligand's single current pose.
    v.write_poses(docking_output, n_poses=n_poses, overwrite=True)

    # Read docking results (output PDBQT file)
    with open(docking_output, 'r') as file:
        docking_result = file.read()

    return docking_result


In [20]:
# Placeholder analysis step for the docking output
def analyze_interaction(docking_results: str):
    """Return the docking output unchanged.

    No parsing is done yet; this is the hook where binding affinities or
    interaction sites could later be extracted from the docking output.
    """
    interaction_report = docking_results
    return interaction_report

In [21]:
# Sample inputs for an end-to-end run of the pipeline.
# Ligand, given as a SMILES string.
drug_smiles = "CC(=O)OC1=CC=CC=C1C(=O)O"  # Aspirin

# Receptor, given as a UniProt accession.
protein_uniprot = "Q852Q0"  # change this to dock against a different protein

# Step 1: Build the ligand structure from SMILES.
# process_smiles writes the 3D structure to temp_drugpdb.
drug_pdb = process_smiles(drug_smiles)

# Step 2: Fetch the receptor structure.
# fetch_prot_pdb tries AlphaFold first and falls back to RCSB; it saves the
# structure to temp_protpdb and returns the PDB text.
protein_pdb = fetch_prot_pdb(protein_uniprot)

Checking in AlphaFold database:....


  Both -p and -h options are set. All implicit hydrogens (-h) will be added without considering pH.

1 molecule converted


In [22]:
# Step 3: Perform docking.
# perform_docking works from the temp_* files written by the previous cell,
# so that cell must have run successfully first.

docking_result = perform_docking(drug_pdb, protein_pdb)


# Step 4: Analyze interaction (currently echoes the raw docking output)
interaction_details = analyze_interaction(docking_result)
print("Docking Interaction Details:", interaction_details)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is temp_prot.pdb)

1 molecule converted


Error running prepare_ligand4.py:
Command output: 
Command errors:   File "/home/aavash/Downloads/mgltools_x86_64Linux2_1.5.7p1/mgltools_x86_64Linux2_1.5.7/MGLToolsPckgs/AutoDockTools/Utilities24/prepare_ligand4.py", line 22
    print "Usage: prepare_ligand4.py -l filename"
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
SyntaxError: Missing parentheses in call to 'print'. Did you mean print(...)?



RuntimeError: Failed to prepare ligand

In [None]:
# Step 5: 3D visualization with py3Dmol (3Dmol.js)

# Viewer canvas that will hold the receptor and the ligand
viewer = py3Dmol.view(width=800, height=600)

# Model 0 is the protein, model 1 the drug; both are passed in as PDB data
for structure_text in (protein_pdb, drug_pdb):
    viewer.addModel(structure_text, "pdb")

# Draw both models as sticks
for model_index in range(2):
    viewer.setStyle({'model': model_index}, {'stick': {}})

# Fit the camera to everything that was loaded, then render
viewer.zoomTo()
viewer.render()

# Display the viewer in the notebook
viewer.show()