In [1]:
import warnings
warnings.filterwarnings('ignore')
from ipywidgets import widgets
from IPython.display import display, HTML, Markdown, clear_output
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from padelpy import padeldescriptor
import joblib
import py3Dmol
import csv
import glob
from rdkit.Chem import AllChem
from meeko import MoleculePreparation, PDBQTWriterLegacy
from pymol import cmd
from vina import Vina
import sys
sys.path.insert(1, 'Jupyter_Dock/utilities/')
from utils import getbox, pdbqt_to_sdf
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning, message="'xdrlib' is deprecated")

  import xdrlib


In [2]:
# !jupyter nbextension enable --py widgetsnbextension --sys-prefix
# !jupyter serverextension enable voila --sys-prefix

In [3]:
# Constants: input files shared by the docking and visualisation code below.
docking_protein = '5gs4_clean.pdb'  # PDB file loaded into PyMOL and shown in py3Dmol
vina_input = '5gs4.pdbqt'  # receptor file handed to Vina.set_receptor

In [4]:
def calculate_lipinski_descriptors(smiles):
    """Compute basic descriptors for a SMILES string and render them as HTML.

    Parameters
    ----------
    smiles : str
        SMILES string, parsed with ``Chem.MolFromSmiles``.

    Returns
    -------
    str
        An HTML ``<table>`` with one ``label: value`` row per descriptor.
        Floating-point descriptors are shown with four decimals, counts as
        plain integers.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles``.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("You entered an invalid SMILES string")

    # Tally elements directly from the atom list. HeavyAtomCount would count
    # every non-hydrogen atom, which is not the number of carbons.
    atomic_numbers = [atom.GetAtomicNum() for atom in mol.GetAtoms()]

    # Display label -> value, in the order the rows are rendered.
    descriptors = {
        'Molecular Weight': Descriptors.MolWt(mol),
        'LogP': Descriptors.MolLogP(mol),
        'Number of Hydrogen Bond Donors': Descriptors.NumHDonors(mol),
        'Number of Hydrogen Bond Acceptors': Descriptors.NumHAcceptors(mol),
        'Number of Rotatable Bonds': Descriptors.NumRotatableBonds(mol),
        'Carbon Count': atomic_numbers.count(6),
        'Oxygen Count': atomic_numbers.count(8),
    }

    rows = []
    for label, value in descriptors.items():
        # Only real-valued descriptors need decimals; counts read better as "3"
        # than as "3.0000".
        text = f"{value:.4f}" if isinstance(value, float) else str(value)
        rows.append(f"<tr><td><strong>{label}:</strong></td><td>{text}</td></tr>")

    return "<table style='font-size: 20px;'>" + "".join(rows) + "</table>"

def generate_csv_file(string1, string2, filename):
    """Write ``string1`` and ``string2`` as a single tab-separated line.

    The line is written verbatim rather than through ``csv.writer``: the CSV
    writer treated the whole tab-joined line as one cell and wrapped it in
    double quotes whenever either string contained a comma or a quote, which
    corrupts a file that is meant to be read as ``<SMILES>\\t<name>``.

    Parameters
    ----------
    string1 : str
        First field (the caller passes the SMILES string).
    string2 : str
        Second field (the caller passes the compound name).
    filename : str
        Path of the file to create or overwrite.
    """
    # newline='' stops Python from translating the explicit '\r\n' terminator,
    # so ordinary input produces exactly the bytes the csv-based version wrote.
    with open(filename, 'w', newline='') as file:
        file.write(f"{string1}\t{string2}\r\n")
        
def prepare_and_dock(smiles):
    """Build a 3D ligand from a SMILES string and dock it with AutoDock Vina.

    Steps performed, all in the current working directory:

    1. Parse ``smiles``, add hydrogens, embed one conformer (seed 42) and
       write it to ``ligand_clean.sdf``.
    2. Convert that SDF to ``ligand.pdbqt`` with Meeko.
    3. Load ``docking_protein`` and the ligand SDF into PyMOL and derive the
       search box from the ligand selection via ``getbox``.
    4. Dock with Vina against ``vina_input`` and write the poses to
       ``5gs4_ligand_vina_out.pdbqt`` and ``5gs4_ligand_vina_out.sdf``.

    Parameters
    ----------
    smiles : str
        SMILES string of the ligand.

    Raises
    ------
    ValueError
        If the SMILES cannot be parsed, no 3D conformer can be embedded, or
        Meeko does not produce a usable PDBQT string.
    """
    # Convert SMILES to a molecule object; MolFromSmiles returns None on failure.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("You entered an invalid SMILES string")
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns -1 when it cannot generate a conformer; writing the
    # molecule anyway would produce an SDF without usable 3D coordinates.
    if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
        raise ValueError("Could not generate 3D coordinates for this SMILES string")

    # Write the molecule to an SDF file, closing the writer even on error.
    writer = Chem.SDWriter("ligand_clean.sdf")
    try:
        writer.write(mol)
    finally:
        writer.close()

    # Prepare the ligand. As before, the last setup produced wins.
    preparator = MoleculePreparation()
    pdbqt_string = None
    for ligand in Chem.SDMolSupplier("ligand_clean.sdf", removeHs=False):
        if ligand is None:
            continue
        for setup in preparator.prepare(ligand):
            pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
            if not is_ok:
                raise ValueError(f"Ligand preparation failed: {error_msg}")

    # Without this check a ligand.pdbqt left over from an earlier run would be
    # docked silently.
    if not pdbqt_string:
        raise ValueError("Ligand preparation produced no PDBQT output")

    with open("ligand.pdbqt", "w") as pdbqt_file:
        pdbqt_file.write(pdbqt_string)

    # NOTE(review): the box is derived from the freshly embedded ligand
    # coordinates, which have not been placed relative to the receptor --
    # confirm this is the intended search space.
    try:
        cmd.load(filename=docking_protein, format='pdb', object='prot')
        cmd.load(filename='ligand_clean.sdf', format='sdf', object='lig')
        center, size = getbox(selection='lig', extending=5.0, software='vina')
    finally:
        # Always clear the PyMOL session so repeated calls start clean.
        cmd.delete('all')

    v = Vina(sf_name='vina')
    v.set_receptor(vina_input)
    v.set_ligand_from_file('ligand.pdbqt')
    v.compute_vina_maps(center=[center['center_x'], center['center_y'], center['center_z']],
                        box_size=[size['size_x'], size['size_y'], size['size_z']])

    v.dock(exhaustiveness=10, n_poses=10)
    v.write_poses('5gs4_ligand_vina_out.pdbqt', n_poses=10, overwrite=True)
    pdbqt_to_sdf(pdbqt_file='5gs4_ligand_vina_out.pdbqt', output='5gs4_ligand_vina_out.sdf')

In [5]:
# Define action when the button is clicked
def on_button_click(b):
    """Dock the SMILES from the text box and show the top pose in 3D.

    Reads the SMILES from the module-level ``smiles_input`` widget and renders
    everything inside the module-level ``output`` widget. The previous version
    referred to ``out`` and ``input_smiles``, which are not defined anywhere
    in this notebook, so it raised ``NameError`` on a fresh kernel.

    Parameters
    ----------
    b : ipywidgets.Button
        The button that triggered the callback (unused).
    """
    with output:
        clear_output(wait=True)
        smiles = smiles_input.value
        prepare_and_dock(smiles)

        # Load and visualize the protein
        view = py3Dmol.view()
        view.removeAllModels()
        view.setBackgroundColor('white')
        view.setViewStyle({'style':'outline','color':'black','width':0.1})

        # Context manager so the PDB file handle is closed promptly.
        with open(docking_protein, 'r') as pdb_file:
            prot_pdb = pdb_file.read()
        view.addModel(prot_pdb, format='pdb')
        Prot = view.getModel()
        Prot.setStyle({'cartoon':{'arrows':True, 'tubes':False, 'style':'oval', 'color':'spectrum'}})

        results = Chem.SDMolSupplier('5gs4_ligand_vina_out.sdf')

        # The supplier yields None for records RDKit cannot parse and may be empty.
        if len(results) == 0 or results[0] is None:
            print('Docking produced no readable poses')
            return
        top_pose = results[0]

        p = Chem.MolToMolBlock(top_pose, False)

        print ('Pose: {} | Score: {}'.format(top_pose.GetProp('Pose'), top_pose.GetProp('Score')))

        view.addModel(p,'mol')
        x = view.getModel()
        x.setStyle({},{'stick':{'colorscheme':'cyanCarbon','radius':0.2}})

        view.zoomTo()
        view.show()

In [6]:
# Free-text box in which the user types the SMILES string to analyse
smiles_input = widgets.Text(placeholder="Enter Canonical SMILES")

# Selector for which of the three analyses the Run button triggers
options = widgets.Dropdown(
    description="Program:",
    disabled=False,
    options=[
        "Compute Lipinski's Descriptors",
        "Predict the Compound's pIC50",
        "Perform docking",
    ],
)

# Stylesheet injected into the page: larger label/option fonts and a centred dropdown
css = """
<style>
.widget-label {
    font-size: 16px; /* Change this value to adjust the label font size */
}
.widget-dropdown > .dropdown-container > .dropdown-list > .item {
    font-size: 16px; /* Change this value to adjust the option font size */
}

/* Center-align the widget */
.widget-dropdown {
    display: flex;
    justify-content: center;
}
</style>
"""
display(HTML(css))

# Area that receives everything the Run callback prints or displays
output = widgets.Output()

# Green "Run" button; its click handler is attached in a later cell
run_button = widgets.Button(button_style='success', description="Run")
run_button.style.button_color = 'lightgreen'

In [7]:
def _predict_pic50(smiles):
    """Return the model's pIC50 prediction for ``smiles`` formatted to two decimals.

    Writes ``molecule.smi``, computes the PaDEL 'Substructure' fingerprint into
    ``Substructure.csv`` and feeds it to the model stored in
    ``padel_model.joblib``.

    Raises
    ------
    ValueError
        If the fingerprint definition file is missing or PaDEL returns no rows.
    """
    filename = "molecule.smi"
    generate_csv_file(smiles, 'Compound_name', filename)

    # The name -> file mapping relies on the alphabetically sorted XML files
    # lining up one-to-one with FP_list.
    xml_files = sorted(glob.glob("fingerprints_xml/*.xml"))
    FP_list = ['AtomPairs2DCount', 'AtomPairs2D', 'EState', 'CDKextended', 'CDK', 'CDKgraphonly',
               'KlekotaRothCount',
               'KlekotaRoth', 'MACCS', 'PubChem', 'SubstructureCount', 'Substructure']
    fp = dict(zip(FP_list, xml_files))

    fingerprint = 'Substructure'
    if fingerprint not in fp:
        # Previously surfaced as a bare KeyError traceback.
        raise ValueError(f"Fingerprint definition for '{fingerprint}' not found in fingerprints_xml/")
    fingerprint_output_file = ''.join([fingerprint, '.csv'])

    padeldescriptor(mol_dir=filename,
                    d_file=fingerprint_output_file,
                    descriptortypes=fp[fingerprint],
                    detectaromaticity=True,
                    standardizenitro=True,
                    standardizetautomers=True,
                    removesalt=True,
                    log=True,
                    fingerprints=True)

    data = pd.read_csv(fingerprint_output_file)
    if data.empty:
        raise ValueError("PaDEL could not compute descriptors for this SMILES string")
    X = data.drop(columns=['Name'])

    # joblib.load unpickles the file; only load model files from a trusted source.
    loaded_model = joblib.load('padel_model.joblib')
    return format(loaded_model.predict(X)[0], ".2f")


def _show_top_docked_pose():
    """Display the receptor and the first pose of the docking output in py3Dmol.

    Raises
    ------
    ValueError
        If the docking output SDF contains no readable pose.
    """
    view = py3Dmol.view()
    view.removeAllModels()
    view.setBackgroundColor('white')
    view.setViewStyle({'style':'outline','color':'black','width':0.1})

    # Load protein; the context manager closes the file handle promptly.
    with open(docking_protein, 'r') as pdb_file:
        prot_pdb = pdb_file.read()
    view.addModel(prot_pdb, format='pdb')
    Prot = view.getModel()
    Prot.setStyle({'cartoon':{'arrows':True, 'tubes':False, 'style':'oval', 'color':'spectrum'}})

    # Load docking results; the supplier yields None for unparseable records.
    results = Chem.SDMolSupplier('5gs4_ligand_vina_out.sdf')
    if len(results) == 0 or results[0] is None:
        raise ValueError("Docking produced no readable poses")

    p = Chem.MolToMolBlock(results[0], False)
    view.addModel(p, 'mol')
    x = view.getModel()
    x.setStyle({}, {'stick': {'colorscheme': 'cyanCarbon', 'radius': 0.2}})

    view.zoomTo()
    view.show()


def run_button_clicked(b):
    """Run the program selected in the dropdown on the entered SMILES string.

    Clears the ``output`` widget, validates the SMILES once, then dispatches to
    descriptor calculation, pIC50 prediction or docking. Problems reported as
    ``ValueError`` are printed to the output widget instead of raising.

    Parameters
    ----------
    b : ipywidgets.Button
        The button that triggered the callback (unused).
    """
    with output:
        output.clear_output()

        # Strip so pasted whitespace neither passes the empty check nor ends up
        # in the .smi file.
        smiles = smiles_input.value.strip()
        selected_option = options.value

        if not smiles:
            print("You did not enter Canonical SMILES")
            return

        try:
            # Validate up front: the docking and prediction paths would
            # otherwise fail with exception types that are not caught below.
            if Chem.MolFromSmiles(smiles) is None:
                raise ValueError("You entered an invalid SMILES string")

            if selected_option == "Compute Lipinski's Descriptors":
                table_html = calculate_lipinski_descriptors(smiles)
                display(HTML(table_html))

            elif selected_option == "Predict the Compound's pIC50":
                predicted_value = _predict_pic50(smiles)
                display(Markdown(f"### The pIC50 of your compound is **{predicted_value}**"))

            elif selected_option == "Perform docking":
                prepare_and_dock(smiles)
                _show_top_docked_pose()

        except ValueError as e:
            print(str(e))

In [8]:
# Attach callback to the Run button: each click invokes run_button_clicked
run_button.on_click(run_button_clicked)

In [9]:
# Render the SMILES box, program selector, Run button and output area, in that order
display(smiles_input, options, run_button, output)

Text(value='', placeholder='Enter Canonical SMILES')

Dropdown(description='Program:', options=("Compute Lipinski's Descriptors", "Predict the Compound's pIC50", 'P…

Button(button_style='success', description='Run', style=ButtonStyle(button_color='lightgreen'))

Output()