In [1]:
pip install gradio

Note: you may need to restart the kernel to use updated packages.


In [7]:
import gradio as gr
import openvino.runtime as ov
from rdkit import Chem
from rdkit.Chem import AllChem, Draw, rdMolDescriptors
import numpy as np
import torch
import intel_extension_for_pytorch as ipex
import random
from PIL import Image

# Feed-forward network whose weights are restored from 'lipophilicity_model.pth' below
class Net(torch.nn.Module):
    """Three-layer fully connected network: 2048 -> 1024 -> 512 -> 1.

    ReLU follows each of the first two linear layers, and each ReLU is followed
    by dropout with p=0.3. The attribute names (fc1, fc2, fc3) are the keys of
    the state dict, so they must match the checkpoint being loaded.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Layers are created in the same order as they are applied in forward().
        self.fc1 = nn.Linear(2048, 1024)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(1024, 512)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(512, 1)

    def forward(self, x):
        """Run the network on `x` (last dimension 2048) and return the fc3 output."""
        hidden = self.dropout1(torch.relu(self.fc1(x)))
        hidden = self.dropout2(torch.relu(self.fc2(hidden)))
        return self.fc3(hidden)

# Load and optimize the PyTorch model with IPEX
torch_model = Net()
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute code; it also silences the torch.load
# FutureWarning. map_location='cpu' lets a checkpoint saved on another device
# load on a CPU-only machine.
torch_model.load_state_dict(
    torch.load('lipophilicity_model.pth', map_location='cpu', weights_only=True)
)
torch_model.eval()  # inference mode: dropout layers become no-ops
ipex_model = ipex.optimize(torch_model)

# Read the OpenVINO model description from disk, then compile it for the CPU device
core = ov.Core()
ov_model_path = 'binding_affinity_model_openvino.xml'
ov_network = core.read_model(ov_model_path)
compiled_ov_model = core.compile_model(ov_network, "CPU")

# Function to convert SMILES to a Morgan fingerprint
def smiles_to_fp(smiles):
    """Convert a SMILES string into a 2048-bit Morgan fingerprint vector.

    Args:
        smiles: SMILES string describing the molecule.

    Returns:
        1-D float32 NumPy array built from the radius-2, 2048-bit Morgan
        bit vector of the parsed molecule.

    Raises:
        ValueError: if `smiles` is not a string, is empty/blank, or cannot be
            parsed by RDKit.
    """
    # An empty SMILES parses to an atom-less molecule rather than None, which
    # would slip through the check below and fail later in the pipeline.
    # Reject it (and non-string input) here with the same error.
    if not isinstance(smiles, str) or not smiles.strip():
        raise ValueError("Invalid SMILES string.")

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("Invalid SMILES string.")
    fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
    return np.array(fp, dtype=np.float32)

# Variational autoencoder over 2048-wide fingerprint vectors
class VAE(torch.nn.Module):
    """Single-hidden-layer VAE: 2048 -> 512 -> (mu, logvar: 256) -> 2048.

    The encoder produces the mean and log-variance of a 256-dimensional latent
    Gaussian; the decoder maps a latent sample back to 2048 values in (0, 1)
    through a sigmoid.
    """

    def __init__(self):
        super().__init__()
        linear = torch.nn.Linear
        self.fc1 = linear(2048, 512)
        self.fc_mu = linear(512, 256)
        self.fc_logvar = linear(512, 256)
        self.fc_decode = linear(256, 2048)

    def encode(self, x):
        """Return `(mu, logvar)` of the latent distribution for input `x`."""
        hidden = torch.relu(self.fc1(x))
        mu = self.fc_mu(hidden)
        logvar = self.fc_logvar(hidden)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        """Draw a latent sample as mu + eps * std, with eps ~ N(0, I)."""
        std = (0.5 * logvar).exp()  # logvar is log(sigma^2), so std = exp(logvar / 2)
        noise = torch.randn_like(std)
        return mu + noise * std

    def decode(self, z):
        """Map a latent vector `z` to a 2048-wide output squashed by a sigmoid."""
        logits = self.fc_decode(z)
        return torch.sigmoid(logits)

    def forward(self, x):
        """Encode, sample, decode; return `(reconstruction, mu, logvar)`."""
        mu, logvar = self.encode(x)
        reconstruction = self.decode(self.reparameterize(mu, logvar))
        return reconstruction, mu, logvar

# Instantiate the VAE and switch it to eval mode. No checkpoint is loaded here,
# so the weights are whatever the default layer initialisation produced.
# Ideally, load a pre-trained VAE model, e.g., vae_model.load_state_dict(torch.load('vae_model.pth'))
vae_model = VAE().eval()

# Generate New Molecules Using the VAE
def generate_new_molecules(smiles, num_variants=5):
    """Produce up to `num_variants` candidate molecules from a scaffold SMILES.

    NOTE(review): the fingerprint sampled from the VAE is computed but not yet
    converted back into a structure. Every returned molecule is currently a
    sanitized copy of the input scaffold, rebuilt from its canonical SMILES.

    Args:
        smiles: scaffold SMILES string.
        num_variants: number of candidates to attempt; coerced with `int()` so
            float-valued counts (e.g. 5.0) are accepted.

    Returns:
        List of RDKit molecules; variants that fail to build are skipped.

    Raises:
        ValueError: propagated from `smiles_to_fp` when `smiles` is invalid.
    """
    import warnings

    scaffold_fp = smiles_to_fp(smiles)  # also validates the input SMILES
    scaffold_tensor = torch.tensor(scaffold_fp, dtype=torch.float32).unsqueeze(0)
    new_molecules = []

    with torch.no_grad():
        mu, logvar = vae_model.encode(scaffold_tensor)
        for _ in range(int(num_variants)):
            # Sample a new fingerprint from the VAE's latent space.
            # TODO: map this fingerprint back to a molecule; it is unused for now.
            z = vae_model.reparameterize(mu, logvar)
            _sampled_fp = vae_model.decode(z).numpy().squeeze()

            try:
                # Use the input SMILES as the base structure for the variant.
                canonical_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(smiles))
                mol = Chem.MolFromSmiles(canonical_smiles)
                if mol is not None:
                    AllChem.SanitizeMol(mol)
                    new_molecules.append(mol)
            except Exception as exc:
                # Best effort: skip this variant but report why, and let
                # KeyboardInterrupt/SystemExit propagate (a bare except would not).
                warnings.warn(
                    f"Skipping variant that could not be built from {smiles!r}: {exc}"
                )

    return new_molecules

# Predict a scalar score for a molecule with the selected inference backend
def score_molecule(mol, method="OpenVINO", conf_id=0):
    """Score `mol` from its Morgan fingerprint using OpenVINO or the IPEX model.

    Args:
        mol: RDKit molecule; it is converted to SMILES and then fingerprinted.
        method: "OpenVINO" selects the compiled OpenVINO model; any other value
            selects the IPEX-optimized PyTorch model.
        conf_id: conformer id. NOTE(review): currently not read by this
            function, so all conformers of a molecule receive the same score.

    Returns:
        The model output as a Python float.
    """
    fingerprint = smiles_to_fp(Chem.MolToSmiles(mol))
    features = torch.tensor(fingerprint, dtype=torch.float32).unsqueeze(0)  # add batch dim

    if method != "OpenVINO":
        with torch.no_grad():
            prediction = ipex_model(features)
        return prediction.item()

    ov_input_tensor = ov.Tensor(features.numpy())
    outputs = compiled_ov_model([ov_input_tensor])
    # First output of the model, first row of the batch.
    return outputs[0][0].item()

# Generate Conformers for Each New Molecule
def generate_conformers(mol, num_conformers=10):
    """Add explicit hydrogens to `mol` and embed 3D conformers on the result.

    Args:
        mol: RDKit molecule.
        num_conformers: number of conformers to request; coerced with `int()`
            because RDKit's binding rejects float counts such as 10.0.

    Returns:
        The hydrogen-added molecule returned by `Chem.AddHs`, carrying whatever
        conformers the embedding produced. NOTE(review): embedding can yield
        fewer conformers than requested (possibly none), so callers should
        check `GetNumConformers()`.
    """
    mol = Chem.AddHs(mol)
    # Fixed seed keeps the embedding reproducible between runs.
    AllChem.EmbedMultipleConfs(mol, numConfs=int(num_conformers), randomSeed=42)
    return mol

# Score every conformer of a molecule with the selected method
def score_conformers(mol, method="OpenVINO"):
    """Return a list of `(conf_id, score)` pairs, one per conformer of `mol`.

    Conformer ids are taken as 0 .. GetNumConformers() - 1 and each one is
    passed to `score_molecule` together with `method`. A molecule with no
    conformers yields an empty list.
    """
    return [
        (conf_id, score_molecule(mol, method=method, conf_id=conf_id))
        for conf_id in range(mol.GetNumConformers())
    ]

# Drug Discovery Pipeline Function
def drug_discovery_pipeline(initial_smiles, num_molecules=5, num_conformers=10, method="OpenVINO", top_k=3):
    """Generate candidates, score their conformers and return the best ones.

    Args:
        initial_smiles: scaffold SMILES string.
        num_molecules: number of candidate molecules to generate.
        num_conformers: number of conformers to embed per candidate.
        method: inference backend name passed through to the scoring functions.
        top_k: how many of the lowest-scoring candidates to return (default 3,
            the previously hard-coded value).

    Returns:
        Up to `top_k` tuples `(mol, best_conformer_id, best_score)` sorted by
        ascending score. `mol` is the candidate as generated, i.e. without the
        added hydrogens or conformers.
    """
    new_molecules = generate_new_molecules(initial_smiles, num_molecules)
    best_molecules = []

    for mol in new_molecules:
        mol_with_confs = generate_conformers(mol, num_conformers)
        conformer_scores = score_conformers(mol_with_confs, method)
        if not conformer_scores:
            # Conformer embedding can fail and yield nothing to rank; skip the
            # candidate instead of raising IndexError on an empty list.
            continue

        # Lowest score wins; min() returns the first minimum, matching what a
        # stable ascending sort followed by [0] would pick.
        best_conformer_id, best_score = min(conformer_scores, key=lambda item: item[1])
        best_molecules.append((mol, best_conformer_id, best_score))

    best_molecules.sort(key=lambda item: item[2])
    return best_molecules[:top_k]

# Render each ranked molecule as an image with a caption
def visualize_molecules(molecules):
    """Turn `(mol, conf_id, score)` tuples into `(image, caption)` pairs.

    Each molecule is drawn with `Draw.MolToImage(..., kekulize=True)`; the
    caption reports the score to four decimals and the conformer id.
    """
    return [
        (
            Draw.MolToImage(mol, kekulize=True),
            f"Score: {score:.4f}, Conformer ID: {conf_id}",
        )
        for mol, conf_id, score in molecules
    ]

# Gradio Function to Run Pipeline and Display Results
def run_pipeline(initial_smiles, num_molecules, num_conformers, method):
    """Gradio callback: run the pipeline and return gallery items.

    Args:
        initial_smiles: SMILES text entered by the user.
        num_molecules: slider value; coerced to int before use.
        num_conformers: slider value; coerced to int before use.
        method: selected inference backend name.

    Returns:
        List of `(image, caption)` pairs from `visualize_molecules`.

    Raises:
        gr.Error: when the input is rejected with a ValueError (e.g. an invalid
            SMILES string), so the message is shown to the user in the UI.
    """
    try:
        top_molecules = drug_discovery_pipeline(
            initial_smiles, int(num_molecules), int(num_conformers), method
        )
    except ValueError as exc:
        # Surface validation problems as a readable message in the interface.
        raise gr.Error(str(exc)) from exc
    return visualize_molecules(top_molecules)

# Gradio Interface: input widgets, in the positional order expected by run_pipeline
pipeline_inputs = [
    gr.Textbox(label="Initial SMILES", placeholder="e.g., C1=CC=CC=C1"),
    gr.Slider(label="Number of Molecules", minimum=1, maximum=10, step=1, value=5),
    gr.Slider(label="Number of Conformers", minimum=1, maximum=20, step=1, value=10),
    gr.Radio(["OpenVINO", "IPEX"], label="Inference Method", value="OpenVINO"),
]
results_gallery = gr.Gallery(label="Top Molecules and Scores")

interface = gr.Interface(
    fn=run_pipeline,
    inputs=pipeline_inputs,
    outputs=results_gallery,
    title="Drug Discovery Pipeline",
    description="Generate and score new molecules based on an initial scaffold using OpenVINO or IPEX and RDKit.",
)

# share=True also requests a temporary public URL in addition to the local one.
interface.launch(share=True)



  torch_model.load_state_dict(torch.load('lipophilicity_model.pth'))


Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://ff55c02fb48307d254.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




