In [3]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Dummy database of SMILES embeddings and likelihoods.
# The embeddings are random placeholders, not real molecular fingerprints.
# They are drawn from a locally seeded generator so that every kernel restart
# produces the same vectors and similarity lookups stay reproducible.
_EMBEDDING_DIM = 4096
_embedding_rng = np.random.default_rng(0)
database = [
    {"smiles": "CCO", "embedding": _embedding_rng.random(_EMBEDDING_DIM), "likelihood": 0.9},     # Ethanol
    {"smiles": "CCl", "embedding": _embedding_rng.random(_EMBEDDING_DIM), "likelihood": 0.85},    # Chloromethane
]


# Retrieve similar reactions based on cosine similarity
def retrieve_similar_reactions(query_embedding, top_k=1):
    """Return the ``top_k`` entries of ``database`` closest to ``query_embedding``.

    Args:
        query_embedding: 1-D vector with the same length as the stored
            ``"embedding"`` vectors.
        top_k: Maximum number of entries to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of database entries (dicts) ordered from most to least similar
        by cosine similarity. The list is shorter than ``top_k`` when the
        database holds fewer entries, and empty when the database is empty.
    """
    # A slice of [-0:] selects *every* element and a negative top_k selects an
    # arbitrary tail, so non-positive values must be rejected explicitly.
    if top_k <= 0:
        return []
    # cosine_similarity cannot be called with an empty comparison matrix.
    if not database:
        return []

    embeddings = np.array([entry["embedding"] for entry in database])
    similarities = cosine_similarity([query_embedding], embeddings).flatten()
    # argsort is ascending: reverse it, then keep the first top_k indices.
    indices = similarities.argsort()[::-1][:top_k]
    return [database[i] for i in indices]


In [5]:
import openvino.runtime as ov
import numpy as np
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.rdFingerprintGenerator import GetMorganGenerator
import gradio as gr
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity

# Set up OpenVINO inference: read the model file and compile it for the CPU device
model_path = "production_prediction_openvino1.xml"  # Replace with your actual model path
core = ov.Core()
compiled_model = core.compile_model(
    core.read_model(model_path),
    "CPU",
)

# Dummy database of SMILES embeddings and likelihoods
# (4096-dimensional embeddings for consistency with the fingerprint size).
# The embeddings are random placeholders, not real molecular fingerprints.
# They are drawn from a locally seeded generator so that every kernel restart
# produces the same vectors and similarity lookups stay reproducible.
_EMBEDDING_DIM = 4096
_embedding_rng = np.random.default_rng(0)
database = [
    {"smiles": "CCO", "embedding": _embedding_rng.random(_EMBEDDING_DIM), "likelihood": 0.9},        # Ethanol
    {"smiles": "CCl", "embedding": _embedding_rng.random(_EMBEDDING_DIM), "likelihood": 0.85},      # Chloromethane
    {"smiles": "CC(=O)O", "embedding": _embedding_rng.random(_EMBEDDING_DIM), "likelihood": 0.8},   # Acetic acid
    # ... add more entries with 4096-dimensional embeddings ...
]

# Retrieve similar reactions based on cosine similarity (using only reactant embedding)
def retrieve_similar_reactions(query_embedding, top_k=1):
    """Return the ``top_k`` entries of ``database`` closest to ``query_embedding``.

    Args:
        query_embedding: 1-D vector with the same length as the stored
            ``"embedding"`` vectors.
        top_k: Maximum number of entries to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of database entries (dicts) ordered from most to least similar
        by cosine similarity. The list is shorter than ``top_k`` when the
        database holds fewer entries, and empty when the database is empty.
    """
    # A slice of [-0:] selects *every* element and a negative top_k selects an
    # arbitrary tail, so non-positive values must be rejected explicitly.
    if top_k <= 0:
        return []
    # cosine_similarity cannot be called with an empty comparison matrix.
    if not database:
        return []

    embeddings = np.array([entry["embedding"] for entry in database])
    similarities = cosine_similarity([query_embedding], embeddings).flatten()
    # argsort is ascending: reverse it, then keep the first top_k indices.
    indices = similarities.argsort()[::-1][:top_k]
    return [database[i] for i in indices]

# Fingerprint generator (updated fpSize)
# Morgan fingerprint generator used by the prediction function below.
# fpSize=4096 equals the length of the dummy embeddings in `database`, so a
# single fingerprint can be compared against them directly; the reactant and
# product fingerprints are later concatenated into one 8192-long model input.
fp_generator = GetMorganGenerator(radius=2, fpSize=4096)

# Predict function with RAG integration
def predict_reaction_with_rag_openvino(reactant_smiles, product_smiles):
    """Score a reactant/product pair with the OpenVINO model plus a similarity lookup.

    Args:
        reactant_smiles: SMILES string of the reactant.
        product_smiles: SMILES string of the product.

    Returns:
        A 4-tuple ``(message, reactant_image, product_image, extra_message)``.
        ``message`` reports the model prediction together with the likelihood
        stored for the most similar database entry; the images are PIL
        renderings of the two molecules. If either input is missing, blank or
        cannot be parsed, the tuple is
        ``("Invalid SMILES string. Please check your input.", None, None, None)``.
    """
    invalid_input = ("Invalid SMILES string. Please check your input.", None, None, None)

    # Reject missing/blank input up front: None would otherwise be passed to
    # the SMILES parser, and an empty string is expected to parse into an
    # atom-less molecule that slips past the `is None` check below and would
    # be scored as an all-zero fingerprint.
    reactant_smiles = (reactant_smiles or "").strip()
    product_smiles = (product_smiles or "").strip()
    if not reactant_smiles or not product_smiles:
        return invalid_input

    # Generate molecules from SMILES strings
    reactant_mol = Chem.MolFromSmiles(reactant_smiles)
    product_mol = Chem.MolFromSmiles(product_smiles)

    if reactant_mol is None or product_mol is None:
        return invalid_input

    # Generate fingerprints
    reactant_fp = np.array(fp_generator.GetFingerprint(reactant_mol))
    product_fp = np.array(fp_generator.GetFingerprint(product_mol))
    combined_fp = np.concatenate([reactant_fp, product_fp])  # Now has shape (8192,)

    # Retrieve similar reactions using only the reactant embedding
    similar_reactions = retrieve_similar_reactions(reactant_fp)

    # Reshape for OpenVINO input
    input_data = combined_fp.reshape(1, -1).astype(np.float32)  # Ensure correct shape (1, 8192)

    # Run inference with OpenVINO
    result = compiled_model([input_data])[0]
    predicted_likelihood = result[0].item()

    # Generate molecular images
    reactant_img = Draw.MolToImage(reactant_mol, size=(200, 200))
    product_img = Draw.MolToImage(product_mol, size=(200, 200))

    # Display result
    similar_likelihood = similar_reactions[0]["likelihood"] if similar_reactions else 0.0
    return (
        f"Predicted Reaction Likelihood: {predicted_likelihood:.4f} (Augmented with similar likelihood: {similar_likelihood:.4f})",
        reactant_img,
        product_img,
        # NOTE(review): this echoes the model prediction, not a ground-truth
        # label; replace it once true labels are available.
        f"True Likelihood: {predicted_likelihood:.4f}"
    )

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Reaction Prediction with RAG Integration using OpenVINO")

    with gr.Row():
        reactant_smiles = gr.Textbox(label="Reactant SMILES", placeholder="Enter reactant SMILES (e.g., CCO for ethanol)")
        product_smiles = gr.Textbox(label="Product SMILES", placeholder="Enter product SMILES (e.g., CCO for ethanol)")

    predict_button = gr.Button("Predict Reaction Likelihood")

    with gr.Row():
        likelihood_output = gr.Textbox(label="Predicted Reaction Likelihood")
        reactant_image = gr.Image(type="pil", label="Reactant Molecule")
        product_image = gr.Image(type="pil", label="Product Molecule")

    # The prediction function returns four values. Each one needs its own
    # component: listing `likelihood_output` twice routed the fourth value to
    # the same textbox as the first, so the two collided (and on invalid input
    # the trailing None competed with the error message).
    true_likelihood_output = gr.Textbox(label="True Likelihood")

    predict_button.click(
        predict_reaction_with_rag_openvino,
        inputs=[reactant_smiles, product_smiles],
        outputs=[likelihood_output, reactant_image, product_image, true_likelihood_output],
    )

# Launch the Gradio app.
# share=True additionally publishes the demo under a temporary public URL
# (see the "Running on public URL" line in the cell output), so anyone holding
# that link can reach the app while it is running.
demo.launch(share=True)



Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://e4e25eef309851198d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




[23:07:39] SMILES Parse Error: syntax error while parsing: CC+O
[23:07:39] SMILES Parse Error: Failed parsing SMILES 'CC+O' for input: 'CC+O'
