In [7]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Dummy database of SMILES embeddings and likelihoods.
#
# The embeddings are random placeholders: they are NOT derived from the SMILES
# strings, so similarity search against them is arbitrary. A dedicated, seeded
# generator keeps the placeholder values identical across kernel restarts
# without touching NumPy's global random state.
DATABASE_SEED = 42
EMBEDDING_DIM = 2048

# (SMILES, likelihood) pairs; the trailing comment names the compound.
REFERENCE_MOLECULES = [
    ("CCO", 0.9),              # Ethanol
    ("CCl", 0.85),             # Chloroethane
    ("CC(=O)O", 0.8),          # Acetic acid
    ("CO", 0.88),              # Methanol
    ("CC(=O)OC", 0.75),        # Methyl acetate
    ("CCC", 0.82),             # Propane
    ("CCN", 0.8),              # Ethylamine
    ("C=O", 0.86),             # Formaldehyde
    ("CC(=O)N", 0.83),         # Acetamide
    ("CC(C)O", 0.84),          # Isopropanol
    ("C(CO)O", 0.87),          # Ethylene glycol
    ("CCCC", 0.81),            # Butane
    ("CC(C)C", 0.83),          # Isobutane
    ("C1CCCCC1", 0.92),        # Cyclohexane
    ("C1=CC=CC=C1", 0.89),     # Benzene
    ("CCOC", 0.79),            # Ethyl methyl ether
    ("C(CCl)Cl", 0.76),        # 1,2-Dichloroethane
    ("CC#N", 0.78),            # Acetonitrile
    ("CC(=O)Cl", 0.77),        # Acetyl chloride
    ("CCCCCCC", 0.73),         # Heptane
    ("CCCCO", 0.81),           # Butanol
    ("CC(C)CO", 0.78),         # Isobutanol
    ("C1=CC=C(C=C1)O", 0.9),   # Phenol
    ("CC(C)COC", 0.74),        # Isobutyl methyl ether
    ("CC(C)(C)O", 0.85),       # Tert-butanol
    ("CC(=O)OCC", 0.82),       # Ethyl acetate
    ("C(CO)N", 0.83),          # Ethanolamine
    ("C(=O)N", 0.86),          # Formamide
    ("CC(=O)NC", 0.84),        # N-Methylacetamide
    ("CC(=O)NCC", 0.8),        # N-Ethylacetamide
    ("CC1=CC=CC=C1", 0.91),    # Toluene
    ("CCC(=O)O", 0.79),        # Propanoic acid
    ("CC(=O)OCCC", 0.77),      # Propyl acetate
    ("CC(=O)C", 0.8),          # Acetone
    ("C(CO)CO", 0.87),         # 1,3-Propanediol
    ("CCOCC", 0.78),           # Diethyl ether
    ("CC(=O)CCC", 0.82),       # 2-Pentanone
    ("CCC(=O)OCC", 0.76),      # Ethyl propionate
    ("CC(C)C(=O)O", 0.79),     # Isobutyric acid
    ("CCCCCC", 0.75),          # Hexane
    ("CCC(C)O", 0.84),         # 2-Butanol
    ("CCCC=O", 0.8),           # Butanal
    ("C=C", 0.85),             # Ethene (Ethylene)
    ("C=CC", 0.82),            # Propene
    ("CC(=O)OC", 0.76),        # Methyl acetate (same SMILES as an earlier row, different likelihood)
    ("C1=CC=C(C=C1)N", 0.89),  # Aniline
    ("CCCCN", 0.81),           # Butylamine
    ("CNC(=O)C", 0.83),        # N-Methylacetamide (same compound as CC(=O)NC, written differently)
    ("CC(C)C(=O)N", 0.84),     # Isobutyramide
]

embedding_rng = np.random.default_rng(DATABASE_SEED)

# One dict per molecule, in table order, each with its own placeholder embedding.
database = [
    {
        "smiles": smiles,
        "embedding": embedding_rng.random(EMBEDDING_DIM),
        "likelihood": likelihood,
    }
    for smiles, likelihood in REFERENCE_MOLECULES
]


# Retrieve similar reactions based on cosine similarity
def retrieve_similar_reactions(query_embedding, top_k=1):
    """Return the ``top_k`` database entries most similar to ``query_embedding``.

    Similarity is the cosine similarity between ``query_embedding`` and each
    entry's ``"embedding"`` in the module-level ``database`` list.

    Args:
        query_embedding: 1-D array-like with the same length as the stored
            embeddings.
        top_k: Maximum number of entries to return. Values <= 0 return an
            empty list.

    Returns:
        A list of database entries (dicts), most similar first. It holds at
        most ``top_k`` items and is empty when the database is empty.
    """
    # Guard first: with top_k == 0 the slice [-0:] would select *every* entry,
    # and a negative top_k would select an unrelated tail of the ranking.
    if top_k <= 0:
        return []
    if not database:
        return []

    embeddings = np.array([entry["embedding"] for entry in database])
    similarities = cosine_similarity([query_embedding], embeddings).flatten()
    # argsort is ascending, so reverse it and keep the first top_k positions.
    indices = similarities.argsort()[::-1][:top_k]
    return [database[i] for i in indices]


In [8]:
import openvino.runtime as ov
import numpy as np
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.rdFingerprintGenerator import GetMorganGenerator
import gradio as gr
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity

# Read the OpenVINO model file, then compile it for execution on the CPU device
core = ov.Core()
model_path = "production_prediction_openvino1.xml"
ov_model = core.read_model(model_path)
compiled_model = core.compile_model(ov_model, "CPU")

# Dummy database of SMILES embeddings and likelihoods (only 2048-dimensional embeddings).
#
# The embeddings are random placeholders: they are NOT derived from the SMILES
# strings, so similarity search against them is arbitrary. A dedicated, seeded
# generator keeps the placeholder values identical across kernel restarts
# without touching NumPy's global random state.
DATABASE_SEED = 42
EMBEDDING_DIM = 2048

# (SMILES, likelihood) pairs; the trailing comment names the compound.
REFERENCE_MOLECULES = [
    ("CCO", 0.9),              # Ethanol
    ("CCl", 0.85),             # Chloroethane
    ("CC(=O)O", 0.8),          # Acetic acid
    ("CO", 0.88),              # Methanol
    ("CC(=O)OC", 0.75),        # Methyl acetate
    ("CCC", 0.82),             # Propane
    ("CCN", 0.8),              # Ethylamine
    ("C=O", 0.86),             # Formaldehyde
    ("CC(=O)N", 0.83),         # Acetamide
    ("CC(C)O", 0.84),          # Isopropanol
    ("C(CO)O", 0.87),          # Ethylene glycol
    ("CCCC", 0.81),            # Butane
    ("CC(C)C", 0.83),          # Isobutane
    ("C1CCCCC1", 0.92),        # Cyclohexane
    ("C1=CC=CC=C1", 0.89),     # Benzene
    ("CCOC", 0.79),            # Ethyl methyl ether
    ("C(CCl)Cl", 0.76),        # 1,2-Dichloroethane
    ("CC#N", 0.78),            # Acetonitrile
    ("CC(=O)Cl", 0.77),        # Acetyl chloride
    ("CCCCCCC", 0.73),         # Heptane
    ("CCCCO", 0.81),           # Butanol
    ("CC(C)CO", 0.78),         # Isobutanol
    ("C1=CC=C(C=C1)O", 0.9),   # Phenol
    ("CC(C)COC", 0.74),        # Isobutyl methyl ether
    ("CC(C)(C)O", 0.85),       # Tert-butanol
    ("CC(=O)OCC", 0.82),       # Ethyl acetate
    ("C(CO)N", 0.83),          # Ethanolamine
    ("C(=O)N", 0.86),          # Formamide
    ("CC(=O)NC", 0.84),        # N-Methylacetamide
    ("CC(=O)NCC", 0.8),        # N-Ethylacetamide
    ("CC1=CC=CC=C1", 0.91),    # Toluene
    ("CCC(=O)O", 0.79),        # Propanoic acid
    ("CC(=O)OCCC", 0.77),      # Propyl acetate
    ("CC(=O)C", 0.8),          # Acetone
    ("C(CO)CO", 0.87),         # 1,3-Propanediol
    ("CCOCC", 0.78),           # Diethyl ether
    ("CC(=O)CCC", 0.82),       # 2-Pentanone
    ("CCC(=O)OCC", 0.76),      # Ethyl propionate
    ("CC(C)C(=O)O", 0.79),     # Isobutyric acid
    ("CCCCCC", 0.75),          # Hexane
    ("CCC(C)O", 0.84),         # 2-Butanol
    ("CCCC=O", 0.8),           # Butanal
    ("C=C", 0.85),             # Ethene (Ethylene)
    ("C=CC", 0.82),            # Propene
    ("CC(=O)OC", 0.76),        # Methyl acetate (same SMILES as an earlier row, different likelihood)
    ("C1=CC=C(C=C1)N", 0.89),  # Aniline
    ("CCCCN", 0.81),           # Butylamine
    ("CNC(=O)C", 0.83),        # N-Methylacetamide (same compound as CC(=O)NC, written differently)
    ("CC(C)C(=O)N", 0.84),     # Isobutyramide
]

embedding_rng = np.random.default_rng(DATABASE_SEED)

# One dict per molecule, in table order, each with its own placeholder embedding.
database = [
    {
        "smiles": smiles,
        "embedding": embedding_rng.random(EMBEDDING_DIM),
        "likelihood": likelihood,
    }
    for smiles, likelihood in REFERENCE_MOLECULES
]

# Retrieve similar reactions based on cosine similarity (using only reactant embedding)
def retrieve_similar_reactions(query_embedding, top_k=1):
    """Return the ``top_k`` database entries most similar to ``query_embedding``.

    Similarity is the cosine similarity between ``query_embedding`` and each
    entry's ``"embedding"`` in the module-level ``database`` list.

    Args:
        query_embedding: 1-D array-like with the same length as the stored
            embeddings.
        top_k: Maximum number of entries to return. Values <= 0 return an
            empty list.

    Returns:
        A list of database entries (dicts), most similar first. It holds at
        most ``top_k`` items and is empty when the database is empty.
    """
    # Guard first: with top_k == 0 the slice [-0:] would select *every* entry,
    # and a negative top_k would select an unrelated tail of the ranking.
    if top_k <= 0:
        return []
    if not database:
        return []

    embeddings = np.array([entry["embedding"] for entry in database])
    similarities = cosine_similarity([query_embedding], embeddings).flatten()
    # argsort is ascending, so reverse it and keep the first top_k positions.
    indices = similarities.argsort()[::-1][:top_k]
    return [database[i] for i in indices]

# Shared Morgan fingerprint generator; its settings are named so they can be tuned in one place
MORGAN_RADIUS = 2
FINGERPRINT_BITS = 2048
fp_generator = GetMorganGenerator(radius=MORGAN_RADIUS, fpSize=FINGERPRINT_BITS)

# Predict function with RAG integration
def predict_reaction_with_rag_openvino(reactant_smiles, product_smiles):
    """Score a reactant/product pair with the OpenVINO model and look up the nearest database entry.

    Args:
        reactant_smiles: SMILES string of the reactant.
        product_smiles: SMILES string of the product.

    Returns:
        A 4-tuple ``(summary_text, reactant_image, product_image, detail_text)``.
        When either input is blank or cannot be parsed, the first element is an
        error message and the other three are ``None``.
    """
    invalid_input = ("Invalid SMILES string. Please check your input.", None, None, None)

    # Reject blank input up front. An empty string is not caught by the
    # ``is None`` check below (RDKit is expected to hand back an atom-less
    # molecule for it), which would otherwise send an all-zero fingerprint to
    # the model.
    reactant_smiles = (reactant_smiles or "").strip()
    product_smiles = (product_smiles or "").strip()
    if not reactant_smiles or not product_smiles:
        return invalid_input

    # Generate molecules from SMILES strings
    reactant_mol = Chem.MolFromSmiles(reactant_smiles)
    product_mol = Chem.MolFromSmiles(product_smiles)

    if reactant_mol is None or product_mol is None:
        return invalid_input

    # Generate fingerprints
    reactant_fp = np.array(fp_generator.GetFingerprint(reactant_mol))
    product_fp = np.array(fp_generator.GetFingerprint(product_mol))
    combined_fp = np.concatenate([reactant_fp, product_fp])  # reactant bits followed by product bits

    # Retrieve similar reactions using only the reactant embedding
    similar_reactions = retrieve_similar_reactions(reactant_fp)

    # Only the concatenated fingerprint is fed to the model; the retrieved
    # entry is reported alongside the prediction but is not a model input.
    augmented_input = combined_fp.reshape(1, -1).astype(np.float32)

    # Run inference with OpenVINO and take the first output's first row as the score
    result = compiled_model([augmented_input])[0]
    predicted_likelihood = result[0].item()

    # Generate molecular images
    reactant_img = Draw.MolToImage(reactant_mol, size=(200, 200))
    product_img = Draw.MolToImage(product_mol, size=(200, 200))

    # Display result
    similar_likelihood = similar_reactions[0]["likelihood"] if similar_reactions else 0.0
    # NOTE(review): the fourth element is labelled "True Likelihood" but carries
    # the model's prediction, not a ground-truth value - confirm the intended wording.
    return (
        f"Predicted Reaction Likelihood: {predicted_likelihood:.4f} (Augmented with similar likelihood: {similar_likelihood:.4f})",
        reactant_img,
        product_img,
        f"True Likelihood: {predicted_likelihood:.4f}"
    )

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Reaction Prediction with RAG Integration using OpenVINO")

    with gr.Row():
        reactant_smiles = gr.Textbox(label="Reactant SMILES", placeholder="Enter reactant SMILES (e.g., CCO for ethanol)")
        product_smiles = gr.Textbox(label="Product SMILES", placeholder="Enter product SMILES (e.g., CCO for ethanol)")

    predict_button = gr.Button("Predict Reaction Likelihood")

    with gr.Row():
        likelihood_output = gr.Textbox(label="Predicted Reaction Likelihood")
        reactant_image = gr.Image(type="pil", label="Reactant Molecule")
        product_image = gr.Image(type="pil", label="Product Molecule")

    # The prediction function returns four values. Each one needs its own
    # component: listing the same textbox twice leaves the first and fourth
    # strings competing for a single widget.
    likelihood_detail_output = gr.Textbox(label="Likelihood Details")

    predict_button.click(
        predict_reaction_with_rag_openvino,
        inputs=[reactant_smiles, product_smiles],
        outputs=[likelihood_output, reactant_image, product_image, likelihood_detail_output]
    )

# Launch the Gradio app
# NOTE: share=True publishes a temporary public URL in addition to the local
# one; use share=False when the demo should stay on this machine.
demo.launch(share=True)




Running on local URL:  http://127.0.0.1:7871
Running on public URL: https://2140cbadca717dfd05.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/opt/app-root/lib64/python3.9/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/opt/app-root/lib64/python3.9/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/opt/app-root/lib64/python3.9/site-packages/gradio/blocks.py", line 1935, in process_api
    result = await self.call_function(
  File "/opt/app-root/lib64/python3.9/site-packages/gradio/blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/opt/app-root/lib64/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/opt/app-root/lib64/python3.9/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
    return await future
  File "/opt/app-root/lib64/python3.9/site-packages/an