In [1]:
!pip install qdrant-client

Collecting qdrant-client
  Downloading qdrant_client-1.12.1-py3-none-any.whl.metadata (10 kB)
Collecting grpcio-tools>=1.41.0 (from qdrant-client)
  Downloading grpcio_tools-1.67.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.3 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant-client)
  Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Collecting protobuf<6.0dev,>=5.26.1 (from grpcio-tools>=1.41.0->qdrant-client)
  Downloading protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Collecting grpcio>=1.41.0 (from qdrant-client)
  Downloading grpcio-1.67.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting h2<5,>=3 (from httpx[http2]>=0.20.0->qdrant-client)
  Downloading h2-4.1.0-py3-none-any.whl.metadata (3.6 kB)
Collecting hyperframe<7,>=6.0 (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client)
  Downloading hyperframe-6.0.1-py3-none-any.whl.metadata (2.7 kB)
Collecting hpack<5,>=4.0 (from h2

In [1]:
import csv
import random
from rdkit import Chem

# Seed the generator so the synthetic dataset is identical on every run.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Single-heavy-atom SMILES fragments the generator draws from.
FRAGMENT_SMILES = ("C", "O", "N", "Cl", "Br", "F")


# Function to generate a random SMILES string
def generate_random_smiles(max_atoms=10):
    """Generate a random SMILES string built from single-atom fragments.

    Starts from "C" and merges in randomly chosen fragments from
    FRAGMENT_SMILES with Chem.CombineMols. This code adds no bonds between
    the fragments; it only combines them into one molecule object.

    Args:
        max_atoms: Upper bound on the number of fragments in the result
            (the starting "C" included). Values below 2 yield just "C"
            instead of raising from random.randint on an empty range.

    Returns:
        The SMILES string produced by Chem.MolToSmiles for the combined
        molecule.
    """
    mol = Chem.MolFromSmiles("C")  # Start with a simple molecule
    if max_atoms < 2:
        return Chem.MolToSmiles(mol)

    for _ in range(random.randint(1, max_atoms - 1)):
        fragment = Chem.MolFromSmiles(random.choice(FRAGMENT_SMILES))
        if fragment is None:
            # Skip a fragment that failed to parse rather than hiding every
            # possible error behind a bare except.
            continue
        mol = Chem.CombineMols(mol, fragment)
    return Chem.MolToSmiles(mol)


# Generate synthetic reaction data
num_reactions = 1000  # Number of reactions to generate
reactions = []
for i in range(num_reactions):
    reactant_smiles = generate_random_smiles()
    product_smiles = generate_random_smiles()
    likelihood = round(random.uniform(0.5, 1.0), 2)  # Random likelihood between 0.5 and 1.0
    reactions.append({"id": i, "reactant_smiles": reactant_smiles, "product_smiles": product_smiles, "likelihood": likelihood})

# Save data to CSV file (written once; every id in 0..num_reactions-1 appears exactly once)
with open("synthetic_reactions.csv", "w", newline="") as csvfile:
    fieldnames = ["id", "reactant_smiles", "product_smiles", "likelihood"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(reactions)

In [2]:
import csv
import random
from PIL import Image
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.rdFingerprintGenerator import GetMorganGenerator
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import numpy as np
import openvino.runtime as ov

# --- Vector Database Setup ---

# In-memory Qdrant instance, used for this example
client = QdrantClient(":memory:")

# Create the "reactions" collection only when it is not already present
reactions_collection_exists = client.collection_exists(collection_name="reactions")
if not reactions_collection_exists:
    # Cosine-distance vectors of length 4096
    reaction_vectors = VectorParams(size=4096, distance=Distance.COSINE)
    client.create_collection(collection_name="reactions", vectors_config=reaction_vectors)

# --- Load OpenVINO Model ---

core = ov.Core()
model_path = "production_prediction_openvino1.xml"  # Replace with your actual model path
# Read the model file first, then compile it for the CPU device
reaction_model = core.read_model(model_path)
compiled_model = core.compile_model(reaction_model, "CPU")

# ---  Load and Store Reaction Data ---

def load_reaction_data(filepath="synthetic_reactions.csv"):
    """Load reaction records from a CSV file.

    Each data row becomes a dict keyed by the CSV header. Rows that have no
    usable "id" value (column missing, value absent, or blank) receive their
    zero-based row index as the id; values read from the file are left as the
    strings the csv module returns.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, in file order.
    """
    reactions = []
    # newline="" hands newline handling to the csv module, so line breaks
    # embedded in quoted fields are returned unchanged.
    with open(filepath, "r", newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for i, row in enumerate(reader):
            raw_id = row.get("id")
            if raw_id is None or not str(raw_id).strip():
                row["id"] = i  # Assign the row index as ID
            reactions.append(row)
    return reactions

# Read the reaction records from the CSV file
reactions = load_reaction_data(filepath="synthetic_reactions.csv")

# --- Fingerprint Generator ---

# Morgan fingerprint generator: radius 2, fingerprint size 4096
fp_generator = GetMorganGenerator(fpSize=4096, radius=2)

# --- Populate Vector Database ---

def populate_vector_db(reactions, client, fp_generator):
    """Index every reaction's reactant fingerprint in the "reactions" collection.

    Reactions whose "reactant_smiles" does not parse into a molecule are
    skipped. All remaining points are sent in a single upsert call.

    Args:
        reactions: Iterable of dicts with at least "id" and "reactant_smiles".
        client: Client object exposing ``upsert(collection_name=..., points=...)``.
        fp_generator: Object exposing ``GetFingerprint(mol)``.
    """

    def _iter_points():
        # Yield one point per reaction whose reactant parses successfully.
        for record in reactions:
            mol = Chem.MolFromSmiles(record["reactant_smiles"])
            if not mol:
                continue
            bits = fp_generator.GetFingerprint(mol)
            vector = np.array(bits, dtype=np.float64).tolist()
            # The full record travels along as the payload of the point.
            yield PointStruct(id=int(record["id"]), vector=vector, payload=record)

    client.upsert(collection_name="reactions", points=list(_iter_points()))


populate_vector_db(reactions, client, fp_generator)

# --- Retrieve Similar Reactions ---

def retrieve_similar_reactions(query_embedding, client, top_k=3):
    """Return the payloads of the reactions closest to a query vector.

    Args:
        query_embedding: 1-D numeric vector (NumPy array or any array-like);
            it is converted to a list of float64 values before the query.
        client: Qdrant client. ``query_points`` is used when the client
            provides it; clients without that method fall back to the older
            ``search`` call.
        top_k: Maximum number of hits to return.

    Returns:
        A list with the payload of each hit, in the order the client
        returned them.
    """
    query_vector = np.asarray(query_embedding, dtype=np.float64).tolist()

    if hasattr(client, "query_points"):
        # Preferred API; the hits are carried on the response's `points`.
        response = client.query_points(
            collection_name="reactions",
            query=query_vector,
            limit=top_k,
        )
        hits = response.points
    else:
        # Older clients only offer `search`, which returns the hits directly.
        hits = client.search(
            collection_name="reactions",
            query_vector=query_vector,
            limit=top_k,
        )
    return [hit.payload for hit in hits]

# --- Predict Function with RAG ---

def _format_similar_reactions(similar_reactions):
    """Render retrieved reaction payloads as the list shown to the user."""
    parts = ["\n**Similar Reactions from the Database**:\n"]
    for reaction in similar_reactions:
        similar_likelihood = reaction.get("likelihood", "N/A")
        parts.append(
            f" - Reactant: {reaction['reactant_smiles']} | Product: {reaction['product_smiles']}\n"
            f"   Estimated Likelihood: {similar_likelihood} (Provides context on reaction feasibility based on past data)\n"
        )
    return "".join(parts)


def predict_reaction_with_rag_openvino(reactant_smiles, product_smiles):
    """Predict a reaction likelihood and list similar stored reactions.

    Args:
        reactant_smiles: SMILES string of the reactant.
        product_smiles: SMILES string of the product.

    Returns:
        A 4-tuple ``(report_text, reactant_image, product_image, summary)``.
        ``report_text`` holds the prediction, an explanation and the similar
        reactions; the images are 200x200 drawings of the two molecules;
        ``summary`` is a one-line restatement of the predicted value.
        For blank or unparsable input the tuple is
        ``(error_message, None, None, None)``.
    """
    # Treat missing or whitespace-only input as invalid up front.
    # NOTE(review): RDKit appears to parse "" into an atom-less molecule
    # rather than None, which would slip past the None check - confirm.
    reactant_smiles = (reactant_smiles or "").strip()
    product_smiles = (product_smiles or "").strip()

    reactant_mol = Chem.MolFromSmiles(reactant_smiles) if reactant_smiles else None
    product_mol = Chem.MolFromSmiles(product_smiles) if product_smiles else None

    if reactant_mol is None or product_mol is None:
        return "Invalid SMILES string. Please check your input.", None, None, None

    reactant_fp = np.array(fp_generator.GetFingerprint(reactant_mol), dtype=np.float64)
    product_fp = np.array(fp_generator.GetFingerprint(product_mol), dtype=np.float64)
    # Model input is the reactant fingerprint followed by the product fingerprint.
    combined_fp = np.concatenate([reactant_fp, product_fp])

    # Retrieve similar reactions from the vector database (keyed on the reactant only)
    similar_reactions = retrieve_similar_reactions(reactant_fp, client)

    # Prepare input for OpenVINO: a single-row float32 batch
    input_data = combined_fp.reshape(1, -1).astype(np.float32)

    # Run inference with OpenVINO
    result = compiled_model([input_data])[0]
    predicted_likelihood = result[0].item()

    # Generate images
    reactant_img = Draw.MolToImage(reactant_mol, size=(200, 200))
    product_img = Draw.MolToImage(product_mol, size=(200, 200))

    # Highlighting the importance of predicted likelihood
    explanation = (
        f"**The predicted reaction likelihood of {predicted_likelihood:.4f} indicates the model's estimation of how probable the reaction is based on historical and chemical data.**\n"
        "A higher likelihood means the reaction is more feasible or commonly observed, while a lower likelihood suggests it may be less typical or have constraints.\n\n"
        "This helps in assessing the practical viability of proposed chemical reactions."
    )

    # Explain similar reactions to the user
    similar_reactions_str = _format_similar_reactions(similar_reactions)

    return (
        f"Predicted Reaction Likelihood: {predicted_likelihood:.4f}\n\n"
        f"{explanation}\n\n"
        f"{similar_reactions_str}",
        reactant_img,
        product_img,
        # This value is the model's prediction; no ground-truth likelihood is
        # available in this function, so it must not be labelled as "true".
        f"Predicted Likelihood: {predicted_likelihood:.4f}"
    )

# --- Gradio Interface ---

with gr.Blocks() as demo:
    gr.Markdown("# Reaction Prediction with RAG Integration using OpenVINO")
    # Plain concatenated strings: the previous triple-quoted literal carried
    # stray double-quote characters into the rendered text.
    gr.Markdown(
        "**About**: This tool predicts the likelihood of a reaction occurring and provides similar reactions for comparison.\n\n"
        "The predicted likelihood helps understand the feasibility of the input reaction, while similar reactions show examples from the database to give additional insights."
    )

    with gr.Row():
        reactant_smiles = gr.Textbox(
            label="Reactant SMILES",
            placeholder="Enter reactant SMILES (e.g., CCO for ethanol)")
        product_smiles = gr.Textbox(
            label="Product SMILES",
            placeholder="Enter product SMILES (e.g., CCO for ethanol)")

    predict_button = gr.Button("Predict Reaction Likelihood")

    with gr.Row():
        likelihood_output = gr.Textbox(
            label="Predicted Reaction Likelihood",
            placeholder="This shows how likely the reaction is to occur.")
        reactant_image = gr.Image(type="pil", label="Reactant Molecule")
        product_image = gr.Image(type="pil", label="Product Molecule")

    # Dedicated component for the fourth value returned by the prediction
    # function, so that it no longer competes with the detailed report (or the
    # error message) for the same textbox.
    likelihood_summary = gr.Textbox(label="Likelihood Summary")

    # The prediction function returns four values; each one gets its own
    # output component, in the same order.
    predict_button.click(
        predict_reaction_with_rag_openvino,
        inputs=[reactant_smiles, product_smiles],
        outputs=[
            likelihood_output, reactant_image, product_image,
            likelihood_summary
        ])

# Launch the app
# share=True also requests a public URL in addition to the local one;
# switch to share=False to keep the app reachable only locally.
demo.launch(share=True)




Running on local URL:  http://127.0.0.1:7864
Running on public URL: https://79f45cab00ae022429.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


