In [10]:
!pip install qdrant-client



In [11]:
import csv
import random
import torch
from rdkit import Chem
from rdkit.Chem.rdFingerprintGenerator import GetMorganGenerator
from rdkit.Chem import Draw
from PIL import Image
import numpy as np
import openvino.runtime as ov
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
import gradio as gr

# --- Constants ---
# Path handed to the OpenVINO core's read_model() when the model is loaded.
MODEL_PATH = "production_prediction_openvino1.xml"  # Replace with your actual model path
# Name of the Qdrant collection that stores one vector per reaction.
VECTOR_DB_COLLECTION = "reactions"
# Bits per Morgan fingerprint; also used as the Qdrant vector size.
# NOTE(review): the prediction input is the reactant and product fingerprints
# concatenated (2 * FP_SIZE values) - confirm that this matches the exported model.
FP_SIZE = 4096  # Set to 4096 to match model expectations
# Radius passed to the Morgan fingerprint generator.
FP_RADIUS = 3
# Default number of points sent to Qdrant per upsert call.
BATCH_SIZE = 100

# --- Utility Functions ---

# Generate synthetic SMILES for testing
def generate_random_smiles(max_atoms=10):
    """Return a random SMILES string made of single-atom fragments, for testing.

    Starts from a single carbon and appends between 1 and ``max_atoms - 1``
    fragments, each chosen uniformly from C, O, N, Cl, Br and F.

    NOTE(review): ``Chem.CombineMols`` appears to merge molecules without
    adding bonds, so the result is expected to be a dot-separated set of
    fragments rather than one connected molecule - confirm this is acceptable
    for the synthetic data.

    Args:
        max_atoms: Upper bound on the number of fragments in the result.
            Values below 2 yield the lone starting carbon.

    Returns:
        The SMILES string RDKit produces for the combined molecule.
    """
    mol = Chem.MolFromSmiles("C")
    if max_atoms < 2:
        # random.randint(1, max_atoms - 1) would raise ValueError on an empty
        # range; a one-atom budget is already satisfied by the seed carbon.
        return Chem.MolToSmiles(mol)

    elements = ["C", "O", "N", "Cl", "Br", "F"]
    for _ in range(random.randint(1, max_atoms - 1)):
        # Every candidate is a fixed, valid single-atom SMILES, so no blanket
        # exception handler is needed; a real failure should surface.
        fragment = Chem.MolFromSmiles(random.choice(elements))
        mol = Chem.CombineMols(mol, fragment)
    return Chem.MolToSmiles(mol)

# Generate random reaction data and save to CSV
def generate_synthetic_reactions(filepath="synthetic_reactions.csv", num_reactions=1000):
    """Write ``num_reactions`` randomly generated reactions to a CSV file.

    Each row carries a sequential ``id``, a random reactant SMILES, a random
    product SMILES and a likelihood drawn uniformly from [0.5, 1.0] and
    rounded to two decimals. The file is overwritten if it already exists.

    Args:
        filepath: Destination CSV path.
        num_reactions: Number of data rows to generate.
    """
    columns = ["id", "reactant_smiles", "product_smiles", "likelihood"]

    # Dict values are evaluated top to bottom, so the random draws happen in
    # the order reactant, product, likelihood for every row.
    rows = [
        {
            "id": index,
            "reactant_smiles": generate_random_smiles(),
            "product_smiles": generate_random_smiles(),
            "likelihood": round(random.uniform(0.5, 1.0), 2),
        }
        for index in range(num_reactions)
    ]

    with open(filepath, "w", newline="") as out_file:
        table = csv.DictWriter(out_file, fieldnames=columns)
        table.writeheader()
        table.writerows(rows)

# Load reaction data from CSV
def load_reaction_data(filepath="synthetic_reactions.csv"):
    """Load reaction rows from a CSV file with a header line.

    Every row is returned as a dict keyed by the CSV header; all values are
    strings as read from the file, except ``id``, which is replaced by the
    row's zero-based position in the file (any ``id`` column is overwritten).

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A list of row dicts in file order.
    """
    # newline="" lets the csv module do its own line handling, so quoted
    # fields that contain line breaks are read back unchanged.
    with open(filepath, "r", newline="") as csvfile:
        reactions = list(csv.DictReader(csvfile))

    for i, row in enumerate(reactions):
        row["id"] = i
    return reactions

# --- Fingerprint Generation ---
# Shared Morgan fingerprint generator configured from the module constants.
fp_generator = GetMorganGenerator(radius=FP_RADIUS, fpSize=FP_SIZE)


def generate_fingerprint(mol):
    """Return the Morgan bit fingerprint of ``mol`` as a float32 NumPy array.

    Args:
        mol: An RDKit molecule. Callers must pass a successfully parsed
            molecule; ``None`` is not handled here.

    Returns:
        A one-dimensional ``numpy.float32`` array with ``FP_SIZE`` entries,
        each 0.0 or 1.0.
    """
    # Ask RDKit for a NumPy array directly instead of converting the bit
    # vector element by element through a torch tensor.
    return fp_generator.GetFingerprintAsNumPy(mol).astype(np.float32)

# --- Qdrant Database Setup ---
# Module-level vector store. ":memory:" selects qdrant-client's local
# in-memory mode, so the collection has to be repopulated on every run.
client = QdrantClient(":memory:")
# Create the collection only when it is missing so that re-running this code
# against an already-initialised client does not fail.
if not client.collection_exists(collection_name=VECTOR_DB_COLLECTION):
    # Vectors have FP_SIZE dimensions and are compared by cosine distance.
    client.create_collection(collection_name=VECTOR_DB_COLLECTION, vectors_config=VectorParams(size=FP_SIZE, distance=Distance.COSINE))

def populate_vector_db_in_batches(reactions, client, batch_size=BATCH_SIZE):
    """Index reactions in Qdrant by their reactant fingerprint, in batches.

    Each reaction becomes one point whose id is the reaction's position in
    ``reactions``, whose vector is the reactant's Morgan fingerprint and whose
    payload is the reaction dict itself.

    Args:
        reactions: Iterable of dicts containing at least ``reactant_smiles``.
        client: Qdrant client exposing ``upsert``.
        batch_size: Maximum number of points sent per ``upsert`` call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import warnings

    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    points = []
    for i, reaction in enumerate(reactions):
        reactant_mol = Chem.MolFromSmiles(reaction["reactant_smiles"])
        if reactant_mol is None:
            # RDKit signals an unparseable SMILES with None; fingerprinting it
            # would abort the whole import with an opaque error, so skip the
            # row and tell the caller which one was dropped.
            warnings.warn(
                f"Skipping reaction {i}: invalid reactant SMILES "
                f"{reaction['reactant_smiles']!r}"
            )
            continue

        reactant_fp = generate_fingerprint(reactant_mol)
        points.append(PointStruct(id=i, vector=reactant_fp.tolist(), payload=reaction))

        if len(points) >= batch_size:
            client.upsert(collection_name=VECTOR_DB_COLLECTION, points=points)
            points = []

    # Flush the final, possibly partial, batch.
    if points:
        client.upsert(collection_name=VECTOR_DB_COLLECTION, points=points)

# --- Retrieve Similar Reactions ---
def retrieve_similar_reactions(query_embedding, client, top_k=3):
    """Return the payloads of the stored reactions closest to a query vector.

    Args:
        query_embedding: Query vector; must provide ``tolist()``.
        client: Qdrant client exposing ``search``.
        top_k: Maximum number of hits to return.

    Returns:
        A list with the ``payload`` of each hit, in the order the search
        returned them.
    """
    query_vector = query_embedding.tolist()
    hits = client.search(
        collection_name=VECTOR_DB_COLLECTION,
        query_vector=query_vector,
        limit=top_k,
    )
    payloads = [match.payload for match in hits]
    return payloads

# --- OpenVINO Model Loading ---
# Module-level side effect: the model file at MODEL_PATH is read and compiled
# for the "CPU" device as soon as this code runs, so a missing or invalid
# model file fails here rather than at prediction time.
core = ov.Core()
compiled_model = core.compile_model(core.read_model(MODEL_PATH), "CPU")

# --- Prediction with RAG and OpenVINO ---
def predict_reaction_with_rag_openvino(reactant_smiles, product_smiles):
    """Predict a reaction's likelihood and list similar stored reactions.

    The reactant and product are fingerprinted, the two fingerprints are
    concatenated into a single ``(1, 2 * FP_SIZE)`` float32 row and passed to
    the compiled OpenVINO model. The reactant fingerprint alone is used to
    look up similar reactions in the vector database.

    Args:
        reactant_smiles: SMILES string of the reactant.
        product_smiles: SMILES string of the product.

    Returns:
        A 4-tuple ``(report, reactant_image, product_image, summary)``:
        a Markdown-style text report, two PIL images of the molecules and a
        one-line likelihood summary. For blank or unparseable input the
        report is an error message and the other three items are ``None``.
    """
    invalid_input = ("Invalid SMILES string. Please check your input.", None, None, None)

    # Reject missing/blank input up front: an empty textbox yields "" (or
    # None), and an empty SMILES is not reported as a parse failure by the
    # None check below.
    if not (reactant_smiles and reactant_smiles.strip()):
        return invalid_input
    if not (product_smiles and product_smiles.strip()):
        return invalid_input

    reactant_mol = Chem.MolFromSmiles(reactant_smiles)
    product_mol = Chem.MolFromSmiles(product_smiles)
    if reactant_mol is None or product_mol is None:
        return invalid_input

    reactant_fp = generate_fingerprint(reactant_mol)
    product_fp = generate_fingerprint(product_mol)
    combined_fp = np.concatenate([reactant_fp, product_fp]).astype(np.float32).reshape(1, -1)

    # Retrieve similar reactions from the vector database
    similar_reactions = retrieve_similar_reactions(reactant_fp, client)

    # Run inference with OpenVINO
    # NOTE(review): assumes the first model output holds one scalar per
    # input row - confirm against the exported model.
    result = compiled_model([combined_fp])[0]
    predicted_likelihood = result[0].item()

    # Generate images
    reactant_img = Draw.MolToImage(reactant_mol, size=(200, 200))
    product_img = Draw.MolToImage(product_mol, size=(200, 200))

    explanation = (
        f"**The predicted reaction likelihood of {predicted_likelihood:.4f} indicates the model's estimation of reaction feasibility.**\n"
        "Higher likelihood suggests feasibility, while lower values indicate possible constraints.\n\n"
        "This helps in assessing practical viability of proposed reactions."
    )

    similar_lines = [
        f" - Reactant: {reaction['reactant_smiles']} | Product: {reaction['product_smiles']}\n"
        f"   Estimated Likelihood: {reaction.get('likelihood', 'N/A')}\n"
        for reaction in similar_reactions
    ]
    similar_reactions_str = "\n**Similar Reactions from Database**:\n" + "".join(similar_lines)

    return (
        f"Predicted Reaction Likelihood: {predicted_likelihood:.4f}\n\n"
        f"{explanation}\n\n"
        f"{similar_reactions_str}",
        reactant_img,
        product_img,
        # This is the model's prediction, not a ground-truth value, so it is
        # labelled as such.
        f"Predicted Likelihood: {predicted_likelihood:.4f}",
    )

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Reaction Prediction with RAG Integration using OpenVINO")
    gr.Markdown(
        "**About**: This tool predicts reaction likelihood and shows similar reactions for comparison.\n"
        "Likelihood reflects feasibility, while similar reactions give additional insights."
    )

    with gr.Row():
        reactant_smiles = gr.Textbox(label="Reactant SMILES", placeholder="Enter reactant SMILES (e.g., CCO for ethanol)")
        product_smiles = gr.Textbox(label="Product SMILES", placeholder="Enter product SMILES (e.g., CCO for ethanol)")

    predict_button = gr.Button("Predict Reaction Likelihood")

    with gr.Row():
        likelihood_output = gr.Textbox(label="Predicted Reaction Likelihood")
        reactant_image = gr.Image(type="pil", label="Reactant Molecule")
        product_image = gr.Image(type="pil", label="Product Molecule")

    # The callback returns four values. Give the fourth (the one-line
    # likelihood summary) its own component: listing likelihood_output twice
    # lets the summary replace the full report, explanation and similar
    # reactions included.
    likelihood_summary = gr.Textbox(label="Likelihood Summary")

    predict_button.click(
        predict_reaction_with_rag_openvino,
        inputs=[reactant_smiles, product_smiles],
        outputs=[likelihood_output, reactant_image, product_image, likelihood_summary],
    )

# Launch the app
# share=True also publishes a temporary public gradio.live URL in addition to
# the local one; drop it when the demo should stay on this machine.
demo.launch(share=True)


Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://34e24013455c62acab.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


