## Contents of the warhead file

# Example warhead file for REINVENT4 LinkInvent
#
# One warhead pair per line
# Each warhead must be annotated with '*' to locate the attachment points
# The two warheads must be separated by the pipe symbol

c1ccn2c(n1)cnc2|C(*)NCCCCc1ccc(Oc2ccccc2(*))cc1



Paper 1 - [*]C1C(C#N)CN1|[*]c1ncnc2[nH]
Paper 2 - N1=C(C*)C2N(C=C1)C=CC=N2|*CC1=COC=C1

In [38]:
import sys
# Launch REINVENT as a module through sys.executable (the interpreter running
# this kernel) rather than whichever `python` happens to be first on PATH.
# sampling.toml is the run configuration passed on the command line.
!{sys.executable} -m reinvent sampling.toml

17:07:44 <INFO> Started REINVENT 4.5.11 (C) AstraZeneca 2017, 2023 on 2025-06-23
17:07:44 <INFO> Command line: C:\Users\andre\anaconda3\envs\reinvent4\lib\site-packages\reinvent\__main__.py sampling.toml
17:07:44 <INFO> Reading run configuration from C:\Users\andre\OneDrive\Desktop\REINVENT4\configs\sampling.toml using format toml
17:07:44 <INFO> User andre on host Andrews-Legion
17:07:44 <INFO> Python version 3.10.18
17:07:44 <INFO> PyTorch version 2.6.0+cu124, git 2236df1770800ffea5697b11b0bb0d910b2e59e1
17:07:44 <INFO> PyTorch compiled with CUDA version 12.4
17:07:44 <INFO> RDKit version 2025.03.3
17:07:44 <INFO> Platform Windows-10-10.0.26100-SP0
17:07:44 <INFO> Number of PyTorch CUDA devices 1
17:07:44 <INFO> Using CUDA device:0 NVIDIA GeForce RTX 4060 Laptop GPU
17:07:44 <INFO> GPU memory: 7099 MiB free, 8187 MiB total
17:07:44 <INFO> Writing JSON config file to C:\Users\andre\OneDrive\Desktop\REINVENT4\configs\_sampling.json
17:07:44 <INFO> Starting Sampling
17:07:45 <INFO> C:\U

In [44]:
import sys
# Launch REINVENT as a module through sys.executable (the interpreter running
# this kernel) rather than whichever `python` happens to be first on PATH.
# my_scoring.toml is the run configuration passed on the command line.
!{sys.executable} -m reinvent my_scoring.toml

17:26:06 <INFO> Started REINVENT 4.5.11 (C) AstraZeneca 2017, 2023 on 2025-06-23
17:26:06 <INFO> Command line: C:\Users\andre\anaconda3\envs\reinvent4\lib\site-packages\reinvent\__main__.py my_scoring.toml
17:26:06 <INFO> Reading run configuration from C:\Users\andre\OneDrive\Desktop\REINVENT4\configs\my_scoring.toml using format toml
17:26:06 <INFO> User andre on host Andrews-Legion
17:26:06 <INFO> Python version 3.10.18
17:26:06 <INFO> PyTorch version 2.6.0+cu124, git 2236df1770800ffea5697b11b0bb0d910b2e59e1
17:26:06 <INFO> PyTorch compiled with CUDA version 12.4
17:26:06 <INFO> RDKit version 2025.03.3
17:26:06 <INFO> Platform Windows-10-10.0.26100-SP0
17:26:06 <INFO> Number of PyTorch CUDA devices 1
17:26:06 <INFO> Using CUDA device:0 NVIDIA GeForce RTX 4060 Laptop GPU
17:26:06 <INFO> GPU memory: 7099 MiB free, 8187 MiB total
17:26:06 <INFO> Writing JSON config file to C:\Users\andre\OneDrive\Desktop\REINVENT4\configs\scoring_simple.json
17:26:06 <INFO> Scoring SMILES from file C:\U

In [3]:
import pandas as pd

# Load dataset
df = pd.read_csv('scoring_results.csv')  # <-- use your actual file name

# Filter thresholds, named so they can be tuned in one place.
MIN_TANIMOTO = 0.6
MAX_MOLECULAR_WEIGHT = 550
MIN_QED = 0.75
MAX_SA_SCORE = 4
SLOGP_RANGE = (-1, 5)  # inclusive on both ends

# The property columns are mandatory for the filter below. Check for them up
# front so a missing column is reported by name instead of surfacing as a bare
# KeyError from inside the boolean mask.
required_columns = ['Molecular weight', 'QED', 'SA score', 'SlogP (RDKit)']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"scoring_results.csv is missing required column(s): {missing_columns}")

# Start with a copy so the raw frame `df` stays available for the summary line
filtered = df.copy()

# Only apply Tanimoto filter if the column exists in the file
if 'Tanimoto' in filtered.columns:
    filtered = filtered[filtered['Tanimoto'] >= MIN_TANIMOTO]

# Remove Input_SMILES column if it exists
if 'Input_SMILES' in filtered.columns:
    filtered = filtered.drop(columns=['Input_SMILES'])

# Apply property filters (independent of Tanimoto).
# Rows with a missing value in any of these columns compare False and are dropped.
filtered = filtered[
    (filtered['Molecular weight'] <= MAX_MOLECULAR_WEIGHT) &
    (filtered['QED'] >= MIN_QED) &
    (filtered['SA score'] <= MAX_SA_SCORE) &
    filtered['SlogP (RDKit)'].between(*SLOGP_RANGE)
]

# Reports how many molecules were kept (passed every filter) out of the input total
print(f"✅ Filtered {len(filtered)} molecules out of {len(df)} total.")

# Save output
filtered.to_csv('filtered_scoring_results.csv', index=False)

✅ Filtered 1502 molecules out of 9429 total.


In [4]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from PIL import Image
import numpy as np

# Load molecules
df = pd.read_csv("filtered_scoring_results.csv")
# Drop empty cells first: pandas reads a missing value as NaN (a float),
# which is not a SMILES string and should not be handed to the parser.
smiles_list = df["SMILES"].dropna().tolist()
# Parse every SMILES exactly once. MolFromSmiles returns None for input it
# cannot parse, and those entries are skipped.
parsed_mols = (Chem.MolFromSmiles(smi) for smi in smiles_list)
mols = [mol for mol in parsed_mols if mol is not None]

# Split into chunks of N molecules per page
def chunks(lst, n):
    """Yield consecutive slices of ``lst`` that each hold at most ``n`` items.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    total = len(lst)
    for start in range(0, total, n):
        stop = start + n
        yield lst[start:stop]

# Page layout for the PDF contact sheet
MOLS_PER_PAGE = 20
MOLS_PER_ROW = 4
SUB_IMG_SIZE = (300, 300)  # size passed to RDKit for each molecule cell

pages = list(chunks(mols, MOLS_PER_PAGE))

images = []
for chunk in pages:
    grid_img = Draw.MolsToGridImage(
        chunk,
        molsPerRow=MOLS_PER_ROW,
        subImgSize=SUB_IMG_SIZE,
        useSVG=False,
        returnPNG=False,  # request an image object rather than raw PNG bytes
    )
    # With these settings RDKit is expected to return a PIL image; the array
    # conversion is only a fallback for any other array-like return value.
    if isinstance(grid_img, Image.Image):
        pil_img = grid_img
    else:
        pil_img = Image.fromarray(np.array(grid_img))
    # Normalise every page to RGB so all pages share one mode when written to the PDF
    images.append(pil_img.convert("RGB"))

# Save to PDF: the first page is the base image, the rest are appended as extra pages
if images:
    images[0].save("molecule_grid.pdf", save_all=True, append_images=images[1:])
else:
    # Make the no-output case visible instead of finishing silently
    print("No valid molecules to draw; molecule_grid.pdf was not written.")
