In [1]:
import io
import os
import rdkit
import ipywidgets as widgets
import numpy as np
import pandas as pd

In [2]:
def rdimage(mol: rdkit.Chem.Mol) -> bytes:
    """Returns an image of a molecule as bytes."""
    pil_image = rdkit.Chem.Draw.MolToImage(mol)
    buf = io.BytesIO()
    pil_image.save(buf, format='png')
    return buf.getvalue()

In [15]:
filename = "drd2-subset300mols.csv"
# Can also use FileUpload widget: 
# https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html#File-Upload
df = pd.read_csv(filename)

In [16]:
df["mol"] = df["canonical"].apply(rdkit.Chem.MolFromSmiles)

In [17]:
df["mol_widget"] = df["mol"].apply(
    lambda mol: widgets.Image(
        value=rdimage(mol),
        format="PNG",
        width=150,
        height=150,
    )
)

In [18]:
df["button_widget"] = df["canonical"].apply(
    lambda smi: widgets.ToggleButton(
        value=False, 
        description=smi,
        layout=widgets.Layout(height="auto", width="auto")
    )
)

In [19]:
df["duo_widget"] = df.apply(
    axis=1, 
    func=lambda row: widgets.VBox(
        [row.mol_widget, row.button_widget],
        layout=widgets.Layout(margin="5px 5px 40px 5px")  # top, right, bottom, left
    )
)

In [20]:
N_ROWS = 10
N_COLS = 4
grid = widgets.GridspecLayout(N_ROWS, N_COLS)
df["is_visible"] = False  # Anything beyond N_ROWS x N_COLS will not be visible.
for row in range(N_ROWS):
    for col in range(N_COLS):
        i = np.ravel_multi_index([row, col], [N_ROWS, N_COLS])
        if i < len(df):
            grid[row, col] = df["duo_widget"][i]
            df.loc[i, "is_visible"] = True

grid

GridspecLayout(children=(VBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01,\x00\x00…

In [21]:
df["is_selected"] = df["button_widget"].map(lambda row: row.value)

auxiliary_ui_columns = ["mol", "mol_widget", "button_widget", "duo_widget"]
good_columns = df.columns.difference(auxiliary_ui_columns)
df2 = df[good_columns]

df2

Unnamed: 0,activity,canonical,is_selected,is_visible,molwt,molwt_gt_330
0,0,CCCC=N[SH](=O)(O)C=Cc1ccccc1,True,True,239.340,False
1,0,O=C(Nc1ccc2c(c1)OCO2)c1cnn2cccnc12,True,True,282.259,False
2,0,N#Cc1c(N)n(CCNC(=O)c2cccs2)c2nc3ccccc3nc12,True,True,362.418,True
3,0,CC(=O)c1cccc(NC(=O)Cn2cnc3c2c(=O)n(C)c(=O)n3C)c1,False,True,355.354,True
4,0,CCOC(=O)c1oc2ccccc2c1NC(=O)Cc1cccs1,False,True,329.377,False
...,...,...,...,...,...,...
295,0,O=C(O)c1cccc([N+](=O)[O-])c1C(=O)Nc1nc2ccccc2s1,False,False,343.320,True
296,0,CN(C)S(=O)(=O)c1cccc(NC(=O)COC(=O)COc2ccc(Br)c...,False,False,471.329,True
297,0,CCOc1ccc(NC(=O)CCC(=O)c2ccc(Br)cc2)cc1,False,False,376.250,True
298,0,CC1CCN(S(=O)(=O)c2ccc3c(c2)c(=O)c(C(=O)NCc2ccc...,False,False,459.593,True


In [160]:
def out_filename(in_filename):
    name, ext = os.path.splitext(in_filename)
    return f"{name}-with-user-selection{ext}"

df2.to_csv(out_filename(filename), index=False)