In [1]:
import os
import pandas as pd
from pathlib import Path
import json
import re
from stereomapper.domain.chemistry.analysis import StereoAnalyser
from stereomapper.domain.chemistry import ChemistryUtils
from stereomapper.domain.chemistry import OpenBabelOperations
import sqlite3
import subprocess

In [9]:
# Load the control pairings (id1, id2, label) from CSV and preview them.
control_csv = 'data/enantiomer_control_set.csv'
control_pairs = pd.read_csv(control_csv)
control_pairs

Unnamed: 0,id1,id2,label
0,CHEBI_43796,CHEBI_30314,Enantiomers
1,CHEBI_30314,CHEBI_43796,Enantiomers
2,CHEBI_15570,CHEBI_16977,Enantiomers
3,CHEBI_16977,CHEBI_15570,Enantiomers
4,CHEBI_32433,CHEBI_32437,Enantiomers
...,...,...,...
2809,CHEBI_235379,CHEBI_235380,Enantiomers
2810,CHEBI_76640,CHEBI_195630,Enantiomers
2811,CHEBI_195630,CHEBI_76640,Enantiomers
2812,CHEBI_235487,CHEBI_76457,Enantiomers


In [16]:
def norm_chebi(x: str) -> str:
    """Normalise a ChEBI identifier to the form ``CHEBI_<digits>``.

    The input is converted to ``str`` and stripped of surrounding whitespace.
    If it ends in a run of digits (e.g. ``'chebi:123'``, ``'CHEBI_123'``,
    ``'123'``), those digits are returned behind a ``CHEBI_`` prefix.
    Otherwise the stripped text is returned upper-cased, unchanged apart
    from case.
    """
    text = str(x).strip()
    trailing_digits = re.search(r'(\d+)$', text)
    if trailing_digits is not None:
        return f"CHEBI_{trailing_digits.group(1)}"
    # No trailing digits: fall back to the upper-cased input.
    return text.upper()

# 1) Canonicalise + dedupe the control set.
dfc = control_pairs.copy()
dfc["id1_c"] = dfc["id1"].map(norm_chebi)
dfc["id2_c"] = dfc["id2"].map(norm_chebi)

# Order-invariant pair key: the two canonical ids as a sorted tuple, so that
# (A, B) and (B, A) collapse onto the same key.
dfc["pair_key"] = [tuple(sorted(pair)) for pair in zip(dfc["id1_c"], dfc["id2_c"])]

# Before collapsing to one row per unordered pair, verify that every copy of
# a pair carries the same label; otherwise drop_duplicates would silently keep
# whichever label happens to come first.
label_conflicts = (
    dfc.drop_duplicates(subset=["pair_key", "label"])["pair_key"].duplicated(keep=False)
)
assert not label_conflicts.any(), (
    "Control set contains unordered pairs with conflicting labels"
)

# Keep one row per unordered pair.
dfc_dedup = dfc.drop_duplicates(subset=["pair_key"]).reset_index(drop=True)
control_keys = set(dfc_dedup["pair_key"])

dfc_dedup

Unnamed: 0,id1,id2,label,id1_c,id2_c,pair_key
0,CHEBI_43796,CHEBI_30314,Enantiomers,CHEBI_43796,CHEBI_30314,"(CHEBI_30314, CHEBI_43796)"
1,CHEBI_15570,CHEBI_16977,Enantiomers,CHEBI_15570,CHEBI_16977,"(CHEBI_15570, CHEBI_16977)"
2,CHEBI_32433,CHEBI_32437,Enantiomers,CHEBI_32433,CHEBI_32437,"(CHEBI_32433, CHEBI_32437)"
3,CHEBI_32447,CHEBI_32452,Enantiomers,CHEBI_32447,CHEBI_32452,"(CHEBI_32447, CHEBI_32452)"
4,CHEBI_17561,CHEBI_16375,Enantiomers,CHEBI_17561,CHEBI_16375,"(CHEBI_16375, CHEBI_17561)"
...,...,...,...,...,...,...
1402,CHEBI_234521,CHEBI_234520,Enantiomers,CHEBI_234521,CHEBI_234520,"(CHEBI_234520, CHEBI_234521)"
1403,CHEBI_234547,CHEBI_234545,Enantiomers,CHEBI_234547,CHEBI_234545,"(CHEBI_234545, CHEBI_234547)"
1404,CHEBI_235380,CHEBI_235379,Enantiomers,CHEBI_235380,CHEBI_235379,"(CHEBI_235379, CHEBI_235380)"
1405,CHEBI_76640,CHEBI_195630,Enantiomers,CHEBI_76640,CHEBI_195630,"(CHEBI_195630, CHEBI_76640)"


In [None]:
# Paths used when running stereomapper as a subprocess.
# Input data directory; downloadable from Zenodo (DOI: 10.5281/zenodo.17831412).
enantiomers = Path('benchmarking_data/enantiomer_benchmark_data')
# SQLite file passed to stereomapper as its output (`-o`).
results = Path('enantiomer_benchmark_results.sqlite')
# SQLite file passed to stereomapper via `-p` (used together with `--fresh-cache`).
cache_path = Path('enantiomer_benchmark_cache.sqlite')

In [11]:
# Command line for the stereomapper run: input directory (-d), output
# database (-o) and cache database (-p), with --fresh-cache set.
cmd = [
    "stereomapper",
    "run",
    "-d", enantiomers.as_posix(),
    "-o", results.as_posix(),
    "-p", cache_path.as_posix(),
    "--fresh-cache",
]

# check=True raises CalledProcessError on a non-zero exit status, so a failed
# run cannot go unnoticed and leave later cells reading a stale or missing
# results database.
subprocess.run(cmd, check=True)

INFO    Logging initialised. File: logs/stereomapper_20251210_123713.log
Pipeline: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| , Complete! [00:11<00:00]


✅ Pipeline completed in 11.2s
📦 Inputs attempted: 2,504 (skipped 0)
📊 Successes: 2,502 | Failures: 2
🔗 Inchikey groups — processed 931, skipped 0, failed 0
🧮 Relationship rows: 3,618
🧾 Unique inchikeys observed: 931
💾 Cache hit rate: 0.0%


Pipeline: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| , Complete! [00:11<00:00]


CompletedProcess(args=['stereomapper', 'run', '-d', '/home/jackmcgoldrick/Downloads/benchmarking_data/enantiomer_benchmark_data', '-o', '/home/jackmcgoldrick/enantiomer_benchmark_results.sqlite', '-p', '/home/jackmcgoldrick/enantiomer_benchmark_cache.sqlite', '--fresh-cache'], returncode=0)

In [14]:
# Read every row of the `relationships` table from the results database.
merged_df_q = """ 
SELECT * from relationships;
"""
conn = sqlite3.connect(results)
try:
    df_merged = pd.read_sql_query(merged_df_q, conn)
finally:
    # Always release the database handle, even if the query fails.
    conn.close()
df_merged

Unnamed: 0,cluster_a,cluster_b,cluster_a_members,cluster_b_members,cluster_a_size,cluster_b_size,classification,score,score_details,extra_info,version_tag
0,1,2,"[""chebi:17521""]","[""chebi:36124""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0
1,3,4,"[""chebi:137507""]","[""chebi:137513""]",1,1,Enantiomers,100.0,"{""confidence_bin"":""high""}",,v1.0
2,5,6,"[""chebi:134198""]","[""chebi:134199""]",1,1,Enantiomers,100.0,"{""confidence_bin"":""high""}",,v1.0
3,7,8,"[""chebi:28651""]","[""chebi:27702""]",1,1,Enantiomers,100.0,"{""confidence_bin"":""high""}",,v1.0
4,9,10,"[""chebi:145480""]","[""chebi:145483""]",1,1,Enantiomers,100.0,"{""confidence_bin"":""high""}",,v1.0
...,...,...,...,...,...,...,...,...,...,...,...
3613,2456,2458,"[""chebi:133313""]","[""chebi:133312""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0
3614,2457,2458,"[""chebi:133311""]","[""chebi:133312""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0
3615,2459,2460,"[""chebi:233960""]","[""chebi:233959""]",1,1,Enantiomers,100.0,"{""confidence_bin"":""high""}",,v1.0
3616,2461,2462,"[""chebi:142550""]","[""chebi:9399""]",1,1,Enantiomers,100.0,"{""confidence_bin"":""high""}",,v1.0


In [17]:
# Expand every relationship row into the unordered (id, id) pairs formed by
# crossing the members of cluster A with the members of cluster B.
expanded_keys = set()
member_columns = zip(df_merged["cluster_a_members"], df_merged["cluster_b_members"])
for raw_a, raw_b in member_columns:
    # Skip rows where either member list is missing/empty.
    if not (raw_a and raw_b):
        continue
    ids_a = [norm_chebi(member) for member in json.loads(raw_a)]
    ids_b = [norm_chebi(member) for member in json.loads(raw_b)]
    expanded_keys.update(
        tuple(sorted((id_a, id_b)))
        for id_a in ids_a
        for id_b in ids_b
        if id_a != id_b  # a compound paired with itself is not a relationship
    )

# --- Overlap ---
tp = expanded_keys & control_keys
fp = expanded_keys - control_keys
fn = control_keys - expanded_keys

print(f"True Positives: {len(tp)}")
print(f"False Positives: {len(fp)}")
print(f"False Negatives: {len(fn)}")


True Positives: 1243
False Positives: 2494
False Negatives: 164


This is not a true representation of the results: the comparison contains 2,494 false positives, the majority of which are off-target relationships. Therefore, the predictions need to be remapped back onto the original pairs stored in the control dataset.

In [18]:
# Keep only the predicted relationships whose (unordered) member pair is
# present in the control set, emitting one record per matching pair.
valid_pairs = control_keys

pred_records = []
for _, row in df_merged.iterrows():
    # Same guard as the overlap cell: rows without member lists cannot be
    # expanded, and json.loads would fail on them.
    if not row["cluster_a_members"] or not row["cluster_b_members"]:
        continue
    members_1 = [norm_chebi(x) for x in json.loads(row["cluster_a_members"])]
    members_2 = [norm_chebi(x) for x in json.loads(row["cluster_b_members"])]
    row_fields = row.to_dict()  # loop-invariant for the inner pair expansion
    for a in members_1:
        for b in members_2:
            pair = tuple(sorted((a, b)))
            if pair in valid_pairs:
                pred_records.append({"id1": a, "id2": b, **row_fields})

df_pred_filtered = pd.DataFrame(pred_records)

In [19]:
def _unordered_pair_key(record):
    """Return the sorted tuple of the record's canonicalised id1/id2."""
    return tuple(sorted([norm_chebi(record["id1"]), norm_chebi(record["id2"])]))


# Work on a copy, then attach the order-invariant key used for joining.
df_pred_filtered = df_pred_filtered.copy()
df_pred_filtered["pair_key"] = df_pred_filtered.apply(_unordered_pair_key, axis=1)

In [22]:
# Attach the control label to every filtered prediction via the shared
# unordered pair key (left join keeps every prediction row).
control_labels = dfc_dedup[["pair_key", "label"]]
df_eval = df_pred_filtered.merge(control_labels, on="pair_key", how="left")

The dataframe `df_eval` now contains the original pairs of identifiers together with their predictions from stereomapper. This dataframe will be used to calculate the precision, recall and F1 score on the enantiomer control dataset.

In [24]:
# Only rows where we have a prediction - removing FN, ensure to state in results section.
df_with_pred = df_eval.dropna(subset=["classification"])

# Count agreement/disagreement on the prediction-bearing rows only, so a row
# without a classification can never be counted as both FP and FN.
matches_label = df_with_pred["classification"] == df_with_pred["label"]
tp = matches_label.sum()
fp = (~matches_label).sum()
fn = df_eval["classification"].isna().sum()

# NOTE(review): df_eval is built from predicted pairs only, so control pairs
# that received no prediction never reach it and are not part of `fn`.
# Report how many there are so the exclusion is visible next to the metrics.
n_control_without_pred = len(control_keys - set(df_eval["pair_key"]))

print(f"TP: {tp}, FP: {fp}, FN: {fn}")
print(f"Control pairs without a prediction (excluded from the metrics): {n_control_without_pred}")

TP: 1184, FP: 59, FN: 0


In [25]:
def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


# Precision, recall and their harmonic mean from the TP/FP/FN counts.
precision = _ratio(tp, tp + fp)
recall = _ratio(tp, tp + fn)
f1 = _ratio(2 * precision * recall, precision + recall)

print(f"Precision: {precision:.3f}")
print(f"Recall:    {recall:.3f}")
print(f"F1 score:  {f1:.3f}")


Precision: 0.953
Recall:    1.000
F1 score:  0.976


Great results. Let's investigate the false positives to see what went wrong and whether the pipeline can be improved.

In [26]:
# Rows whose predicted classification disagrees with the control label.
label_mismatch = df_with_pred["classification"].ne(df_with_pred["label"])
df_fp = df_with_pred[label_mismatch]
df_fp

Unnamed: 0,id1,id2,cluster_a,cluster_b,cluster_a_members,cluster_b_members,cluster_a_size,cluster_b_size,classification,score,score_details,extra_info,version_tag,pair_key,label
0,CHEBI_17521,CHEBI_36124,1,2,"[""chebi:17521""]","[""chebi:36124""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_17521, CHEBI_36124)",Enantiomers
117,CHEBI_27374,CHEBI_27372,232,233,"[""chebi:27374""]","[""chebi:27372""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_27372, CHEBI_27374)",Enantiomers
184,CHEBI_140637,CHEBI_136698,364,365,"[""chebi:140637""]","[""chebi:136698""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_136698, CHEBI_140637)",Enantiomers
202,CHEBI_28548,CHEBI_37209,400,401,"[""chebi:28548""]","[""chebi:37209""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_28548, CHEBI_37209)",Enantiomers
203,CHEBI_16002,CHEBI_47537,402,404,"[""chebi:16002""]","[""chebi:47537""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_16002, CHEBI_47537)",Enantiomers
206,CHEBI_21101,CHEBI_21398,406,409,"[""chebi:21101""]","[""chebi:21398""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_21101, CHEBI_21398)",Enantiomers
209,CHEBI_37546,CHEBI_37547,412,416,"[""chebi:37546""]","[""chebi:37547""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_37546, CHEBI_37547)",Enantiomers
210,CHEBI_21100,CHEBI_21397,413,417,"[""chebi:21100""]","[""chebi:21397""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_21100, CHEBI_21397)",Enantiomers
230,CHEBI_17924,CHEBI_28789,456,457,"[""chebi:17924""]","[""chebi:28789""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_17924, CHEBI_28789)",Enantiomers
232,CHEBI_134311,CHEBI_192698,459,461,"[""chebi:134311""]","[""chebi:192698""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_134311, CHEBI_192698)",Enantiomers


In [27]:
# Number of mismatching rows per predicted classification.
df_fp['classification'].value_counts()

classification
Diastereomers              37
Stereo-resolution pairs    16
Unresolved                  5
Unclassified                1
Name: count, dtype: int64

In [28]:
# Mismatches that stereomapper classified as diastereomers.
is_diastereomer = df_fp["classification"].eq("Diastereomers")
df_fp_dia = df_fp[is_diastereomer]
df_fp_dia

Unnamed: 0,id1,id2,cluster_a,cluster_b,cluster_a_members,cluster_b_members,cluster_a_size,cluster_b_size,classification,score,score_details,extra_info,version_tag,pair_key,label
0,CHEBI_17521,CHEBI_36124,1,2,"[""chebi:17521""]","[""chebi:36124""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_17521, CHEBI_36124)",Enantiomers
117,CHEBI_27374,CHEBI_27372,232,233,"[""chebi:27374""]","[""chebi:27372""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_27372, CHEBI_27374)",Enantiomers
184,CHEBI_140637,CHEBI_136698,364,365,"[""chebi:140637""]","[""chebi:136698""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_136698, CHEBI_140637)",Enantiomers
202,CHEBI_28548,CHEBI_37209,400,401,"[""chebi:28548""]","[""chebi:37209""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_28548, CHEBI_37209)",Enantiomers
203,CHEBI_16002,CHEBI_47537,402,404,"[""chebi:16002""]","[""chebi:47537""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_16002, CHEBI_47537)",Enantiomers
206,CHEBI_21101,CHEBI_21398,406,409,"[""chebi:21101""]","[""chebi:21398""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_21101, CHEBI_21398)",Enantiomers
209,CHEBI_37546,CHEBI_37547,412,416,"[""chebi:37546""]","[""chebi:37547""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_37546, CHEBI_37547)",Enantiomers
210,CHEBI_21100,CHEBI_21397,413,417,"[""chebi:21100""]","[""chebi:21397""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_21100, CHEBI_21397)",Enantiomers
230,CHEBI_17924,CHEBI_28789,456,457,"[""chebi:17924""]","[""chebi:28789""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_17924, CHEBI_28789)",Enantiomers
232,CHEBI_134311,CHEBI_192698,459,461,"[""chebi:134311""]","[""chebi:192698""]",1,1,Diastereomers,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_134311, CHEBI_192698)",Enantiomers


Let's investigate these cases manually to determine what went wrong.

Upon manual review, 23 of the 37 pairs were determined to be correct predictions by the stereomapper pipeline. In each of these 23 cases, the disagreement was traced to a mistake in the assignment made by ChEBI. Most of these relationships are indeed diastereomers, whilst others have missing stereochemistry, which is picked up by the stereomapper pipeline. In one case, the two structures are indeed enantiomers, but they carry different formal charges, meaning they are not deemed enantiomers by the pipeline.

After accounting for these disagreements, the adjusted precision, recall and F1 score for stereomapper are computed below (see the final cell):

In [30]:
# Mismatches that stereomapper classified as stereo-resolution pairs.
is_stereo_resolution = df_fp["classification"].eq("Stereo-resolution pairs")
df_fp_parent_child = df_fp[is_stereo_resolution]
df_fp_parent_child

Unnamed: 0,id1,id2,cluster_a,cluster_b,cluster_a_members,cluster_b_members,cluster_a_size,cluster_b_size,classification,score,score_details,extra_info,version_tag,pair_key,label
275,CHEBI_37477,CHEBI_37476,547,549,"[""chebi:37477""]","[""chebi:37476""]",1,1,Stereo-resolution pairs,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_37476, CHEBI_37477)",Enantiomers
312,CHEBI_38969,CHEBI_3332,621,622,"[""chebi:38969""]","[""chebi:3332""]",1,1,Stereo-resolution pairs,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_3332, CHEBI_38969)",Enantiomers
387,CHEBI_90389,CHEBI_90391,771,772,"[""chebi:90389""]","[""chebi:90391""]",1,1,Stereo-resolution pairs,75.0,"{""confidence_bin"":""medium""}",,v1.0,"(CHEBI_90389, CHEBI_90391)",Enantiomers
388,CHEBI_90394,CHEBI_90395,773,774,"[""chebi:90394""]","[""chebi:90395""]",1,1,Stereo-resolution pairs,75.0,"{""confidence_bin"":""medium""}",,v1.0,"(CHEBI_90394, CHEBI_90395)",Enantiomers
402,CHEBI_189872,CHEBI_189871,801,802,"[""chebi:189872""]","[""chebi:189871""]",1,1,Stereo-resolution pairs,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_189871, CHEBI_189872)",Enantiomers
406,CHEBI_17426,CHEBI_137932,809,810,"[""chebi:17426""]","[""chebi:137932""]",1,1,Stereo-resolution pairs,100.0,"{""confidence_bin"":""high""}",,v1.0,"(CHEBI_137932, CHEBI_17426)",Enantiomers
414,CHEBI_90386,CHEBI_90387,826,827,"[""chebi:90386""]","[""chebi:90387""]",1,1,Stereo-resolution pairs,75.0,"{""confidence_bin"":""medium""}",,v1.0,"(CHEBI_90386, CHEBI_90387)",Enantiomers
467,CHEBI_39336,CHEBI_39335,931,932,"[""chebi:39336""]","[""chebi:39335""]",1,1,Stereo-resolution pairs,76.0,"{""confidence_bin"":""medium""}",,v1.0,"(CHEBI_39335, CHEBI_39336)",Enantiomers
689,CHEBI_63698,CHEBI_63695,1370,1373,"[""chebi:63698""]","[""chebi:63695""]",1,1,Stereo-resolution pairs,77.0,"{""confidence_bin"":""medium""}",,v1.0,"(CHEBI_63695, CHEBI_63698)",Enantiomers
690,CHEBI_63705,CHEBI_63700,1371,1372,"[""chebi:63705""]","[""chebi:63700""]",1,1,Stereo-resolution pairs,77.0,"{""confidence_bin"":""medium""}",,v1.0,"(CHEBI_63700, CHEBI_63705)",Enantiomers


All are incorrect predictions by StereoMapper.

In [31]:
# Mismatches that stereomapper left as unresolved.
is_unresolved = df_fp["classification"].eq("Unresolved")
df_fp_unresolved = df_fp[is_unresolved]
df_fp_unresolved

Unnamed: 0,id1,id2,cluster_a,cluster_b,cluster_a_members,cluster_b_members,cluster_a_size,cluster_b_size,classification,score,score_details,extra_info,version_tag,pair_key,label
252,CHEBI_37465,CHEBI_15386,501,503,"[""chebi:37465""]","[""chebi:15386""]",1,1,Unresolved,100.0,"{""confidence_bin"":""high""}",Possible pipeline error - should be no identic...,v1.0,"(CHEBI_15386, CHEBI_37465)",Enantiomers
786,CHEBI_38139,CHEBI_44343,1564,1566,"[""chebi:38139""]","[""chebi:44343""]",1,1,Unresolved,100.0,"{""confidence_bin"":""high""}",Possible pipeline error - should be no identic...,v1.0,"(CHEBI_38139, CHEBI_44343)",Enantiomers
895,CHEBI_47008,CHEBI_47011,1778,1780,"[""chebi:47008""]","[""chebi:47011""]",1,1,Unresolved,100.0,"{""confidence_bin"":""high""}",Possible pipeline error - should be no identic...,v1.0,"(CHEBI_47008, CHEBI_47011)",Enantiomers
1119,CHEBI_83132,CHEBI_83130,2218,2219,"[""chebi:83132""]","[""chebi:83130""]",1,1,Unresolved,75.0,"{""confidence_bin"":""medium""}",Possible pipeline error - should be no identic...,v1.0,"(CHEBI_83130, CHEBI_83132)",Enantiomers
1120,CHEBI_83131,CHEBI_83129,2220,2221,"[""chebi:83131""]","[""chebi:83129""]",1,1,Unresolved,75.0,"{""confidence_bin"":""medium""}",Possible pipeline error - should be no identic...,v1.0,"(CHEBI_83129, CHEBI_83131)",Enantiomers


All are incorrect predictions by StereoMapper. The single unclassified case is a complex one in which the stereochemistry differs, but so do the protonation states.

In [32]:
# Corrections from manual review: 23 corrected FP among the diastereomers
# and 1 corrected FP among the unclassified cases.
n_corrected_diastereomers = 23
n_corrected_unclassified = 1
n_corrected = n_corrected_diastereomers + n_corrected_unclassified

# Move the corrected cases from the FP count to the TP count.
tp_act = tp + n_corrected
fp_act = fp - n_corrected

if (tp_act + fp_act) > 0:
    actual_precision = tp_act / (tp_act + fp_act)
else:
    actual_precision = 0

if (tp_act + fn) > 0:
    actual_recall = tp_act / (tp_act + fn)
else:
    actual_recall = 0

if (actual_precision + actual_recall) > 0:
    actual_f1 = 2 * actual_precision * actual_recall / (actual_precision + actual_recall)
else:
    actual_f1 = 0

print(f"Adjusted Precision: {actual_precision:.3f}")
print(f"Adjusted Recall:    {actual_recall:.3f}")
print(f"Adjusted F1 score:  {actual_f1:.3f}")

Adjusted Precision: 0.972
Adjusted Recall:    1.000
Adjusted F1 score:  0.986
