In [1]:
from wp2_functions import drf_features_from_rsmi, drf_features_from_graphs
import pandas as pd
from vis_utils import plot_drf_from_counters_rsmi
import plotly.io as pio
# Make "vscode" Plotly's default renderer for figures shown in this notebook.
pio.renderers.default = "vscode"

TODOs:

1. Implement the transformations Φ for vertex, edge, and shortest-path labels.
2. Implement the DRF transformation function.
3. Implement the Weisfeiler-Lehman algorithm, applying Φ so that hashed feature sets are
returned at every iteration.
4. Pre-compute feature hash sets for all datasets.

In [2]:
# Load the tab-separated reaction table and take a single reaction SMILES
# (positional row 20 of the "clean_rxn" column) as the test example.
path = "schneider50k_clean.tsv"
data = pd.read_csv(path, sep="\t")
rsmi = data["clean_rxn"].iat[20]
print(rsmi)


[NH2:1][CH2:2][c:3]1[cH:33][cH:32][cH:31][c:5]([CH2:6][N:7]([CH2:20][c:21]2[cH:22][cH:23][c:24]([C:27]([F:28])([F:29])[F:30])[cH:25][cH:26]2)[S:8](=[O:9])(=[O:10])[c:11]2[cH:16][c:15]([Cl:17])[cH:14][c:13]([Cl:18])[c:12]2[OH:19])[cH:4]1.[O:44]=[C:43]=[N:42][c:36]1[cH:37][cH:38][c:39]([F:41])[cH:40][c:35]1[F:34]>>[O:44]=[C:43]([NH:1][CH2:2][c:3]1[cH:33][cH:32][cH:31][c:5]([CH2:6][N:7]([CH2:20][c:21]2[cH:26][cH:25][c:24]([C:27]([F:30])([F:28])[F:29])[cH:23][cH:22]2)[S:8](=[O:10])(=[O:9])[c:11]2[cH:16][c:15]([Cl:17])[cH:14][c:13]([Cl:18])[c:12]2[OH:19])[cH:4]1)[NH:42][c:36]1[cH:37][cH:38][c:39]([F:41])[cH:40][c:35]1[F:34]


## 1. Implement transformations Φ for Vertex, Edge, and Shortest-Path Labels

In [3]:
# Compute the DRF feature counters of the test reaction, one per label mode.
drf_E = drf_features_from_rsmi(rsmi, mode="edge")
drf_V = drf_features_from_rsmi(rsmi, mode="vertex")
drf_SP = drf_features_from_rsmi(rsmi, mode="sp", include_edge_labels_in_sp=True)

# Report the total feature count (sum over all counter values) for each mode.
for heading, features in (
    ("DRF edge feature count:", drf_E),
    ("DRF vertex feature count:", drf_V),
    ("DRF shortest-path feature count:", drf_SP),
):
    print(heading, sum(features.values()))

# Optional: look at up to the first ten (hash, count) entries of the edge DRF.
print("Sample DRF edge items:", [*drf_E.items()][:10])

DRF edge feature count: 3
DRF vertex feature count: 0
DRF shortest-path feature count: 399
Sample DRF edge items: [('095751d849efbdb232d632cd4a7fc89e', 1), ('e79537351a6bc51d5c4fbea90fec0b26', 2)]


In [4]:
# Plot the DRF of the test reaction once per label mode. All three figures
# use the same label-hashing settings, so those are collected in one place.
hash_opts = {"hash_labels": True, "digest_size": 16}

# Edge mode, with show_edge_labels enabled.
fig_e = plot_drf_from_counters_rsmi(
    rsmi, drf_counter=drf_E, mode="edge", show_edge_labels=True, **hash_opts
)
fig_e.show(renderer="vscode")

# Vertex mode.
fig_v = plot_drf_from_counters_rsmi(
    rsmi, drf_counter=drf_V, mode="vertex", **hash_opts
)
fig_v.show(renderer="vscode")

# Shortest-path mode; include_edge_labels_in_sp matches the setting that was
# used when drf_SP was computed.
fig_sp = plot_drf_from_counters_rsmi(
    rsmi, drf_counter=drf_SP, mode="sp", include_edge_labels_in_sp=True, **hash_opts
)
fig_sp.show(renderer="vscode")