In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# (except those excluded with %aimport) before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import json
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd
import requests


def load_github_csv(url, timeout=30.0):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the raw CSV file to fetch.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Defaults to 30 seconds so that a
        stalled connection raises instead of blocking the notebook forever
        (``requests`` applies no timeout unless one is given).

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the downloaded CSV text.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as CSV.
    response.raise_for_status()
    return pd.read_csv(StringIO(response.text))


# Fetch the benchmark table (all ligands, all force fields) from GitHub.
df = load_github_csv(
    url="https://raw.githubusercontent.com/dfhahn/protein-ligand-benchmark-analysis/master/03_comparison_experiment/03b_all_ligands_all_ffs.csv"
)

In [3]:
# Keep only the four columns the node update reads: the ligand and target
# identifiers plus the DG value and its error.
df = df.loc[:, ["ligand", "target", "DG_Exp.", "dDG_Exp."]]

# Update nodes

In [4]:
# Update the nodes
def lookup_experimental_dg(frame: pd.DataFrame, target: str, ligand: str):
    """Return ``(dg, dg_error)`` for one ligand of one target.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with ``ligand``, ``target``, ``DG_Exp.`` and ``dDG_Exp.`` columns.
    target : str
        Value to match in the ``target`` column.
    ligand : str
        Value to match in the ``ligand`` column.

    Returns
    -------
    tuple of float
        The ``DG_Exp.`` and ``dDG_Exp.`` entries of the single matching row,
        converted to built-in ``float``. The conversion matters because
        ``json.dumps`` rejects NumPy scalars that are not ``float`` subclasses
        (e.g. ``numpy.int64`` from an integer-inferred column).

    Raises
    ------
    AssertionError
        If the (target, ligand) pair matches zero rows or more than one row.
    """
    subset = frame.query("target == @target & ligand == @ligand")
    assert (
        subset.shape[0] == 1
    ), f"Something went wrong for target {target}, ligand {ligand}"
    return float(subset["DG_Exp."].iloc[0]), float(subset["dDG_Exp."].iloc[0])


lomap_jsons = sorted(Path("../perturbations").glob("*/lomap.json"))
for json_file in lomap_jsons:

    results_dict = json.loads(json_file.read_text())
    # The directory holding each lomap.json is named after its target.
    target = json_file.parent.name
    print(f"Processing: {target}")
    for key, node in results_dict["nodes"].items():
        dg, dg_err = lookup_experimental_dg(df, target, key)
        node.update({"dg_value": dg, "dg_error": dg_err})

    # Rewrite the file in place with the annotated nodes.
    json_file.write_text(json.dumps(results_dict, indent=4))

Processing: bace
Processing: bace_hunt
Processing: bace_p2
Processing: cdk2
Processing: cdk8
Processing: cmet
Processing: eg5
Processing: galectin
Processing: hif2a
Processing: jnk1
Processing: mcl1
Processing: p38
Processing: pde10
Processing: pde2
Processing: pfkfb3
Processing: ptp1b
Processing: shp2
Processing: syk
Processing: thrombin
Processing: tnks2
Processing: tyk2


# Update the edges

In [5]:
# Update the edges
lomap_jsons = sorted(Path("../perturbations").glob("*/lomap.json"))

for json_file in lomap_jsons:
    results_dict = json.loads(json_file.read_text())
    target = json_file.parent.name
    print(f"Processing: {target}")

    nodes = results_dict["nodes"]
    new_edges = []
    for edge in results_dict["edges"]:
        _from, _to = edge["from"], edge["to"]

        # Relative value of the edge: end node minus start node.
        dg = round(nodes[_to]["dg_value"] - nodes[_from]["dg_value"], 2)

        # Error propagation as in:
        # https://github.com/openforcefield/protein-ligand-benchmark/blob/main/plbenchmark/edges.py#L51-L53
        to_err_sq = np.power(nodes[_to]["dg_error"], 2.0)
        from_err_sq = np.power(nodes[_from]["dg_error"], 2.0)
        dg_err = round(np.sqrt(to_err_sq + from_err_sq), 2)

        # Copy the edge without the "dg_*" keys, then set the "ddg_*" keys.
        kept = {
            name: value
            for name, value in edge.items()
            if name not in ("dg_value", "dg_error")
        }
        new_edges.append(dict(kept, ddg_value=dg, ddg_error=dg_err))

    results_dict["edges"] = new_edges
    json_file.write_text(json.dumps(results_dict, indent=4))

Processing: bace
Processing: bace_hunt
Processing: bace_p2
Processing: cdk2
Processing: cdk8
Processing: cmet
Processing: eg5
Processing: galectin
Processing: hif2a
Processing: jnk1
Processing: mcl1
Processing: p38
Processing: pde10
Processing: pde2
Processing: pfkfb3
Processing: ptp1b
Processing: shp2
Processing: syk
Processing: thrombin
Processing: tnks2
Processing: tyk2
