In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from cinnabar import stats
from cinnabar.plotting import _master_plot

In [None]:
# Public benchmark results: the cumulative data and the edge data that carries
# the experimental values
_public_results_url = "https://raw.githubusercontent.com/OpenFreeEnergy/IndustryBenchmarks2024/refs/heads/main/industry_benchmarks/analysis/processed_results"
cumulative_data = pd.read_csv(f"{_public_results_url}/combined_pymbar3_cumulative_data.csv")
normal_edge_data = pd.read_csv(f"{_public_results_url}/combined_pymbar3_edge_data.csv")

In [None]:
# Rerun results: cumulative data and the edge data that carries the experimental values
_rerun_results_url = "https://raw.githubusercontent.com/OpenFreeEnergy/IndustryBenchmarks2024/refs/heads/main/industry_benchmarks/analysis/processed_results/reruns"
rerun_cumulative_data = pd.read_csv(f"{_rerun_results_url}/rerun_pymbar3_cumulative_data.csv")
rerun_edge_data = pd.read_csv(f"{_rerun_results_url}/rerun_pymbar3_edge_data.csv")

In [None]:
# Swap the original pfkfb3 results for the rerun results, first in the cumulative
# table and then in the edge table
cumulative_data = pd.concat(
    [cumulative_data.loc[cumulative_data["system name"].ne("pfkfb3")], rerun_cumulative_data],
    ignore_index=True,
)
normal_edge_data = pd.concat(
    [normal_edge_data.loc[normal_edge_data["system name"].ne("pfkfb3")], rerun_edge_data],
    ignore_index=True,
)
normal_edge_data

In [None]:
# Private benchmark results: cumulative data plus the edge data, keeping only the
# edges that are not flagged as failed
_private_results_url = "https://raw.githubusercontent.com/OpenFreeEnergy/IndustryBenchmarks2024/refs/heads/main/industry_benchmarks/analysis/private_processed_results"
private_cumulative_data = pd.read_csv(f"{_private_results_url}/combined_pymbar3_cumulative_data.csv")
private_edge_data = pd.read_csv(f"{_private_results_url}/combined_pymbar3_edge_data.csv")
private_edge_data = private_edge_data.loc[private_edge_data["failed"].ne(True)]
private_edge_data

In [None]:
# Calculated DG tables for the public and the private data sets
_analysis_url = "https://raw.githubusercontent.com/OpenFreeEnergy/IndustryBenchmarks2024/refs/heads/main/industry_benchmarks/analysis"
public_dg_data = pd.read_csv(f"{_analysis_url}/processed_results/combined_pymbar3_calculated_dg_data.csv")
private_dg_data = pd.read_csv(f"{_analysis_url}/private_processed_results/combined_pymbar3_calculated_dg_data.csv")

In [None]:
# Mean DDG uncertainty over all edges, public first and private second. The per-edge
# uncertainty combines the standard deviation over the three complex repeats and
# the three solvent repeats in quadrature.
complex_repeat_columns = [f"complex_repeat_{i}_DG (kcal/mol)" for i in range(3)]
solvent_repeat_columns = [f"solvent_repeat_{i}_DG (kcal/mol)" for i in range(3)]

public_complex_data = normal_edge_data[complex_repeat_columns]
public_solvent_data = normal_edge_data[solvent_repeat_columns]
private_complex_data = private_edge_data[complex_repeat_columns]
private_solvent_data = private_edge_data[solvent_repeat_columns]

public_uncertainty = (
    public_complex_data.std(axis=1).pow(2) + public_solvent_data.std(axis=1).pow(2)
).pow(0.5)
print(public_uncertainty.mean())
private_uncertainty = (
    private_complex_data.std(axis=1).pow(2) + private_solvent_data.std(axis=1).pow(2)
).pow(0.5)
print(private_uncertainty.mean())



In [None]:
# The cumulative data stores each phase and repeat on its own row. Collapse it into
# one record per edge holding the DDG estimate and its error after each ns.
per_ns_ddgs_neutral, per_ns_ddgs_charged = [], []
for _, row in normal_edge_data.iterrows():
    # edges without a charge change use 5 ns of samples, all others use 20 ns
    total_ns = 5 if row["alchemical_charge_difference"] == 0 else 20
    cumulative_data_row = {
        key: row[key]
        for key in (
            "system group",
            "system name",
            "ligand_A",
            "ligand_B",
            "exp DDG (kcal/mol)",
            "exp dDDG (kcal/mol)",
            "alchemical_charge_difference",
        )
    }
    # all cumulative rows (every phase and repeat) that belong to this edge
    edge_rows = cumulative_data[
        (cumulative_data["system group"] == row["system group"])
        & (cumulative_data["system name"] == row["system name"])
        & (cumulative_data["ligand_A"] == row["ligand_A"])
        & (cumulative_data["ligand_B"] == row["ligand_B"])
    ]
    complex_data = edge_rows[edge_rows["phase"] == "complex"]
    solvent_data = edge_rows[edge_rows["phase"] == "solvent"]

    for i in range(1, total_ns + 1):
        dg_column = f"Samples {i}ns (subsample) DG"
        complex_dgs, solvent_dgs = complex_data[dg_column], solvent_data[dg_column]
        # DDG is the mean over repeats of complex minus solvent; the error combines
        # the standard deviations over repeats in quadrature
        cumulative_data_row[f"Samples {i} ns DDG"] = complex_dgs.mean() - solvent_dgs.mean()
        cumulative_data_row[f"Samples {i} ns dDDG"] = (complex_dgs.std() ** 2 + solvent_dgs.std() ** 2) ** 0.5
    # neutral and charged edges are kept in separate lists
    (per_ns_ddgs_neutral if total_ns == 5 else per_ns_ddgs_charged).append(cumulative_data_row)
    


In [None]:
# Build the same per-edge, per-ns DDG records for the private data
per_ns_ddgs_neutral_private, per_ns_ddgs_charged_private = [], []
for _, row in private_edge_data.iterrows():
    # edges without an experimental DDG are the intermediates, leave them out
    if pd.isna(row["exp DDG (kcal/mol)"]):
        continue
    # edges without a charge change use 5 ns of samples, all others use 20 ns
    total_ns = 5 if row["alchemical_charge_difference"] == 0 else 20
    cumulative_data_row = {
        "partner": row["partner_id"],
        "system name": row["dataset_name"],
    }
    cumulative_data_row.update(
        (key, row[key])
        for key in (
            "ligand_A",
            "ligand_B",
            "exp DDG (kcal/mol)",
            "exp dDDG (kcal/mol)",
            "alchemical_charge_difference",
        )
    )
    # all cumulative rows (every phase and repeat) that belong to this edge
    edge_rows = private_cumulative_data[
        (private_cumulative_data["partner_id"] == row["partner_id"])
        & (private_cumulative_data["dataset_name"] == row["dataset_name"])
        & (private_cumulative_data["ligand_A"] == row["ligand_A"])
        & (private_cumulative_data["ligand_B"] == row["ligand_B"])
    ]
    complex_data = edge_rows[edge_rows["phase"] == "complex"]
    solvent_data = edge_rows[edge_rows["phase"] == "solvent"]

    for i in range(1, total_ns + 1):
        dg_column = f"Samples {i}ns (subsample) DG"
        complex_dgs, solvent_dgs = complex_data[dg_column], solvent_data[dg_column]
        # DDG is the mean over repeats of complex minus solvent; the error combines
        # the standard deviations over repeats in quadrature
        cumulative_data_row[f"Samples {i} ns DDG"] = complex_dgs.mean() - solvent_dgs.mean()
        cumulative_data_row[f"Samples {i} ns dDDG"] = (complex_dgs.std() ** 2 + solvent_dgs.std() ** 2) ** 0.5
    # neutral and charged edges are kept in separate lists
    (per_ns_ddgs_neutral_private if total_ns == 5 else per_ns_ddgs_charged_private).append(cumulative_data_row)

In [None]:
# get the average uncertainty for the public and private data for the neutral edges
# NOTE: this rebinds per_ns_ddgs_neutral from the list of row dicts built above to a
# DataFrame; the shp2 patch cell further down indexes it with .loc and so relies on
# this conversion having been run.
per_ns_ddgs_neutral = pd.DataFrame(per_ns_ddgs_neutral)
# mean dDDG over the public neutral edges when all 5 ns of samples are used
per_ns_ddgs_neutral["Samples 5 ns dDDG"].mean() 

In [None]:
# convert the private neutral records (a list of row dicts) to a DataFrame and report
# the mean dDDG when all 5 ns of samples are used
per_ns_ddgs_neutral_private = pd.DataFrame(per_ns_ddgs_neutral_private)
per_ns_ddgs_neutral_private["Samples 5 ns dDDG"].mean()

In [None]:
# display the per-edge, per-ns DDG table for the private neutral edges
per_ns_ddgs_neutral_private

In [None]:
# The 5 ns estimate from the cumulative data differs from the edge data for the
# merck/shp2 edge Example-27 -> 6 (all other edges agree), so overwrite the 5 ns DDG
# and dDDG of that edge with values recomputed from the per-repeat edge data.
source_edge_mask = (
    normal_edge_data["system group"].eq("merck")
    & normal_edge_data["system name"].eq("shp2")
    & normal_edge_data["ligand_A"].eq("Example-27")
    & normal_edge_data["ligand_B"].eq("6")
)
shp2_edge = normal_edge_data.loc[source_edge_mask]
complex_data = shp2_edge[[f"complex_repeat_{i}_DG (kcal/mol)" for i in range(3)]]
solvent_data = shp2_edge[[f"solvent_repeat_{i}_DG (kcal/mol)" for i in range(3)]]
# DDG: mean over repeats of complex minus solvent; dDDG: standard deviations over
# repeats combined in quadrature
ddg_5_ns = complex_data.mean(axis=1) - solvent_data.mean(axis=1)
ddg_5_ns_error = (complex_data.std(axis=1) ** 2 + solvent_data.std(axis=1) ** 2) ** 0.5

# the same edge in the per-ns table; the columns used for the mask are not modified
# by the patch, so the mask can be built once and reused
patched_edge_mask = (
    per_ns_ddgs_neutral["system group"].eq("merck")
    & per_ns_ddgs_neutral["system name"].eq("shp2")
    & per_ns_ddgs_neutral["ligand_A"].eq("Example-27")
    & per_ns_ddgs_neutral["ligand_B"].eq("6")
)
per_ns_ddgs_neutral.loc[patched_edge_mask, "Samples 5 ns DDG"] = ddg_5_ns.values[0]
per_ns_ddgs_neutral.loc[patched_edge_mask, "Samples 5 ns dDDG"] = ddg_5_ns_error.values[0]
# show the patched row
print(per_ns_ddgs_neutral.loc[patched_edge_mask])

In [None]:
# give the private DG table the column names used by the private per-ns records
# ("system name" and "partner")
private_dg_data = private_dg_data.rename(
    columns={"dataset_name": "system name", "partner_id": "partner"}
)

In [None]:
from cinnabar import FEMap, Measurement
from openff.units import unit
from collections import defaultdict
from scipy.stats import kendalltau
def get_metric_per_ns(df_neutral: pd.DataFrame, df_charged: pd.DataFrame, dg_df: pd.DataFrame, public: bool = True) -> tuple[pd.DataFrame, dict[int, list]]:
    """
    Calculate the DG values for each 20% of the simulation time, calculate the standard bootstrapped statistics and the pairwise RMSE and MUE DDG values.

    For every system group / system name pair and for 20, 40, 60, 80 and 100% of the
    sampling time, the per-edge DDG estimates are loaded into a cinnabar ``FEMap``,
    absolute values are generated, and the predictions are compared with the
    mean-centred experimental values.

    Parameters
    ----------
    df_neutral, df_charged
        Per-edge tables with ``ligand_A``, ``ligand_B``,
        ``alchemical_charge_difference`` and ``Samples {n} ns DDG`` /
        ``Samples {n} ns dDDG`` columns. Edges with no charge change are read on a
        5 ns scale, all other edges on a 20 ns scale.
    dg_df
        Table with ``ligand name`` and ``Exp DG (kcal/mol)`` columns plus the
        group and name columns selected by ``public``.
    public
        If True the group column is ``"system group"``, otherwise ``"partner"``.
        The name column is ``"system name"`` in both cases.

    Returns
    -------
    tuple
        A DataFrame with one row per system and sampling percentage holding the
        bootstrapped mean and 2.5/97.5 percentiles of MUE, RMSE, Kendall tau and
        pairwise RMSE, and a dict mapping the sampling percentage to the list of
        absolute pairwise DDG errors pooled over all systems.

    Notes
    -----
    The ``"Sampling ns"`` column of the returned frame holds the sampling
    percentage, and the group is always stored under ``"system group"`` even
    when ``public`` is False. Sampling points where
    ``FEMap.generate_absolute_values`` raises ``ValueError`` are skipped, so a
    system can be missing rows. Bootstrapping uses the global ``np.random`` state.
    """
    n_bootstrap = 1000
    exp_col = "Exp DG (kcal/mol)"
    system_group_col = "system group" if public else "partner"
    system_name_col = "system name"
    # merge the neutral and charged dataframes
    temp_df = pd.concat([df_neutral, df_charged], ignore_index=True)
    all_data = []
    all_pairwise_data = defaultdict(list)
    for system in temp_df[system_group_col].unique():
        system_df = temp_df[temp_df[system_group_col] == system]
        for target in system_df[system_name_col].unique():
            target_df = system_df[system_df[system_name_col] == target]
            # get the experimental values for this target
            exp_dg = dg_df[(dg_df[system_name_col] == target) & (dg_df[system_group_col] == system) & (dg_df[exp_col].notna())]
            exp_shift = exp_dg[exp_col].mean()
            exp_range = exp_dg[exp_col].max() - exp_dg[exp_col].min()
            # mean-centred experimental value per ligand; the first row wins if a
            # ligand name appears more than once
            shifted_exp = {}
            for ligand, value in zip(exp_dg["ligand name"], exp_dg[exp_col]):
                shifted_exp.setdefault(ligand, value - exp_shift)
            for t in range(20, 120, 20):
                print(f"Calculating for {system} {target} at {t}%")
                fe_map = FEMap()
                for _, row in target_df.iterrows():
                    # workout the total sampling time
                    total_ns = 5 if row["alchemical_charge_difference"] == 0 else 20
                    # integer arithmetic so the column name never depends on float rounding
                    ns = total_ns * t // 100
                    # get the ddg and uncertainty for this ns
                    ddg = row[f"Samples {ns} ns DDG"]
                    ddg_uncertainty = row[f"Samples {ns} ns dDDG"]
                    # a zero uncertainty is replaced with 0.1 kcal/mol
                    fe_map.add_relative_calculation(
                        labelA=row["ligand_A"],
                        labelB=row["ligand_B"],
                        value=ddg * unit.kilocalorie_per_mole,
                        uncertainty=ddg_uncertainty * unit.kilocalorie_per_mole if ddg_uncertainty != 0 else 0.1 * unit.kilocalorie_per_mole,
                    )
                try:
                    fe_map.generate_absolute_values()
                except ValueError as e:
                    print(f"Error generating absolute values for {system} {target} at {t}%: {e}")
                    continue
                abs_df = fe_map.get_absolute_dataframe()
                # predicted DG per ligand (first row wins on duplicate labels)
                calculated_dg = {}
                for ligand, value in zip(abs_df["label"], abs_df["DG (kcal/mol)"]):
                    calculated_dg.setdefault(ligand, value)
                # make arrays of the exp data in the same order as the predicted values
                exp_values, calculated_values = [], []
                for ligand, value in zip(abs_df["label"], abs_df["DG (kcal/mol)"]):
                    # merck-syk has intermediate ligands that are not in the exp data so skip them
                    if ligand in shifted_exp:
                        exp_values.append(shifted_exp[ligand])
                        calculated_values.append(value)
                # calculate the stats
                ns_data = {
                    "system group": system,
                    "system name": target,
                    "Sampling ns": t,
                    "Number of ligands": len(exp_values),
                    # mark a Hahn system: number of ligands above 16 and dynamic range >= 3 kcal/mol
                    "Hahn system": bool(len(exp_values) > 16 and exp_range >= 3),
                }
                boot_mue, boot_rmse, boot_ktau = np.zeros(n_bootstrap), np.zeros(n_bootstrap), np.zeros(n_bootstrap)
                exp_values = np.array(exp_values)
                calculated_values = np.array(calculated_values)
                for i in range(n_bootstrap):
                    # bootstrap the predictions and exp values
                    boot_indices = np.random.choice(len(exp_values), size=len(exp_values), replace=True)
                    boot_error = calculated_values[boot_indices] - exp_values[boot_indices]
                    boot_mue[i] = np.mean(np.abs(boot_error))
                    boot_rmse[i] = np.sqrt(np.mean(boot_error ** 2))
                    boot_ktau[i] = kendalltau(calculated_values[boot_indices], exp_values[boot_indices])[0]

                for stat, values in zip(
                    ["MUE", "RMSE", "Kendall Tau"],
                    [boot_mue, boot_rmse, boot_ktau]
                ):
                    ns_data[stat] = np.mean(values)
                    ns_data[f"{stat} lower"] = np.percentile(values, 2.5)
                    ns_data[f"{stat} higher"] = np.percentile(values, 97.5)

                # calculate the all pairwise RMSE over every unordered ligand pair
                pairwise_predictions, pairwise_exp = [], []
                ligands = abs_df["label"].unique()
                for i, ligand1 in enumerate(ligands):
                    for ligand2 in ligands[i + 1:]:
                        if ligand1 not in shifted_exp or ligand2 not in shifted_exp:
                            # skip if we have an intermediate ligand that is not in the exp data
                            print(f"Skipping pairwise comparison for {ligand1} and {ligand2} in {system} {target} at {t}% due to missing experimental data.")
                            continue
                        # transforming I->J is J - I
                        pairwise_exp.append(shifted_exp[ligand2] - shifted_exp[ligand1])
                        pairwise_predictions.append(calculated_dg[ligand2] - calculated_dg[ligand1])
                # build the arrays once rather than inside every bootstrap iteration
                pairwise_predictions = np.array(pairwise_predictions)
                pairwise_exp = np.array(pairwise_exp)
                # add the absolute pairwise error data
                all_pairwise_data[t].extend(np.abs(pairwise_predictions - pairwise_exp))
                pairwise_rmse = np.zeros(n_bootstrap)
                for i in range(n_bootstrap):
                    # bootstrap the pairwise predictions and exp values
                    boot_indices = np.random.choice(len(pairwise_exp), size=len(pairwise_exp), replace=True)
                    pairwise_rmse[i] = np.sqrt(np.mean((pairwise_predictions[boot_indices] - pairwise_exp[boot_indices]) ** 2))

                ns_data["Pairwise RMSE"] = np.mean(pairwise_rmse)
                ns_data["Pairwise RMSE lower"] = np.percentile(pairwise_rmse, 2.5)
                ns_data["Pairwise RMSE higher"] = np.percentile(pairwise_rmse, 97.5)
                all_data.append(ns_data)
    return pd.DataFrame(all_data), all_pairwise_data

In [None]:
# per-system metrics and pooled absolute pairwise errors for the public data
# (public=True by default, so systems are grouped by "system group")
metrics_public, pairwise_public = get_metric_per_ns(pd.DataFrame(per_ns_ddgs_neutral), pd.DataFrame(per_ns_ddgs_charged), public_dg_data)

In [None]:
# per-system metrics and pooled absolute pairwise errors for the private data
# (public=False groups the systems by the "partner" column)
metrics_private, pairwise_private = get_metric_per_ns(pd.DataFrame(per_ns_ddgs_neutral_private), pd.DataFrame(per_ns_ddgs_charged_private), private_dg_data, public=False)

In [None]:
# preview the first 20 rows of the public metrics
metrics_public.head(20)

In [None]:
# ECDF of the absolute pairwise DDG errors at each 20% of the simulation time:
# solid lines for the public data, dashed lines for the private data
fig, ax = plt.subplots(figsize=(8, 6))
for t, pairwise_data in pairwise_public.items():
    curves = (
        (pairwise_data, f"{t}%", {}),
        (pairwise_private[t], f"{t}% (private)", {"linestyle": "--"}),
    )
    for errors, curve_label, extra_style in curves:
        sns.ecdfplot(errors, ax=ax, label=curve_label, linewidth=2, **extra_style)
ax.set_xlabel(r"Pairwise |$\Delta\Delta$G$_{calc} - \Delta\Delta$G$_{exp}$ | (kcal/mol)", fontsize=14)
ax.set_ylabel("Cumulative Probability", fontsize=14)
plt.legend(title="Simulation Time", fontsize=12)
# set tick fontsize
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
# plt.xlim(0, 4)

In [None]:
# number of public metric rows at the 100% sampling point (get_metric_per_ns stores
# the sampling percentage in the "Sampling ns" column)
len(metrics_public[metrics_public["Sampling ns"] == 100])

In [None]:
# private metrics for the "Roche" group (for the private data get_metric_per_ns
# stores the partner under "system group")
metrics_private[metrics_private["system group"] == "Roche"]

In [None]:
# Box plots of the per-system RMSE at each 20% of the simulation time: public data
# on the left panel, private data on the right panel
import seaborn as sns
from scipy.stats import wilcoxon

fig, ax = plt.subplots(1, 2, figsize=(8, 6))
sns.boxplot(data=metrics_public, x="Sampling ns", y="RMSE", label="Public", ax=ax[0])
sns.boxplot(data=metrics_private, x="Sampling ns", y="RMSE", label="Private", color="orange", ax=ax[1])

# paired Wilcoxon test of each reduced sampling time against the 100% result, with a
# bracket and stars drawn on the panel when the difference is significant
for i, d in enumerate([metrics_public, metrics_private]):
    dh = 0.1
    y_max = 0
    # x position of the 100% box
    rx = 4
    for lx, j in enumerate([20, 40, 60, 80]):
        sample_rmse = d[d["Sampling ns"] == j]
        # some MLE estimates failed for low sampling times, so only pair the systems
        # present at this sampling time with their 100% value
        full_rmse = [
            d[
                (d["system group"] == row["system group"])
                & (d["system name"] == row["system name"])
                & (d["Sampling ns"] == 100)
            ]["RMSE"].values[0]
            for _, row in sample_rmse.iterrows()
        ]

        stat, p_value = wilcoxon(full_rmse, sample_rmse["RMSE"].values)
        print(f"Wilcoxon test between {j}% and 100% sampling time: stat={stat}, p-value={p_value}")
        if not p_value < 0.05:
            continue

        # bracket between the two boxes, stacked above any earlier bracket
        y_max = max(max(full_rmse), sample_rmse["RMSE"].values.max(), y_max) + dh
        ax[i].plot(
            [lx, lx, rx, rx],
            [y_max, y_max + 0.05, y_max + 0.05, y_max],
            color="black",
            linewidth=1.5,
        )
        # one star per factor of ten below 0.05
        p = .05
        text = ""
        while p_value < p:
            text += "*"
            p *= 0.1
        ax[i].text((lx + rx) / 2, y_max + 0.05, text, ha="center", va="bottom", fontsize=10)
        y_max += 0.1

# one shared legend instead of a legend per panel
fig.legend(title="Dataset", fontsize=10, ncol=2, loc="upper center", bbox_to_anchor=(0.5, 1.0), frameon=True, title_fontsize=14)
for panel in ax:
    panel.get_legend().remove()



In [None]:
# Box plots of the per-system Kendall tau at each 20% of the simulation time,
# restricted to the systems flagged as Hahn systems: public data on the left panel,
# private data on the right panel
import seaborn as sns

fig, ax = plt.subplots(1, 2, figsize=(8, 6))
hahn_public = metrics_public[metrics_public["Hahn system"] == True]
hahn_private = metrics_private[metrics_private["Hahn system"] == True]
sns.boxplot(data=hahn_public, x="Sampling ns", y="Kendall Tau", label="Public", ax=ax[0])
sns.boxplot(data=hahn_private, x="Sampling ns", y="Kendall Tau", label="Private", color="orange", ax=ax[1])

# paired Wilcoxon test of each reduced sampling time against the 100% result, with a
# bracket and stars drawn on the panel when the difference is significant
for i, d in enumerate([hahn_public, hahn_private]):
    dh = 0.1
    y_max = 0
    # x position of the 100% box
    rx = 4
    for lx, j in enumerate([20, 40, 60, 80]):
        # rows at this sampling time (the variable names still say rmse but the
        # values compared in this cell are Kendall tau)
        sample_rmse = d[d["Sampling ns"] == j]
        # some MLE estimates failed for low sampling times, so only pair the systems
        # present at this sampling time with their 100% value
        full_rmse = [
            d[
                (d["system group"] == row["system group"])
                & (d["system name"] == row["system name"])
                & (d["Sampling ns"] == 100)
            ]["Kendall Tau"].values[0]
            for _, row in sample_rmse.iterrows()
        ]

        stat, p_value = wilcoxon(full_rmse, sample_rmse["Kendall Tau"].values)
        print(f"Wilcoxon test between {j}% and 100% sampling time: stat={stat}, p-value={p_value}")
        if not p_value < 0.05:
            continue

        # bracket between the two boxes, stacked above any earlier bracket
        y_max = max(max(full_rmse), sample_rmse["Kendall Tau"].values.max(), y_max) + dh
        ax[i].plot(
            [lx, lx, rx, rx],
            [y_max, y_max + 0.05, y_max + 0.05, y_max],
            color="black",
            linewidth=1.5,
        )
        # one star per factor of ten below 0.05
        p = .05
        text = ""
        while p_value < p:
            text += "*"
            p *= 0.1
        ax[i].text((lx + rx) / 2, y_max + 0.05, text, ha="center", va="bottom", fontsize=10)
        y_max += 0.1

# one shared legend instead of a legend per panel
fig.legend(title="Dataset", fontsize=10, ncol=2, loc="upper center", bbox_to_anchor=(0.5, 1.0), frameon=True, title_fontsize=14)
for panel in ax:
    panel.get_legend().remove()


In [None]:
# Report the public systems whose RMSE changes by more than 0.3 kcal/mol with the
# amount of sampling.
# NOTE(review): the comparison uses rmses[2] (the 60% point) against 100%, while the
# printed message says "between 20 and 100%" - confirm which pair is intended.
sampling_percents = [20, 40, 60, 80, 100]
for system in metrics_public["system group"].unique():
    system_df = metrics_public[metrics_public["system group"] == system]
    targets = system_df["system name"].unique()
    for target in targets:
        target_df = system_df[system_df["system name"] == target]
        rmses = []
        for t in sampling_percents:
            values = target_df[target_df["Sampling ns"] == t]["RMSE"].values
            # sampling points where the absolute values could not be generated have
            # no row, so stop instead of raising an IndexError on .values[0]
            if len(values) == 0:
                break
            rmses.append(values[0])
        if len(rmses) != len(sampling_percents):
            print(f"Skipping {system} {target}: no RMSE available at {t}% sampling")
            continue
        # find system where the RMSE fluctuates a lot
        if abs(rmses[2] - rmses[-1]) > 0.3:
            print(f"{system} {target} has a large fluctuation in RMSE between 20 and 100%: {rmses}")
        


In [None]:
# display the private metrics table
metrics_private

In [None]:
# Report the private systems whose RMSE changes by more than 0.5 kcal/mol with the
# amount of sampling.
# NOTE(review): the comparison uses rmses[2] (the 60% point) against 100%, while the
# printed message says "between 20 and 100%" - confirm which pair is intended.
sampling_percents = [20, 40, 60, 80, 100]
for system in metrics_private["system group"].unique():
    if system == "Odyssey":
        # skip the Odyssey system as it has a different sampling time
        continue
    system_df = metrics_private[metrics_private["system group"] == system]
    targets = system_df["system name"].unique()
    for target in targets:
        target_df = system_df[system_df["system name"] == target]
        rmses = []
        for t in sampling_percents:
            values = target_df[target_df["Sampling ns"] == t]["RMSE"].values
            # sampling points where the absolute values could not be generated have
            # no row, so stop instead of raising an IndexError on .values[0]
            if len(values) == 0:
                break
            rmses.append(values[0])
        if len(rmses) != len(sampling_percents):
            print(f"Skipping {system} {target}: no RMSE available at {t}% sampling")
            continue
        # find system where the RMSE fluctuates a lot
        if abs(rmses[2] - rmses[-1]) > 0.5:
            print(f"{system} {target} has a large fluctuation in RMSE between 20 and 100%: {rmses}")

In [None]:
# Plot, for the public and private neutral and charged edges, the mean absolute
# distance between the DDG after N ns and the DDG using all samples, with a
# bootstrapped 95% confidence band, against the percentage of the sampling time.
fig, ax = plt.subplots(figsize=(6, 4))
nbootstrap = 1000
# confidence level of the band; the 2.5 / 97.5 percentiles below correspond to it
ci = 0.95

colors = sns.color_palette("rocket", 4)
datasets = [
    pd.DataFrame(per_ns_ddgs_neutral),
    pd.DataFrame(per_ns_ddgs_charged),
    pd.DataFrame(per_ns_ddgs_neutral_private),
    pd.DataFrame(per_ns_ddgs_charged_private),
]
for dataset, ns_range, label, line, color in zip(
    datasets,
    [5, 20, 5, 20],
    ["Public Neutral", "Public Charged", "Private Neutral", "Private Charged"],
    ["-", "-", "--", "--"],
    colors,
):
    changes_in_ddg = []
    for i in range(1, ns_range + 1):
        # keep the edges that have an estimate both after i ns and at the full length
        temp_df = dataset[(dataset[f"Samples {i} ns DDG"].notna()) & (dataset[f"Samples {ns_range} ns DDG"].notna())].copy(deep=True).reset_index(drop=True)
        # get the difference between this ns and the final value using all data
        change_in_ddg = abs(temp_df[f"Samples {i} ns DDG"] - temp_df[f"Samples {ns_range} ns DDG"])
        mean_change = change_in_ddg.mean()
        print(i, mean_change)

        # bootstrap the mean; the loop variable must not be called i, otherwise the
        # "extra ns" value recorded below is overwritten with nbootstrap - 1
        mean_values = np.zeros(nbootstrap)
        for boot in range(nbootstrap):
            sample_ids = np.random.choice(len(change_in_ddg), size=len(change_in_ddg), replace=True)
            # positional selection, independent of the index labels
            mean_values[boot] = change_in_ddg.iloc[sample_ids].mean()
        changes_in_ddg.append(
            {
                "mean change": mean_change,
                "extra ns": i,
                # 95% CI of the bootstrapped mean
                "low": np.percentile(mean_values, 2.5),
                "high": np.percentile(mean_values, 97.5),
            }
        )
    temp_change_df = pd.DataFrame(changes_in_ddg)
    print(temp_change_df)
    # express the sampling time as a percentage so 5 ns and 20 ns edges share an axis
    x_labels = [100 * (i / ns_range) for i in range(1, ns_range + 1)]
    plt.plot(x_labels, temp_change_df["mean change"], label=label, linestyle=line, color=color)
    plt.fill_between(x_labels, temp_change_df["low"], temp_change_df["high"], alpha=0.2, linestyle=line, color=color)
plt.ylim((0, 1))
plt.xlim((0, 100))
plt.legend(labelspacing=0.1, fontsize=12)
plt.xlabel("Sampling %", fontdict={"fontsize": 12})
plt.ylabel(r"$<|\Delta\Delta G_{N} - \Delta\Delta G_{total}|>$ kcal/mol", fontdict={"fontsize": 12})
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.savefig("distance_to_final_ddg_percent_public_private.png", dpi=300, bbox_inches="tight")

In [None]:
# Absolute change between the 1 ns and the 5 ns DDG for the private neutral edges
neutral_private = pd.DataFrame(per_ns_ddgs_neutral_private)
neutral_private = neutral_private[(neutral_private["Samples 5 ns DDG"].notna()) & (neutral_private["Samples 1 ns DDG"].notna())].copy(deep=True).reset_index(drop=True)
change_in_private = abs(neutral_private["Samples 1 ns DDG"] - neutral_private["Samples 5 ns DDG"])
# only the last expression of a cell is displayed, so print the mean and standard
# deviation explicitly instead of discarding them
print(change_in_private.mean(), change_in_private.std())
# mean dDDG at 5 ns for the same edges
neutral_private["Samples 5 ns dDDG"].mean()

In [None]:
import numpy as np
def plot_the_distance_to_final_ddg(dataset, ns_range, name):
    """
    Plot the ECDF of the distance between the DDG after N ns and the final DDG.

    One curve is drawn for every N from 1 to ``ns_range - 1``. A dashed vertical
    line marks the mean ``Samples {ns_range} ns dDDG`` of ``dataset``. The mean
    distance for each N is also printed.

    Parameters
    ----------
    dataset
        Table with ``Samples {n} ns DDG`` columns for n up to ``ns_range`` and a
        ``Samples {ns_range} ns dDDG`` column.
    ns_range
        Number of ns treated as the full simulation length.
    name
        File name passed to ``plt.savefig`` (written at 300 dpi).
    """
    final_column = f"Samples {ns_range} ns DDG"
    for i in range(1, ns_range):
        current_column = f"Samples {i} ns DDG"
        # only edges that have an estimate at both sampling times can be compared
        has_both = dataset[current_column].notna() & dataset[final_column].notna()
        temp_df = dataset[has_both].reset_index(drop=True)
        # distance of the i ns estimate from the estimate using all data
        change_in_ddg = (temp_df[current_column] - temp_df[final_column]).abs()
        print(i, change_in_ddg.mean())

        sns.ecdfplot(x=change_in_ddg, label=f"{i} ns", linewidth=2)

    # mean uncertainty at the full simulation length as a vertical reference line
    mean_uncertainty = dataset[f"Samples {ns_range} ns dDDG"].mean()
    plt.axvline(x=mean_uncertainty, color="k", linestyle='--', label=f"Average uncertainty at {ns_range} ns")

    plt.legend(labelspacing=0.1, fontsize=12)
    plt.ylabel("Proportion", fontdict={"fontsize": 12})
    plt.xlabel(r"$|\Delta\Delta G_{N} - \Delta\Delta G_{total}|$ kcal/mol", fontdict={"fontsize": 12})
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.savefig(name, dpi=300, bbox_inches="tight")

In [None]:
# public neutral edges, 5 ns treated as the full simulation length
plot_the_distance_to_final_ddg(dataset=pd.DataFrame(per_ns_ddgs_neutral), ns_range=5, name="distance_to_final_ddg_neutral_public.png")

In [None]:
# private neutral edges, 5 ns treated as the full simulation length
plot_the_distance_to_final_ddg(dataset=pd.DataFrame(per_ns_ddgs_neutral_private), ns_range=5, name="distance_to_final_ddg_neutral_private.png")

In [None]:
# public charged edges, 20 ns treated as the full simulation length
plot_the_distance_to_final_ddg(dataset=pd.DataFrame(per_ns_ddgs_charged), ns_range=20, name="distance_to_final_ddg_charged_public.png")

In [None]:
# private charged edges, 20 ns treated as the full simulation length
plot_the_distance_to_final_ddg(dataset=pd.DataFrame(per_ns_ddgs_charged_private), ns_range=20, name="distance_to_final_ddg_charged_private.png")

In [None]:
# Percentage of the private neutral edges whose 4 ns DDG lies closer to the 5 ns DDG
# than the average uncertainty at 5 ns
neutral_df = pd.DataFrame(per_ns_ddgs_neutral_private)
has_both_estimates = neutral_df["Samples 5 ns DDG"].notna() & neutral_df["Samples 4 ns DDG"].notna()
neutral_df = neutral_df[has_both_estimates].copy(deep=True).reset_index(drop=True)
# absolute distance between the 4 ns and 5 ns estimates
abs_diff = (neutral_df["Samples 4 ns DDG"] - neutral_df["Samples 5 ns DDG"]).abs()
mean_final_uncertainty = neutral_df["Samples 5 ns dDDG"].mean()
no_edges = neutral_df.loc[abs_diff < mean_final_uncertainty, "Samples 4 ns DDG"].count()
no_edges / len(neutral_df) * 100

In [None]:
# number of edges counted in the previous cell
no_edges

In [None]:
# Percentage of the public charged edges whose 16 ns DDG lies closer to the 20 ns
# DDG than the average uncertainty at 20 ns
charged_df = pd.DataFrame(per_ns_ddgs_charged)
has_both_estimates = charged_df["Samples 20 ns DDG"].notna() & charged_df["Samples 16 ns DDG"].notna()
charged_df = charged_df[has_both_estimates].copy(deep=True).reset_index(drop=True)
# absolute distance between the 16 ns and 20 ns estimates
abs_diff = (charged_df["Samples 16 ns DDG"] - charged_df["Samples 20 ns DDG"]).abs()
mean_final_uncertainty = charged_df["Samples 20 ns dDDG"].mean()
no_edges = charged_df.loc[abs_diff < mean_final_uncertainty, "Samples 16 ns DDG"].count()
no_edges / len(charged_df) * 100

In [None]:
# mean dDDG at 20 ns over the charged edges selected in the previous cell
charged_df["Samples 20 ns dDDG"].mean()

In [None]:
from scipy.stats import wilcoxon

# Wilcoxon signed-rank test on the per-edge change between the 4 ns and 5 ns
# estimates in `per_ns_ddgs_neutral`; only edges with both values are used.
neutral_df = (
    pd.DataFrame(per_ns_ddgs_neutral)
    .dropna(subset=["Samples 5 ns DDG", "Samples 4 ns DDG"])
    .reset_index(drop=True)
)
# signed difference (5 ns minus 4 ns) for every edge
diff = neutral_df["Samples 5 ns DDG"].sub(neutral_df["Samples 4 ns DDG"])
w, p = wilcoxon(diff)
print(f"Wilcoxon test: {w}, p-value: {p}")

In [None]:
# Wilcoxon signed-rank test on the per-edge change between the 16 ns and 20 ns
# estimates in `per_ns_ddgs_charged_private`; only edges with both values are used.
charged_df = (
    pd.DataFrame(per_ns_ddgs_charged_private)
    .dropna(subset=["Samples 20 ns DDG", "Samples 16 ns DDG"])
    .reset_index(drop=True)
)
# signed difference (16 ns minus 20 ns) for every edge
diff = charged_df["Samples 16 ns DDG"].sub(charged_df["Samples 20 ns DDG"])
w, p = wilcoxon(diff)
print(f"Wilcoxon test: {w}, p-value: {p}")

In [None]:
# Bootstrap a 95% confidence interval for the mean difference between the 4 ns
# and 5 ns DDG estimates in `per_ns_ddgs_neutral`, then plot the two estimates
# against each other next to the distribution of their differences.
neutral_df = pd.DataFrame(per_ns_ddgs_neutral)
# Keep only edges that have both estimates: a NaN in `diffs` would propagate
# through np.mean of every resample that draws it and turn the CI into NaN.
neutral_df = neutral_df[(neutral_df["Samples 5 ns DDG"].notna()) & (neutral_df["Samples 4 ns DDG"].notna())].copy(deep=True).reset_index(drop=True)
diffs = neutral_df["Samples 4 ns DDG"] - neutral_df["Samples 5 ns DDG"]
n_bootstrap = 1000
n_edges = len(diffs)

# resample the per-edge differences with replacement and keep each resample's mean
# NOTE(review): no random seed is set, so the CI bounds vary slightly between runs
boot_diffs = [
    np.mean(np.random.choice(diffs, size=n_edges, replace=True))
    for _ in range(n_bootstrap)
]

ci_lower = np.percentile(boot_diffs, 2.5)
ci_upper = np.percentile(boot_diffs, 97.5)
mean_diff = np.mean(diffs)
# Plot: 4 ns vs 5 ns predictions and difference histogram
fig, axs = plt.subplots(1, 2, figsize=(12, 5))

# Scatter plot of predictions with the y = x reference line
axs[0].scatter(neutral_df["Samples 5 ns DDG"], neutral_df["Samples 4 ns DDG"], color='#009384', edgecolor='k')
axs[0].plot([-15, 20], [-15, 20], 'k--', label='y = x')
axs[0].set_xlabel('∆∆G at 5 ns (kcal/mol)', fontdict={"fontsize": 12})
axs[0].set_ylabel('∆∆G at 4 ns (kcal/mol)', fontdict={"fontsize": 12})
# tick label size on both axes
axs[0].tick_params(axis='x', labelsize=12)
axs[0].tick_params(axis='y', labelsize=12)
axs[0].set_title('Comparison of Predictions at 4 ns vs 5 ns')
axs[0].legend()
axs[0].grid(True)

# Histogram of differences with the bootstrap CI (red, dashed) and the mean (black)
sns.histplot(diffs, bins=15, kde=True, ax=axs[1], color='#009384')
axs[1].axvline(ci_lower, color='red', linestyle='--', label=f"95% CI: [{ci_lower:.3f}, {ci_upper:.3f}]")
axs[1].axvline(ci_upper, color='red', linestyle='--')
axs[1].axvline(mean_diff, color='black', linestyle='-', label=f"Mean diff: {mean_diff:.3f}")
axs[1].set_xlabel('∆∆G (4 ns - 5 ns)', fontdict={"fontsize": 12})
axs[1].set_ylabel('Frequency', fontdict={"fontsize": 12})
axs[1].set_title('Distribution of Prediction Differences')
axs[1].tick_params(axis='x', labelsize=12)
axs[1].tick_params(axis='y', labelsize=12)
axs[1].legend(fontsize=12)
axs[1].grid(True)

plt.tight_layout()
plt.savefig("4ns_vs_5ns_public.png", dpi=300, bbox_inches="tight")
# plt.show()

In [None]:
# Plot the charged data: bootstrap a 95% confidence interval for the mean
# difference between the 15 ns and 20 ns DDG estimates, then plot the two
# estimates against each other next to the distribution of their differences.
# NOTE(review): `charged_df` is not rebuilt here; this cell uses whatever the most
# recently executed cell assigned to it, so confirm it holds the intended dataset.
# Keep only edges that have both estimates: a NaN in `diffs` would propagate
# through np.mean of every resample that draws it and turn the CI into NaN.
charged_pairs = charged_df[(charged_df["Samples 15 ns DDG"].notna()) & (charged_df["Samples 20 ns DDG"].notna())]
diffs = charged_pairs["Samples 15 ns DDG"] - charged_pairs["Samples 20 ns DDG"]
n_bootstrap = 1000
n_edges = len(diffs)
# resample the per-edge differences with replacement and keep each resample's mean
# NOTE(review): no random seed is set, so the CI bounds vary slightly between runs
boot_diffs = [
    np.mean(np.random.choice(diffs, size=n_edges, replace=True))
    for _ in range(n_bootstrap)
]
ci_lower = np.percentile(boot_diffs, 2.5)
ci_upper = np.percentile(boot_diffs, 97.5)
mean_diff = np.mean(diffs)
# Plot: 15 ns vs 20 ns predictions and difference histogram
fig, axs = plt.subplots(1, 2, figsize=(12, 5))
# Scatter plot of predictions with the y = x reference line
axs[0].scatter(charged_pairs["Samples 20 ns DDG"], charged_pairs["Samples 15 ns DDG"], color='#009384', edgecolor='k')
axs[0].plot([-5, 5], [-5, 5], 'k--', label='y = x')
axs[0].set_xlabel('∆∆G at 20 ns (kcal/mol)', fontdict={"fontsize": 12})
axs[0].set_ylabel('∆∆G at 15 ns (kcal/mol)', fontdict={"fontsize": 12})
# tick label size on both axes
axs[0].tick_params(axis='x', labelsize=12)
axs[0].tick_params(axis='y', labelsize=12)
axs[0].set_title('Comparison of Predictions at 15 ns vs 20 ns')
axs[0].legend()
axs[0].grid(True)
# Histogram of differences with the bootstrap CI (red, dashed) and the mean (black)
sns.histplot(diffs, bins=15, kde=True, ax=axs[1], color='#009384')
axs[1].axvline(ci_lower, color='red', linestyle='--', label=f"95% CI: [{ci_lower:.3f}, {ci_upper:.3f}]")
axs[1].axvline(ci_upper, color='red', linestyle='--')
axs[1].axvline(mean_diff, color='black', linestyle='-', label=f"Mean diff: {mean_diff:.3f}")
axs[1].set_xlabel('∆∆G (15 ns - 20 ns)', fontdict={"fontsize": 12})
axs[1].set_ylabel('Frequency', fontdict={"fontsize": 12})
axs[1].set_title('Distribution of Prediction Differences')
axs[1].tick_params(axis='x', labelsize=12)
axs[1].tick_params(axis='y', labelsize=12)
axs[1].legend(fontsize=12)
axs[1].grid(True)
plt.tight_layout()
# plt.savefig("15ns_vs_20ns_charged.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Set-up for loading every ligand from the prepared-structure SDF files.
import pathlib
from rdkit import Chem
# filled by the loading loop below: (name, target folder, system-group folder) -> formal charge
all_ligands = {}
# Maps a ligand name as read from the SDF `_Name` property to the name used as
# the lookup key; names that are not listed here are used unchanged.
# NOTE(review): presumably the right-hand names match those in the results tables - confirm.
name_conversions = {
    "41 flip": "41-flip",
    "40 flip": "40-flip",
    "38 flip": "38-flip",
    "30 flip": "30-flip",
    "43 flip": "43-flip",
    "47 flip": "47-flip",
    "48 flip": "48-flip",
    "46 flip": "46-flip",
    "36 out": "36o",
    "37 out": "37o",
    "38 out": "38o",
    "39 out": "39o",
    "28 out": "28o",
    "CHEMBL3402756_2.7 redocked": "CHEMBL3402756_2.7_redocked",
    "CHEMBL3402757_6.5 redocked" : "CHEMBL3402757_6.5_redocked",
    "CHEMBL3402758_10 redocked": "CHEMBL3402758_10_redocked",
    "CHEMBL3402760_1 redocked":"CHEMBL3402760_1_redocked",
    "CHEMBL3402762_1 redocked": "CHEMBL3402762_1_redocked",
    "CHEMBL3402759_5.7 redocked": "CHEMBL3402759_5.7_redocked",
    "CHEMBL3402761_1 redocked": "CHEMBL3402761_1_redocked",
    "Example 22":"Example-22",
    "Example 23": "Example-23",
    "Example 14": "Example-14",
    "Example 9": "Example-9",
    "SHP099-1 Example 7": "SHP099-1-Example-7",
    "Example 28": "Example-28",
    "Example 24": "Example-24",
    "Example 26": "Example-26",
    "Example 6": "Example-6",
    "Example 1": "Example-1",
    "Example 30": "Example-30",
    "Example 8": "Example-8",
    "Example 29": "Example-29",
    "Example 2": "Example-2",
    "Example 25": "Example-25",
    "Example 4": "Example-4",
    "Example 3": "Example-3",
    "Example 27": "Example-27",
    "Example 5": "Example-5",
    "9 flip": "9-flip",
}
# (name, target folder, system-group folder) -> RDKit molecule, filled alongside `all_ligands`
key_to_ligand = {}
# NOTE(review): absolute path into one developer's checkout; the notebook only
# runs elsewhere after pointing this at the local prepared_structures folder.
base_data_folder = pathlib.Path("/Users/joshua/Documents/Software/IndustryBenchmarks2024/industry_benchmarks/input_structures/prepared_structures")
for folder in base_data_folder.glob("*"):
    # `folder` is a Path and never compares equal to the string "template",
    # so the directory *name* has to be checked to actually skip that folder
    if not folder.is_dir() or folder.name == "template":
        continue
    for target_ligs in folder.glob("*/ligands.sdf"):
        # load the ligands (the path is passed as str for RDKit builds that reject Path objects)
        supplier = Chem.SDMolSupplier(str(target_ligs), removeHs=False)
        for lig in supplier:
            # RDKit yields None for records it cannot parse
            if lig is None:
                print(f"Skipping unreadable molecule in {target_ligs}")
                continue
            name = lig.GetProp("_Name")
            name = name_conversions.get(name, name)
            ligand_key = (name, target_ligs.parent.name, folder.name)
            all_ligands[ligand_key] = Chem.GetFormalCharge(lig)
            key_to_ligand[ligand_key] = lig

In [None]:
from gufe import SmallMoleculeComponent
import kartograf
from kartograf.filters import (
    filter_ringbreak_changes,
    filter_ringsize_changes,
    filter_whole_rings_only,
)
from rdkit.Chem import Draw
import io
import cairosvg
from PIL import Image
# number of 300x300 panels in the mapping drawing (columns, rows)
grid_x = 2
grid_y = 1
from gufe.visualization.mapping_visualization import draw_mapping


def mapping_karto(liga, ligb, target, system):
    """Draw the Kartograf atom mapping between two ligands and return it as an image.

    The ligands are looked up in ``key_to_ligand`` under ``(liga, target, system)``
    and ``(ligb, target, system)``. The first mapping suggested by the mapper is
    drawn to SVG, converted to PNG bytes and opened with PIL.

    Returns the ``PIL.Image`` produced by ``Image.open``.
    """
    canvas = Draw.rdMolDraw2D.MolDraw2DSVG(grid_x * 300, grid_y * 300, 300, 300)
    atom_mapper = kartograf.KartografAtomMapper(
        atom_map_hydrogens=True,
        additional_mapping_filter_functions=[
            filter_ringbreak_changes,
            filter_ringsize_changes,
            filter_whole_rings_only,
        ],
    )
    component_a, component_b = (
        SmallMoleculeComponent(key_to_ligand[(ligand_name, target, system)])
        for ligand_name in (liga, ligb)
    )
    # only the first suggested mapping is used
    best_mapping = next(atom_mapper.suggest_mappings(component_a, component_b))
    svg = draw_mapping(
        best_mapping._compA_to_compB,
        best_mapping.componentA.to_rdkit(),
        best_mapping.componentB.to_rdkit(),
        canvas,
    )
    png_bytes = cairosvg.svg2png(bytestring=svg.encode('utf-8'))
    return Image.open(io.BytesIO(png_bytes))


In [None]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
# For the edges in `per_ns_ddgs_neutral`, rank by the absolute difference between
# the 4 ns and 5 ns predictions and plot the three largest and three smallest.
neutral_df = pd.DataFrame(per_ns_ddgs_neutral)
neutral_df = neutral_df[(neutral_df["Samples 5 ns DDG"].notna()) & (neutral_df["Samples 4 ns DDG"].notna())].copy(deep=True).reset_index(drop=True)
# absolute change between 4 ns and 5 ns, stored on the frame so it can be sorted on
abs_diff = abs(neutral_df["Samples 4 ns DDG"] - neutral_df["Samples 5 ns DDG"])
neutral_df["abs_diff"] = abs_diff
neutral_df = neutral_df.sort_values(by="abs_diff", ascending=False)
# largest three first, smallest three last; the new 0..5 index doubles as the panel index
top_3_edges = neutral_df.head(3)
bottom_3_edges = neutral_df.tail(3)
selected_edges = pd.concat([top_3_edges, bottom_3_edges], ignore_index=True)
# one panel per selected edge: top row = largest changes, bottom row = smallest
n = 3
fig, axs = plt.subplots(2, n, figsize=(4 * n, 8), sharey=True)
axs = axs.flatten()
sample_times = range(1, 6)

for panel_index, row in selected_edges.iterrows():
    ax = axs[panel_index]
    # distance of every per-ns estimate from the 5 ns estimate of this edge
    diffs = [abs(row[f"Samples {ns} ns DDG"] - row["Samples 5 ns DDG"]) for ns in sample_times]

    ax.plot(np.arange(1, 6), diffs, marker='o')
    ax.set_title(f"{row['system group']}:{row['system name']}")

    # add the atom-mapping drawing of the edge to its panel
    img = mapping_karto(row["ligand_A"], row["ligand_B"], row["system name"], row["system group"])
    imagebox = OffsetImage(img, zoom=0.25)
    ab = AnnotationBbox(imagebox, (0.95, 0.05), frameon=True, xycoords='axes fraction',
                        box_alignment=(1, -1.75))
    ax.add_artist(ab)

# Figure-level labels, layout and the file write are needed once, after all
# panels are drawn, rather than once per panel.
fig.supxlabel('Time (ns)', fontsize=16)
fig.supylabel(r"$|\Delta\Delta G_{N} - \Delta\Delta G_{total}|$ kcal/mol", fontsize=16)
plt.tight_layout()
plt.savefig("top_bottom_3_edges_neutral.png", dpi=300, bbox_inches="tight")

In [None]:
# For the edges in `per_ns_ddgs_charged`, rank by the absolute difference between
# the 16 ns and 20 ns predictions and plot the three largest and three smallest.
charged_df = pd.DataFrame(per_ns_ddgs_charged)
charged_df = charged_df[(charged_df["Samples 20 ns DDG"].notna()) & (charged_df["Samples 16 ns DDG"].notna())].copy(deep=True).reset_index(drop=True)
# absolute change between 16 ns and 20 ns, stored on the frame so it can be sorted on
abs_diff = abs(charged_df["Samples 16 ns DDG"] - charged_df["Samples 20 ns DDG"])
charged_df["abs_diff"] = abs_diff
charged_df = charged_df.sort_values(by="abs_diff", ascending=False)
# largest three first, smallest three last; the new 0..5 index doubles as the panel index
top_3_edges = charged_df.head(3)
bottom_3_edges = charged_df.tail(3)
selected_edges = pd.concat([top_3_edges, bottom_3_edges], ignore_index=True)
# one panel per selected edge: top row = largest changes, bottom row = smallest
n = 3
fig, axs = plt.subplots(2, n, figsize=(4 * n, 8), sharey=True)
axs = axs.flatten()
sample_times = range(1, 21)

for panel_index, row in selected_edges.iterrows():
    ax = axs[panel_index]
    # distance of every per-ns estimate from the 20 ns estimate of this edge
    diffs = [abs(row[f"Samples {ns} ns DDG"] - row["Samples 20 ns DDG"]) for ns in sample_times]

    ax.plot(np.arange(1, 21), diffs, marker='o')
    ax.set_title(f"{row['system group']}:{row['system name']}")

    # add the atom-mapping drawing of the edge to its panel
    img = mapping_karto(row["ligand_A"], row["ligand_B"], row["system name"], row["system group"])
    imagebox = OffsetImage(img, zoom=0.25)
    ab = AnnotationBbox(imagebox, (0.95, 0.05), frameon=True, xycoords='axes fraction',
                        box_alignment=(1, -1.75))
    ax.add_artist(ab)

# Figure-level labels, layout and the file write are needed once, after all
# panels are drawn, rather than once per panel.
fig.supxlabel('Time (ns)', fontsize=16)
fig.supylabel(r"$|\Delta\Delta G_{N} - \Delta\Delta G_{total}|$ kcal/mol", fontsize=16)
plt.tight_layout()
plt.savefig("top_bottom_3_edges_charged.png", dpi=300, bbox_inches="tight")

In [None]:
# Build a long-form table for the box plot: one row per edge with its dataset
# label ("Public" / "Private"), whether the DDG moved by less than 0.5 kcal/mol
# between 4 ns and 5 ns, and the absolute error of the 5 ns DDG against experiment.
all_change_ddg_records = []
for dataset, label in zip([pd.DataFrame(per_ns_ddgs_neutral), pd.DataFrame(per_ns_ddgs_neutral_private)], ["Public", "Private"]):
    for _, row in dataset.iterrows():
        ddg_4ns = row["Samples 4 ns DDG"]
        ddg_5ns = row["Samples 5 ns DDG"]
        # An edge missing either estimate has no defined change; without this
        # guard `NaN < 0.5` is False and the edge would be binned as "> 0.5".
        if pd.isna(ddg_4ns) or pd.isna(ddg_5ns):
            continue
        # change in DDG between 4 ns and 5 ns
        change_in_ddg = abs(ddg_4ns - ddg_5ns)
        # absolute error of the 5 ns estimate
        abs_error = abs(row["exp DDG (kcal/mol)"] - ddg_5ns)
        all_change_ddg_records.append({
            # a change of exactly 0.5 falls in the "> 0.5" bin
            "Change in DDG 4-5 ns": "< 0.5" if change_in_ddg < 0.5 else "> 0.5",
            "abs DDG error": abs_error,
            "ligand_A": row["ligand_A"],
            "ligand_B": row["ligand_B"],
            "dataset": label
        })
all_change_ddg_df = pd.DataFrame(all_change_ddg_records)
all_change_ddg_df


In [None]:
# per-column non-null counts for the "Public" rows labelled "> 0.5"
all_change_ddg_df[(all_change_ddg_df["Change in DDG 4-5 ns"] == "> 0.5") & (all_change_ddg_df["dataset"] == "Public")].count()

In [None]:
# Box plot of the absolute DDG error, grouped by whether the DDG changed by more
# or less than 0.5 kcal/mol between 4 ns and 5 ns, with one box per dataset.
sns.boxplot(x="Change in DDG 4-5 ns", y="abs DDG error", hue="dataset", data=all_change_ddg_df)
# axis labels and tick size
plt.xlabel(r"$|\Delta\Delta$G$_{4ns} - \Delta\Delta$G$_{5ns}|$ (kcal/mol)", fontdict={"fontsize": 12})
plt.ylabel(r"$|\Delta\Delta$G$_{calc} - \Delta\Delta$G$_{\text{exp}}$| (kcal/mol)", fontdict={"fontsize": 12})
plt.yticks(fontsize=12)
plt.legend(title="Dataset", title_fontsize=12, fontsize=12)
# finalise the layout and write the figure
plt.tight_layout()
plt.savefig("absolute_error_boxplot_neutral_large_change_pub_private.png", dpi=300, bbox_inches="tight")

In [None]:
# display `neutral_df` as left behind by the most recently executed cell that assigned it
neutral_df

In [None]:
# Compare the edge metrics of `per_ns_ddgs_neutral_private` edges whose DDG
# changed by at least 0.5 kcal/mol between 4 ns and 5 ns against the rest.
neutral_df = pd.DataFrame(per_ns_ddgs_neutral_private)
neutral_df = neutral_df[(neutral_df["Samples 5 ns DDG"].notna()) & (neutral_df["Samples 4 ns DDG"].notna())].copy(deep=True).reset_index(drop=True)
# absolute change between 4 ns and 5 ns, used to split the edges into two groups
abs_diff = abs(neutral_df["Samples 4 ns DDG"] - neutral_df["Samples 5 ns DDG"])
neutral_large_change_df = neutral_df[abs_diff >= 0.5].copy(deep=True).reset_index(drop=True)
neutral_small_change_df = neutral_df[abs_diff < 0.5].copy(deep=True).reset_index(drop=True)
edge_metrics = ["lomap_score", "shape_score", "volume_score", "mapping_rmsd_score", "morgan_tanimoto_similarity", "atom_pair_dice_similarity", "topological_torsion_dice_similarity"]
# Long-form table (one record per edge and metric) for the box plot. The label
# follows the group an edge was placed in, so an edge at exactly 0.5 cannot be
# labelled differently from the partition above.
all_edge_data = []
for change_label, change_df in [(">0.5", neutral_large_change_df), ("<0.5", neutral_small_change_df)]:
    for _, row in change_df.iterrows():
        # the matching row of the edge table is the same for every metric, so look it up once per edge
        # NOTE(review): `.iloc[0]` raises IndexError when no row of private_edge_data matches
        edge_row = private_edge_data[(private_edge_data["ligand_A"] == row["ligand_A"]) & (private_edge_data["ligand_B"] == row["ligand_B"]) & (private_edge_data["dataset_name"] == row["system name"]) & (private_edge_data["partner_id"] == row["partner"])].iloc[0]
        for metric in edge_metrics:
            all_edge_data.append({
                "Change in DDG 4-5 ns": change_label,
                # line breaks keep the long metric names readable as tick labels
                "metric": metric.replace("_", "\n"),
                "value": edge_row[metric],
            })
all_edge_data_df = pd.DataFrame(all_edge_data)
# plot the boxplot
sns.boxplot(data=all_edge_data_df, x="metric", y="value", hue="Change in DDG 4-5 ns")
plt.xticks(fontsize=10)
plt.yticks(fontsize=12)
# rename the legend title
plt.legend(title=r"$\Delta\Delta\Delta$G$_{4-5}$")
plt.ylabel("Edge metric value", fontdict={"fontsize": 12})
plt.xlabel("Edge metric", fontdict={"fontsize": 12})
plt.savefig("edge_metrics_boxplot_neutral_large_change_private.png", dpi=300, bbox_inches="tight")

In [None]:
# Select the `per_ns_ddgs_neutral` edges (with both a 4 ns and a 5 ns estimate)
# that belong to the ptp1b target of the charge_annihilation_set group.
neutral_df = (
    pd.DataFrame(per_ns_ddgs_neutral)
    .dropna(subset=["Samples 5 ns DDG", "Samples 4 ns DDG"])
    .reset_index(drop=True)
)
system_name = "ptp1b"
system_group = "charge_annihilation_set"
in_system = (neutral_df["system name"] == system_name) & (neutral_df["system group"] == system_group)
system_edges = neutral_df.loc[in_system].reset_index(drop=True)
system_edges

In [None]:
def plot_uncert_distribution_per_ns(df, ns_range):
    """Show a box plot of the ``Samples N ns dDDG`` values for N = 1..ns_range.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Samples N ns dDDG`` column for every N in the range;
        missing (NaN) entries are left out of the plot.
    ns_range : int
        Largest sampling time N (inclusive) to include.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    records = []
    for ns in range(1, ns_range + 1):
        column = f"Samples {ns} ns dDDG"
        # one record per non-missing uncertainty at this sampling time
        records.extend({"value": ddg_error, "ns": ns} for ddg_error in df[column].dropna())
    sns.boxplot(data=pd.DataFrame(records), x="ns", y="value", order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, width=0.6, dodge=True, fliersize=5, linewidth=None, whis=1.5, ax=None)
    plt.xlabel("Sampling ns")
    plt.ylabel(r"$dDDG_{N}$")
    plt.show()

In [None]:
# box plot of the dDDG values in `per_ns_ddgs_neutral` for 1 to 5 ns of sampling
plot_uncert_distribution_per_ns(pd.DataFrame(per_ns_ddgs_neutral), 5)

In [None]:
# plot the average distance to the final result per ns for each system
# write the plots to file seperate the neutral and charged edges
import pathlib
def plot_distance_to_final_ddg_distribution_per_system(df, ns_range, plot_type):
    """Plot, for every target, the mean |DDG_N - DDG_total| against sampling time N.

    For each ("system group", "system name") pair in ``df`` one figure is written
    to ``<system group>/<system name>/abs_distance_to_final_ddg_<plot_type>.png``;
    the folders are created relative to the current working directory. Each point
    is the mean absolute difference between the ``Samples N ns DDG`` and
    ``Samples <ns_range> ns DDG`` columns over the edges that have both values,
    with a 95% bootstrap confidence interval (1000 resamples) as error bars.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns "system group", "system name" and
        ``Samples N ns DDG`` for N = 1..ns_range.
    ns_range : int
        Sampling time whose DDG is treated as the final value; N runs over
        1..ns_range - 1.
    plot_type : str
        Suffix used in the output file name.

    Returns
    -------
    None
    """
    nbootstrap = 1000
    ci = 0.95
    low_frac = (1.0 - ci) / 2.0
    high_frac = 1.0 - low_frac
    # positions of the CI bounds in the sorted bootstrap means; the upper one is
    # clamped so rounding can never index past the last resample
    low_index = int(np.floor(nbootstrap * low_frac))
    high_index = min(int(np.ceil(nbootstrap * high_frac)), nbootstrap - 1)
    x_labels = list(range(1, ns_range))
    final_column = f"Samples {ns_range} ns DDG"
    for system in df["system group"].unique():
        system_folder = pathlib.Path(system)
        system_folder.mkdir(exist_ok=True)
        system_group_df = df[df["system group"] == system]
        for target in system_group_df["system name"].unique():
            print(f"Running for system:{system} and target: {target}")
            target_folder = system_folder.joinpath(target)
            target_folder.mkdir(exist_ok=True)
            target_df = system_group_df[system_group_df["system name"] == target]
            changes_in_ddg = []
            # for each ns compute the abs distance to the final value
            for i in x_labels:
                ns_column = f"Samples {i} ns DDG"
                paired = target_df[target_df[ns_column].notna() & target_df[final_column].notna()]
                change_in_ddg = (paired[ns_column] - paired[final_column]).abs()
                # bootstrap the mean: every row of the draw is one resample
                # (with replacement) of all edges
                # NOTE(review): no random seed is set, so the interval varies slightly between runs
                values = change_in_ddg.to_numpy()
                mean_values = np.sort(
                    np.random.choice(values, size=(nbootstrap, values.size), replace=True).mean(axis=1)
                )
                changes_in_ddg.append({
                    "mean change": change_in_ddg.mean(),
                    "extra ns": i,
                    "low": mean_values[low_index],
                    "high": mean_values[high_index],
                })
            temp_change_df = pd.DataFrame(changes_in_ddg)
            plt.scatter(x_labels, temp_change_df["mean change"])
            plt.errorbar(x_labels, temp_change_df["mean change"], yerr=(temp_change_df["mean change"] - temp_change_df["low"], temp_change_df["high"] - temp_change_df["mean change"]), capsize=4)
            # shaded reference bands: 0-0.15 in green and 0.15-0.3 in orange
            plt.fill_between(np.arange(ns_range + 1), y1=[0.15 for _ in range(ns_range + 1)], alpha=0.3, color="lightgreen")
            plt.fill_between(np.arange(ns_range + 1), y1=[0.15 for _ in range(ns_range + 1)], y2=[0.3 for _ in range(ns_range + 1)], alpha=0.3, color="orange")
            plt.ylim(0, 3)
            plt.xlim(0, ns_range)
            plt.grid()
            plt.title(f"System: {system}, target: {target}")
            plt.xlabel("Sampling ns")
            plt.ylabel(r"Average $|DDG_{N} - DDG_{total}|$")
            plt.xticks(x_labels)
            plt.tight_layout()
            plt.savefig(target_folder.joinpath(f"abs_distance_to_final_ddg_{plot_type}.png"))
            # close the figure so the next target starts from an empty one
            plt.close()

In [None]:
# write the per-target convergence figures for `per_ns_ddgs_neutral` (final value at 5 ns, file suffix "neutral")
plot_distance_to_final_ddg_distribution_per_system(pd.DataFrame(per_ns_ddgs_neutral), 5, "neutral")

In [None]:
# Build one record per edge of `normal_edge_data` that has an experimental DDG,
# holding the calculated DDG and its uncertainty after each fifth of the edge's
# sampling time. Edges with no alchemical charge difference use 5 ns in total and
# all others 20 ns; keying the columns by percentage ("Samples 20% DDG", ...)
# lets both kinds of edge share one table.
identity_columns = (
    "system group",
    "system name",
    "ligand_A",
    "ligand_B",
    "exp DDG (kcal/mol)",
    "exp dDDG (kcal/mol)",
    "alchemical_charge_difference",
)
combined_per_ns_data = []
for _, row in normal_edge_data.iterrows():
    if np.isnan(row["exp DDG (kcal/mol)"]):
        continue
    total_ns = 5 if row["alchemical_charge_difference"] == 0 else 20
    cumulative_data_row = {column: row[column] for column in identity_columns}
    # narrow the cumulative table to this system first, then to this edge and phase
    in_system = (cumulative_data["system group"] == row["system group"]) & (cumulative_data["system name"] == row["system name"])
    temp_cumulative_data = cumulative_data[in_system].reset_index(drop=True)
    is_edge = (temp_cumulative_data["ligand_A"] == row["ligand_A"]) & (temp_cumulative_data["ligand_B"] == row["ligand_B"])
    complex_data = temp_cumulative_data[is_edge & (temp_cumulative_data["phase"] == "complex")]
    solvent_data = temp_cumulative_data[is_edge & (temp_cumulative_data["phase"] == "solvent")]
    fith_sample_time = int(total_ns / 5)

    for i in range(fith_sample_time, total_ns + 1, fith_sample_time):
        dg_column = f"Samples {i}ns (subsample) DG"
        percent = int((i / total_ns) * 100)
        complex_dg = complex_data[dg_column]
        solvent_dg = solvent_data[dg_column]
        # DDG = mean complex DG - mean solvent DG; the two standard deviations add in quadrature
        cumulative_data_row[f"Samples {percent}% DDG"] = complex_dg.mean() - solvent_dg.mean()
        cumulative_data_row[f"Samples {percent}% dDDG"] = (complex_dg.std() ** 2 + solvent_dg.std() ** 2) ** 0.5
    combined_per_ns_data.append(cumulative_data_row)
combined_per_ns_df = pd.DataFrame(combined_per_ns_data)
combined_per_ns_df


In [None]:
# Private-data counterpart: one record per edge of `private_edge_data` that has
# an experimental DDG, holding the calculated DDG and its uncertainty after each
# fifth of the edge's sampling time (5 ns in total when there is no alchemical
# charge difference, 20 ns otherwise).
combined_per_ns_data_private = []
for _, row in private_edge_data.iterrows():
    if np.isnan(row["exp DDG (kcal/mol)"]):
        continue
    total_ns = 5 if row["alchemical_charge_difference"] == 0 else 20
    # the private tables name these columns partner_id / dataset_name
    cumulative_data_row = {
        "partner": row["partner_id"],
        "system name": row["dataset_name"],
    }
    for column in ("ligand_A", "ligand_B", "exp DDG (kcal/mol)", "exp dDDG (kcal/mol)", "alchemical_charge_difference"):
        cumulative_data_row[column] = row[column]
    # narrow the cumulative table to this partner and dataset, then to this edge and phase
    in_dataset = (private_cumulative_data["partner_id"] == row["partner_id"]) & (private_cumulative_data["dataset_name"] == row["dataset_name"])
    temp_cumulative_data = private_cumulative_data[in_dataset].reset_index(drop=True)
    is_edge = (temp_cumulative_data["ligand_A"] == row["ligand_A"]) & (temp_cumulative_data["ligand_B"] == row["ligand_B"])
    complex_data = temp_cumulative_data[is_edge & (temp_cumulative_data["phase"] == "complex")]
    solvent_data = temp_cumulative_data[is_edge & (temp_cumulative_data["phase"] == "solvent")]
    fith_sample_time = int(total_ns / 5)

    for i in range(fith_sample_time, total_ns + 1, fith_sample_time):
        dg_column = f"Samples {i}ns (subsample) DG"
        percent = int((i / total_ns) * 100)
        complex_dg = complex_data[dg_column]
        solvent_dg = solvent_data[dg_column]
        # DDG = mean complex DG - mean solvent DG; the two standard deviations add in quadrature
        cumulative_data_row[f"Samples {percent}% DDG"] = complex_dg.mean() - solvent_dg.mean()
        cumulative_data_row[f"Samples {percent}% dDDG"] = (complex_dg.std() ** 2 + solvent_dg.std() ** 2) ** 0.5
    combined_per_ns_data_private.append(cumulative_data_row)
combined_per_ns_df_private = pd.DataFrame(combined_per_ns_data_private)
combined_per_ns_df_private

In [None]:
def create_stats_breakdown(df):
    """Summarise the edges predicted within 1 kcal/mol of experiment per sampling fraction.

    For 20%, 40%, ..., 100% of the sampling data, the rows of ``df`` that have both
    ``Samples <p>% DDG`` and ``Samples <p>% dDDG`` are reduced to those whose
    absolute error against ``exp DDG (kcal/mol)`` is below 1. For that subset the
    number of edges and the ``mle`` values of cinnabar's bootstrapped MUE and rho
    are recorded.

    Returns
    -------
    pandas.DataFrame
        One row per sampling fraction with the columns "ns" (the percentage),
        "no" (number of edges under the threshold), "MUE" and "rho".
    """
    break_down_data = []
    for percent in range(20, 120, 20):
        ddg_column = f"Samples {percent}% DDG"
        dddg_column = f"Samples {percent}% dDDG"
        # rows with both an estimate and an uncertainty at this fraction
        complete = df.dropna(subset=[ddg_column, dddg_column]).reset_index(drop=True)
        within_threshold = (complete["exp DDG (kcal/mol)"] - complete[ddg_column]).abs() < 1
        accurate = complete[within_threshold].reset_index(drop=True)
        sample_data = {"ns": percent, "no": len(accurate)}
        for stat in ("MUE", "rho"):
            bootstrap_result = stats.bootstrap_statistic(
                y_true=accurate["exp DDG (kcal/mol)"],
                y_pred=accurate[ddg_column],
                dy_true=accurate["exp dDDG (kcal/mol)"],
                dy_pred=accurate[dddg_column],
                statistic=stat,
            )
            sample_data[stat] = bootstrap_result["mle"]
        break_down_data.append(sample_data)
    return pd.DataFrame(break_down_data)


In [None]:
# sub-1 kcal/mol breakdown for the table built from `normal_edge_data`
create_stats_breakdown(combined_per_ns_df)

In [None]:
# plot the distribution of the change in DDG at N compared to the full data averaged across all datasets using the % of the sampling data
# to combine the charge changes with the neutral edges
def plot_distribution_of_ddg_distance_per_system(df):
    """Box-plot the per-target mean |DDG_N - DDG_total| for each sampling percentage.

    For 20%, 40%, ..., 100% of the sampling data, the mean absolute difference
    between ``Samples <p>% DDG`` and ``Samples 100% DDG`` is computed for every
    ("system group", "system name") pair over the edges that have both values.
    The distribution of these means is drawn as one box per percentage, targets
    with a mean above 0.2 at 80% are annotated, and the figure is saved to
    ``distribution_of_ddg_distance_per_system.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs "system group", "system name" and the ``Samples <p>% DDG`` columns.

    Returns
    -------
    pandas.DataFrame
        The plotted table with the columns "value" (mean absolute difference),
        "ns" (the sampling percentage), "system group" and "system name".
    """
    # shorter group names for the annotations
    name_to_new_name = {"bayer_macrocycles": "macrocycle", "charge_annihilation_set": "charged", "miscellaneous_set": "misc"}
    all_data = []
    for i in range(20, 120, 20):
        # edges with a DDG at this percentage and at 100%
        temp_df = df[(df[f"Samples {i}% DDG"].notna()) & (df[f"Samples 100% DDG"].notna())].copy(deep=True).reset_index(drop=True)
        # for each system calculate the average
        for system in temp_df["system group"].unique():
            system_df = temp_df[temp_df["system group"] == system].copy(deep=True).reset_index(drop=True)
            for target in system_df["system name"].unique():
                target_df = system_df[system_df["system name"] == target].copy(deep=True).reset_index(drop=True)
                # mean distance between this percentage and the value using all data
                mean_change_in_ddg = np.mean(abs(target_df[f"Samples {i}% DDG"] - target_df[f"Samples 100% DDG"]))
                all_data.append(
                    {"value": mean_change_in_ddg, "ns": i, "system group": system, "system name": target}
                )
    tmp_all_data = pd.DataFrame(all_data)
    sns.boxplot(data=tmp_all_data, x="ns", y="value", order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, width=0.6, dodge=True, fliersize=5, linewidth=None, whis=1.5, ax=None)
    # add annotations on the plot for the outliers of the 80% sampling boxplot
    j = 0
    for _, row in tmp_all_data.iterrows():
        if row["ns"] != 80:
            continue
        if row["value"] > 0.2:
            print(row)
            system_group = name_to_new_name.get(row["system group"], row["system group"])
            if row["value"] > 0.3:
                x = 2.5
                offset = 0.2
                ha = "center"
            else:
                # stagger successive labels so they do not overlap
                x = 4 - 0.24 * j
                offset = 0.1 * j
                j += 1
                # Always choose the alignment explicitly; otherwise it is either
                # undefined or silently inherited from the previous annotation.
                ha = "left" if row["system name"] in ["shp2", "eg5"] else "center"
            # the arrow points at x=3, the position of the 80% box on the categorical axis
            plt.annotate(f"{system_group}:{row['system name']}", xy=(3, row["value"]), xytext=(x, row["value"] + offset), ha=ha, color='darkred', arrowprops=dict(arrowstyle='->', color='darkred', lw=1.2))
    plt.xlabel("Sampling %", fontdict={"fontsize": 12})
    plt.yticks(fontsize=12)
    plt.xticks(fontsize=12)
    plt.ylabel(r"$<|\Delta\Delta$G$_{N} - \Delta\Delta$G$_{total}|>_{system}$", fontdict={"fontsize": 12})
    plt.savefig("distribution_of_ddg_distance_per_system.png", dpi=300, bbox_inches="tight")
    # hand the table back so callers can inspect the plotted values
    return tmp_all_data


In [None]:
# per-target distance-to-final-DDG box plot for the table built from `normal_edge_data`
plot_distribution_of_ddg_distance_per_system(combined_per_ns_df)

In [None]:
# keep the value returned by the plotting call so the next cell can inspect it
system_average_distance_to_ddg_df = plot_distribution_of_ddg_distance_per_system(combined_per_ns_df)

In [None]:
# rows recorded with ns == 80, largest value first
# NOTE(review): only works when `system_average_distance_to_ddg_df` is a DataFrame, i.e. the plotting function returned its table
system_average_distance_to_ddg_df[system_average_distance_to_ddg_df["ns"] == 80].sort_values("value", ascending=False)

In [None]:
# plot the private dataset
def plot_distribution_of_ddg_distance_per_system_private(df):
    """Box-plot, per sampling level, how far each private system's DDGs are from their 100% values.

    For every sampling percentage N in 20, 40, 60, 80 and 100, the rows of ``df``
    that have both ``Samples N% DDG`` and ``Samples 100% DDG`` are split by
    ``partner`` and then by ``system name``; for each split the mean of
    ``|Samples N% DDG - Samples 100% DDG|`` is stored. One box per N is drawn on
    the current matplotlib axes, systems whose mean at 80% exceeds 0.2 are
    labelled with an arrow, and the figure is written to
    ``distribution_of_ddg_distance_per_system_private.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``partner``, ``system name`` and ``Samples {N}% DDG``
        for N in 20, 40, 60, 80, 100.

    Returns
    -------
    pandas.DataFrame
        One row per (sampling level, partner, system) with the columns
        ``value`` (the mean absolute difference), ``ns`` (the sampling
        percentage), ``system group`` (the partner) and ``system name``.
        Returned so callers can inspect or rank the systems without editing
        this function.
    """
    reference_col = "Samples 100% DDG"
    # shorter display names for some groups; unknown groups keep their own name
    name_to_new_name = {"bayer_macrocycles": "macrocycle", "charge_annihilation_set": "charged", "miscellaneous_set": "misc"}

    all_data = []
    for i in range(20, 120, 20):
        sampled_col = f"Samples {i}% DDG"
        # only edges that have an estimate at this sampling level and at 100%
        temp_df = df[df[sampled_col].notna() & df[reference_col].notna()]
        # nested unique() loops keep the rows in partner-then-system order of appearance,
        # which fixes the order in which the annotations below are staggered
        for system in temp_df["partner"].unique():
            system_df = temp_df[temp_df["partner"] == system]
            for target in system_df["system name"].unique():
                target_df = system_df[system_df["system name"] == target]
                # get the difference between this sampling level and the final value using all data
                mean_change_in_ddg = np.mean(abs(target_df[sampled_col] - target_df[reference_col]))
                all_data.append(
                    {"value": mean_change_in_ddg, "ns": i, "system group": system, "system name": target}
                )
    # explicit columns keep the schema even when no rows were collected
    tmp_all_data = pd.DataFrame(all_data, columns=["value", "ns", "system group", "system name"])

    sns.boxplot(data=tmp_all_data, x="ns", y="value", saturation=0.75, width=0.6, dodge=True, fliersize=5, whis=1.5)

    # add annotations on the plot for the outliers for the 80% sampling boxplot
    outliers = tmp_all_data[(tmp_all_data["ns"] == 80) & (tmp_all_data["value"] > 0.2)]
    j = 0  # counts the moderate outliers so their labels can be staggered
    for _, row in outliers.iterrows():
        system_group = name_to_new_name.get(row["system group"], row["system group"])
        if row["value"] > 0.3:
            x, offset, ha = 3.5, 0.2, "center"
        else:
            # shift each further label left and up so neighbouring labels do not overlap
            x, offset = 4 - 0.24 * j, 0.1 * j
            j += 1
            ha = "left" if row["system name"] in ["shp2", "eg5"] else "center"
        # the arrow points at x=3, the position of the 80% box when all five levels are present
        plt.annotate(f"{system_group}:{row['system name']}", xy=(3, row["value"]), xytext=(x, row["value"] + offset), ha=ha, color='darkred', arrowprops=dict(arrowstyle='->', color='darkred', lw=1.2))

    plt.xlabel("Sampling %", fontdict={"fontsize": 12})
    plt.yticks(fontsize=12)
    plt.xticks(fontsize=12)
    plt.ylabel(r"$<|\Delta\Delta$G$_{N} - \Delta\Delta$G$_{total}|>_{system}$", fontdict={"fontsize": 12})
    plt.savefig("distribution_of_ddg_distance_per_system_private.png", dpi=300, bbox_inches="tight")
    return tmp_all_data

In [None]:
# draw the same box plot for combined_per_ns_df_private; the function saves it as
# distribution_of_ddg_distance_per_system_private.png
plot_distribution_of_ddg_distance_per_system_private(combined_per_ns_df_private)

In [None]:
# list the distinct partner_id values present in the private edge data
private_edge_data["partner_id"].unique()

In [None]:
# EliLilly / Project1 edges of the private dataset.
# The subset is stored under `lilly_1` (the name displayed here and plotted as
# "EliLilly\nProject1" in the LOMAP box plot) so that it does not overwrite `janB_1`.
lilly_1 = private_edge_data[(private_edge_data["partner_id"] == "EliLilly") & (private_edge_data["dataset_name"] == "Project1")].copy(deep=True).reset_index(drop=True)
lilly_1

In [None]:
# compare the LOMAP score distribution of the whole private set with three individual systems
# NOTE(review): `other_systems` is not used by the plot in this cell ("All systems" is drawn from
# the full private_edge_data), and its filter drops every Roche edge and every "target_D" edge
# rather than only the Roche target_D edges — confirm which was intended.
other_systems = (
    private_edge_data[
        (private_edge_data["partner_id"] != "Roche") & (private_edge_data["dataset_name"] != "target_D")
    ]
    .copy(deep=True)
    .reset_index(drop=True)
)

# (tick label, edges) for each box, in left-to-right order
lomap_panels = [
    ("All systems", private_edge_data),
    ("Roche D", roche_D),
    ("Janssen\nSystemB set1", janB_1),
    ("EliLilly\nProject1", lilly_1),
]
for box_position, (_, panel_edges) in enumerate(lomap_panels):
    # one box per call, placed by passing its integer position as x
    sns.boxplot(data=panel_edges, y="lomap_score", x=box_position)

tick_positions = list(range(len(lomap_panels)))
tick_labels = [panel_label for panel_label, _ in lomap_panels]
plt.ylabel("LOMAP score", fontdict={"fontsize": 12})
plt.xticks(tick_positions, tick_labels, fontsize=12)
plt.xlabel("System", fontdict={"fontsize": 12})
plt.savefig("lomap_score_boxplot_for_slow_private_systems.png", dpi=300, bbox_inches="tight")

In [None]:
# rows for the 80% sampling level only, ordered from the largest "value" to the smallest
# NOTE(review): `system_average_private` is not assigned in the surrounding cells; presumably it
# is the per-system table built inside plot_distribution_of_ddg_distance_per_system_private —
# confirm where it is defined so this cell survives a fresh Restart & Run All.
system_average_private[system_average_private["ns"] == 80].sort_values("value", ascending=False)

In [None]:
# Accuracy check without subsampling: for every edge in normal_edge_data, rebuild the DDG
# from the cumulative_data rows of that edge and collect the results in a new DataFrame.
edge_columns = [
    "system group",
    "system name",
    "ligand_A",
    "ligand_B",
    "exp DDG (kcal/mol)",
    "exp dDDG (kcal/mol)",
    "alchemical_charge_difference",
]
combined_per_ns__no_sub_data = []
for _, edge in normal_edge_data.iterrows():
    # edges with zero alchemical charge difference read the 5ns column, all others the 20ns one
    total_ns = 5 if edge["alchemical_charge_difference"] == 0 else 20
    dg_column = f"Samples {total_ns}ns DG"

    # every cumulative row belonging to this system and ligand pair
    same_edge = (
        (cumulative_data["system group"] == edge["system group"])
        & (cumulative_data["system name"] == edge["system name"])
        & (cumulative_data["ligand_A"] == edge["ligand_A"])
        & (cumulative_data["ligand_B"] == edge["ligand_B"])
    )
    edge_rows = cumulative_data[same_edge]
    complex_dg = edge_rows.loc[edge_rows["phase"] == "complex", dg_column]
    solvent_dg = edge_rows.loc[edge_rows["phase"] == "solvent", dg_column]

    # copy the identifying and experimental columns, then add the estimate
    record = {column: edge[column] for column in edge_columns}
    # DDG is the difference of the per-phase means; its error combines the
    # per-phase standard deviations in quadrature
    record["All samples DDG"] = complex_dg.mean() - solvent_dg.mean()
    record["All samples dDDG"] = (complex_dg.std() ** 2 + solvent_dg.std() ** 2) ** 0.5
    combined_per_ns__no_sub_data.append(record)

combined_per_ns_df_no_sub = pd.DataFrame(combined_per_ns__no_sub_data)
combined_per_ns_df_no_sub


In [None]:
def plot_ddgs(df):
    """Plot the "All samples DDG" estimates against the experimental DDGs.

    The experimental values and their errors go on the x axis and the
    calculated values and their errors on the y axis of cinnabar's
    ``_master_plot``; whatever that call returns is passed back unchanged.

    Parameters
    ----------
    df : mapping of column name to values (e.g. a pandas.DataFrame)
        Needs ``exp DDG (kcal/mol)``, ``exp dDDG (kcal/mol)``,
        ``All samples DDG`` and ``All samples dDDG``.
    """
    experimental = df["exp DDG (kcal/mol)"]
    calculated = df["All samples DDG"]
    experimental_error = df["exp dDDG (kcal/mol)"]
    calculated_error = df["All samples dDDG"]
    # black outline around every marker
    marker_style = dict(edgecolors='black', linewidth=0.8)
    axis_limits = [-12, 12]
    return _master_plot(
        x=experimental,
        y=calculated,
        xerr=experimental_error,
        yerr=calculated_error,
        scatter_kwargs=marker_style,
        figsize=5,
        statistic_type="mle",
        xy_lim=axis_limits,
    )

In [None]:
# plot the "All samples DDG" estimates of the no-subsampling table against experiment
plot_ddgs(combined_per_ns_df_no_sub)

In [None]:
# Statistics for the edges whose "All samples DDG" is within 1 of the experimental DDG.
error_thresh = (
    combined_per_ns_df_no_sub["exp DDG (kcal/mol)"] - combined_per_ns_df_no_sub["All samples DDG"]
).abs() < 1
error_thresh_df = combined_per_ns_df_no_sub.loc[error_thresh].copy(deep=True).reset_index(drop=True)

# "no" is the number of edges that passed the threshold
sample_data = {"no": len(error_thresh_df)}
sample_data.update(
    {
        # keep the "mle" entry of each bootstrap result
        stat_name: stats.bootstrap_statistic(
            y_true=error_thresh_df["exp DDG (kcal/mol)"],
            y_pred=error_thresh_df["All samples DDG"],
            dy_true=error_thresh_df["exp dDDG (kcal/mol)"],
            dy_pred=error_thresh_df["All samples dDDG"],
            statistic=stat_name,
        )["mle"]
        for stat_name in ("MUE", "rho")
    }
)
break_down_data_no_sub = [sample_data]
pd.DataFrame(break_down_data_no_sub)