## CASP16 FlowDock vs. MULTICOM_ligand Structure Prediction Results Plotting

#### Import packages

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

#### Make plot

In [None]:
# CASP16 Structure Prediction Results #

# Load FlowDock's results and the CASP16 ligand pose results table
flowdock_top_ranked_results_df = pd.read_csv("casp16_flowdock_bust_results.csv")

multicom_ligand_results_df = pd.read_csv("casp16_ligand_pose_results.csv")
# Keep only the rows of group "LG207" whose model name contains `_1`.
# The negative lookahead stops `_1` from also matching `_10`, `_11`, ..., and
# `na=False` treats missing model names as non-matches instead of letting NaN
# leak into the boolean mask.
# NOTE(review): assumes `_1` in the model name marks the top-ranked model - confirm
# against the schema of `casp16_ligand_pose_results.csv`.
multicom_ligand_top_ranked_results_df = multicom_ligand_results_df[
    (multicom_ligand_results_df["group"] == "LG207")
    & (
        multicom_ligand_results_df["model"].str.contains(
            r"_1(?!\d)", regex=True, na=False
        )
    )
]

# Find common targets between FlowDock and MULTICOM_ligand's completed predictions
# (FlowDock identifies a target by `mol_id`, the pose results table by `target`)
common_targets = set(flowdock_top_ranked_results_df["mol_id"]).intersection(
    set(multicom_ligand_top_ranked_results_df["target"])
)
flowdock_top_ranked_results_df = flowdock_top_ranked_results_df[
    flowdock_top_ranked_results_df["mol_id"].isin(common_targets)
]
multicom_ligand_top_ranked_results_df = multicom_ligand_top_ranked_results_df[
    multicom_ligand_top_ranked_results_df["target"].isin(common_targets)
]

# Plot FlowDock vs. MULTICOM_ligand's results as a histogram comparison
# (`n` is the number of FlowDock result rows and is embedded in the output filename)
n = len(flowdock_top_ranked_results_df)

flowdock_rmsd = flowdock_top_ranked_results_df["rmsd"]
multicom_ligand_rmsd = multicom_ligand_top_ranked_results_df["rmsd"]

# Use one bin range for both histograms. Without it, each call derives its 100 bins
# from its own data extremes, so the two methods end up with different bin widths
# and their per-bin counts cannot be compared on the shared "Frequency" axis.
combined_rmsd = pd.concat([flowdock_rmsd, multicom_ligand_rmsd]).dropna()
shared_bin_range = (
    (combined_rmsd.min(), combined_rmsd.max()) if not combined_rmsd.empty else None
)

sns.set_theme(style="whitegrid")
plt.figure(figsize=(8, 6))
ax = sns.histplot(
    flowdock_rmsd,
    bins=100,
    binrange=shared_bin_range,
    kde=True,
    color="orange",
    label="FlowDock (w/o relaxation)",
)
# Draw the second histogram on the same axes explicitly rather than relying on
# matplotlib's implicit "current axes" state.
ax = sns.histplot(
    multicom_ligand_rmsd,
    bins=100,
    binrange=shared_bin_range,
    kde=True,
    color="skyblue",
    label="MULTICOM_ligand",
    ax=ax,
)
ax.set_xlabel("RMSD")
ax.set_ylabel("Frequency")
# Only the 0-10 RMSD window is displayed; values above 10 are binned but not shown
ax.set_xlim(0, 10)
ax.legend(title="Method")
ax.set_title("")
plt.savefig(
    f"casp16_flowdock_vs_multicom_ligand_structure_prediction_results_comparison_n={n}.png"
)
plt.show()

#### Make plot (for multi-ligand targets only)

In [None]:
# Find results for multi-ligand targets only, i.e., common targets that have
# more than one FlowDock result row
flowdock_rows_per_target = flowdock_top_ranked_results_df["mol_id"].value_counts()
common_multi_ligand_targets = common_targets.intersection(
    set(flowdock_rows_per_target[flowdock_rows_per_target > 1].index)
)
flowdock_top_ranked_multi_ligand_results_df = flowdock_top_ranked_results_df[
    flowdock_top_ranked_results_df["mol_id"].isin(common_multi_ligand_targets)
]
multicom_ligand_top_ranked_multi_ligand_results_df = multicom_ligand_top_ranked_results_df[
    multicom_ligand_top_ranked_results_df["target"].isin(common_multi_ligand_targets)
]

# Plot FlowDock vs. MULTICOM_ligand's multi-ligand results as a histogram comparison
# (`n` is the number of FlowDock multi-ligand result rows and is embedded in the
# output filename)
n = len(flowdock_top_ranked_multi_ligand_results_df)

flowdock_multi_ligand_rmsd = flowdock_top_ranked_multi_ligand_results_df["rmsd"]
multicom_ligand_multi_ligand_rmsd = multicom_ligand_top_ranked_multi_ligand_results_df["rmsd"]

# Use one bin range for both histograms. Without it, each call derives its 100 bins
# from its own data extremes, so the two methods end up with different bin widths
# and their per-bin counts cannot be compared on the shared "Frequency" axis.
combined_multi_ligand_rmsd = pd.concat(
    [flowdock_multi_ligand_rmsd, multicom_ligand_multi_ligand_rmsd]
).dropna()
multi_ligand_bin_range = (
    (combined_multi_ligand_rmsd.min(), combined_multi_ligand_rmsd.max())
    if not combined_multi_ligand_rmsd.empty
    else None
)

sns.set_theme(style="whitegrid")
plt.figure(figsize=(8, 6))
ax = sns.histplot(
    flowdock_multi_ligand_rmsd,
    bins=100,
    binrange=multi_ligand_bin_range,
    kde=True,
    color="orange",
    label="FlowDock (w/o relaxation)",
)
# Draw the second histogram on the same axes explicitly rather than relying on
# matplotlib's implicit "current axes" state.
ax = sns.histplot(
    multicom_ligand_multi_ligand_rmsd,
    bins=100,
    binrange=multi_ligand_bin_range,
    kde=True,
    color="skyblue",
    label="MULTICOM_ligand",
    ax=ax,
)
ax.set_xlabel("RMSD")
ax.set_ylabel("Frequency")
# Only the 0-10 RMSD window is displayed; values above 10 are binned but not shown
ax.set_xlim(0, 10)
ax.legend(title="Method")
ax.set_title("")
plt.savefig(
    f"casp16_flowdock_vs_multicom_ligand_multi_ligand_structure_prediction_results_comparison_n={n}.png"
)
plt.show()