diff --git a/CHANGELOG.md b/CHANGELOG.md index 59268de..29d4d3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +### 1.0.0 - 11/04/2025 + +**Changes**: + +- Added dot plots on top of bar plots for improved clarity in CASP15 results. + ### 0.7.1 - 08/11/2025 **Changes**: diff --git a/docs/source/conf.py b/docs/source/conf.py index 7b4a338..cdc4ec7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,7 +9,7 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "PoseBench" author = "Alex Morehead" -release = "0.7.1" +release = "1.0.0" copyright = f"{datetime.datetime.now().year}, {author}" # -- General configuration --------------------------------------------------- diff --git a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png index 3ba56d9..19f54e5 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png index dce2899..8e6ecf6 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_inference_results_plotting.ipynb b/notebooks/casp15_inference_results_plotting.ipynb index f964c1c..67f3a28 100644 --- a/notebooks/casp15_inference_results_plotting.ipynb +++ b/notebooks/casp15_inference_results_plotting.ipynb @@ -27,6 +27,7 @@ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", + "from matplotlib.ticker import FuncFormatter\n", "\n", "from posebench.analysis.inference_analysis_casp import (\n", " CASP_BUST_TEST_COLUMNS,\n", @@ -304,7 +305,18 @@ " :param method: Method name.\n", " :return: Method category.\n", " \"\"\"\n", - " return method_category_mapping.get(method, \"DL-based blind\")" + " return method_category_mapping.get(method, \"DL-based blind\")\n", + "\n", + "\n", + "def percent_angstrom_formatter(x, pos):\n", + " \"\"\"\n", + " Format function for percent/angstrom axis.\n", + "\n", + " :param x: Value.\n", + " :param pos: Position.\n", + " :return: Formatted string.\n", + " \"\"\"\n", + " return f\"{x:.0f}% / Å\"" ] }, { @@ -799,9 +811,100 @@ " width=bar_width,\n", " )\n", "\n", + " # extract raw RMSD values for each method and condition\n", + " for method_idx, method in enumerate(method_mapping.values()):\n", + " # get unrelaxed RMSD values grouped by target\n", + " unrelaxed_rmsd_by_target = {}\n", + " relaxed_rmsd_by_target = {}\n", + "\n", + " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", + " # unrelaxed data\n", + " casp15_unrelaxed = (\n", + " globals()[f\"scoring_results_table_{repeat_index}\"][\n", + " (\n", + " globals()[f\"scoring_results_table_{repeat_index}\"][\"dataset\"]\n", + " == \"CASP15 set\"\n", + " )\n", + " & (\n", + " globals()[f\"scoring_results_table_{repeat_index}\"][\"post-processing\"]\n", + " == \"none\"\n", + " )\n", + " & (globals()[f\"scoring_results_table_{repeat_index}\"][\"method\"] == method)\n", + " ]\n", + " .groupby(\"target\")\n", + " .agg({\"rmsd\": \"mean\"})\n", + " )\n", + "\n", + " # relaxed data\n", + " casp15_relaxed = (\n", + " globals()[f\"scoring_results_table_{repeat_index}\"][\n", + " (\n", + " globals()[f\"scoring_results_table_{repeat_index}\"][\"dataset\"]\n", + " == \"CASP15 set\"\n", + " )\n", + " & (\n", + " globals()[f\"scoring_results_table_{repeat_index}\"][\"post-processing\"]\n", + " == \"energy minimization\"\n", + " )\n", + " & (globals()[f\"scoring_results_table_{repeat_index}\"][\"method\"] == method)\n", + " ]\n", + " .groupby(\"target\")\n", + " .agg({\"rmsd\": \"mean\"})\n", + " )\n", + "\n", + " # accumulate values by target\n", + " for target, rmsd_value in casp15_unrelaxed.iterrows():\n", + " if target not in unrelaxed_rmsd_by_target:\n", + " unrelaxed_rmsd_by_target[target] = []\n", + " unrelaxed_rmsd_by_target[target].append(rmsd_value[\"rmsd\"])\n", + "\n", + " for target, rmsd_value in casp15_relaxed.iterrows():\n", + " if target not in relaxed_rmsd_by_target:\n", + " relaxed_rmsd_by_target[target] = []\n", + " relaxed_rmsd_by_target[target].append(rmsd_value[\"rmsd\"])\n", + "\n", + " # calculate average RMSD across repeats for each target\n", + " unrelaxed_rmsd_averages = [\n", + " np.mean(values) for values in unrelaxed_rmsd_by_target.values()\n", + " ]\n", + " relaxed_rmsd_averages = [np.mean(values) for values in relaxed_rmsd_by_target.values()]\n", + "\n", + " # overlay unrelaxed RMSD points (averaged per target)\n", + " if len(unrelaxed_rmsd_averages) > 0:\n", + " # add small random jitter for better visibility when points overlap\n", + " x_positions = np.random.normal(r1[method_idx], 0.05, len(unrelaxed_rmsd_averages))\n", + " # clamp RMSD values at 100\n", + " clamped_rmsd = [min(val, 100) for val in unrelaxed_rmsd_averages]\n", + " axis.scatter(\n", + " x_positions,\n", + " clamped_rmsd,\n", + " alpha=0.6,\n", + " s=20,\n", + " color=\"darkred\",\n", + " edgecolors=\"black\",\n", + " linewidth=0.5,\n", + " zorder=10,\n", + " ) # higher zorder to appear on top\n", + "\n", + " # overlay relaxed RMSD points (averaged per target)\n", + " if len(relaxed_rmsd_averages) > 0:\n", + " x_positions = np.random.normal(r2[method_idx], 0.05, len(relaxed_rmsd_averages))\n", + " # clamp RMSD values at 100\n", + " clamped_rmsd = [min(val, 100) for val in relaxed_rmsd_averages]\n", + " axis.scatter(\n", + " x_positions,\n", + " clamped_rmsd,\n", + " alpha=0.6,\n", + " s=20,\n", + " color=\"purple\",\n", + " edgecolors=\"black\",\n", + " linewidth=0.5,\n", + " zorder=10,\n", + " )\n", + "\n", " # add labels, titles, ticks, etc.\n", " axis.set_xlabel(f\"{complex_type.title()}-ligand blind docking ({complex_license})\")\n", - " axis.set_ylabel(\"Percentage of predictions\")\n", + " axis.set_ylabel(\"Percentage of predictions / RMSD (Å)\")\n", " axis.set_xlim(1, 23 + 0.1)\n", " axis.set_ylim(0, 125)\n", "\n", @@ -809,7 +912,7 @@ " axis.bar_label(casp15_relaxed_rmsd_lt_2_bar, fmt=\"{:,.1f}\", label_type=\"center\")\n", " axis.bar_label(casp15_plif_wm_bar, fmt=\"{:,.1f}\", label_type=\"center\")\n", "\n", - " axis.yaxis.set_major_formatter(mtick.PercentFormatter())\n", + " axis.yaxis.set_major_formatter(FuncFormatter(percent_angstrom_formatter))\n", "\n", " axis.set_yticks([0, 20, 40, 60, 80, 100])\n", " axis.axhline(y=0, color=\"#EAEFF8\")\n", diff --git a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png index d8a201d..030471f 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png index 0049d6e..77ebba4 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/pyproject.toml b/pyproject.toml index 72edc4a..dfa8f33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "posebench" -version = "0.7.1" +version = "1.0.0" description = "Comprehensive benchmarking of protein-ligand structure prediction methods" authors = [ { name = "Alex Morehead", email = "acmwhb@umsystem.edu" }