From a2eceafdd893707e112c331f95552cb4d18f590a Mon Sep 17 00:00:00 2001
From: siemdejong <28396796+siemdejong@users.noreply.github.com>
Date: Mon, 12 Aug 2024 10:42:45 +0200
Subject: [PATCH 1/5] fix: change Subplot to Axes

---
 dabest/plot_tools.py         | 36 ++++++++++++++++++------------------
 nbs/API/plot_tools.ipynb     | 36 ++++++++++++++++++------------------
 nbs/tests/test_plot_tools.py |  2 +-
 settings.ini                 |  2 +-
 4 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py
index 65fea009..ece9b6e4 100644
--- a/dabest/plot_tools.py
+++ b/dabest/plot_tools.py
@@ -783,7 +783,7 @@ def swarmplot(
     data: pd.DataFrame,
     x: str,
     y: str,
-    ax: axes.Subplot,
+    ax: axes.Axes,
     order: List = None,
     hue: str = None,
     palette: Union[Iterable, str] = "black",
@@ -806,8 +806,8 @@ def swarmplot(
         The column in the DataFrame to be used as the x-axis.
     y : str
         The column in the DataFrame to be used as the y-axis.
-    ax : axes._subplots.Subplot | axes._axes.Axes
-        Matplotlib AxesSubplot object for which the plot would be drawn on. Default is None.
+    ax : axes.Axes
+        Matplotlib axes.Axes object on which the plot will be drawn.
     order : List
         The order in which x-axis categories should be displayed. Default is None.
     hue : str
@@ -832,8 +832,8 @@ def swarmplot(
 
     Returns
     -------
-    axes._subplots.Subplot | axes._axes.Axes
-        Matplotlib AxesSubplot object for which the swarm plot has been drawn on.
+    axes.Axes
+        Matplotlib axes.Axes object for which the swarm plot has been drawn on.
     """
     s = SwarmPlot(data, x, y, ax, order, hue, palette, zorder, size, side, jitter)
     ax = s.plot(is_drop_gutter, gutter_limit, ax, **kwargs)
@@ -846,7 +846,7 @@ def __init__(
         data: pd.DataFrame,
         x: str,
         y: str,
-        ax: axes.Subplot,
+        ax: axes.Axes,
         order: List = None,
         hue: str = None,
         palette: Union[Iterable, str] = "black",
@@ -866,8 +866,8 @@ def __init__(
             The column in the DataFrame to be used as the x-axis.
         y : str
             The column in the DataFrame to be used as the y-axis.
-        ax : axes.Subplot
-            Matplotlib AxesSubplot object for which the plot would be drawn on.
+        ax : axes.Axes
+            Matplotlib axes.Axes object for which the plot would be drawn on.
         order : List
             The order in which x-axis categories should be displayed. Default is None.
         hue : str
@@ -954,7 +954,7 @@ def __init__(
         self.__dsize = dsize
 
     def _check_errors(
-        self, data: pd.DataFrame, ax: axes.Subplot, size: float, side: str
+        self, data: pd.DataFrame, ax: axes.Axes, size: float, side: str
     ) -> None:
         """
         Check the validity of input parameters. Raises exceptions if detected.
@@ -963,8 +963,8 @@ def _check_errors(
         ----------
         data : pd.Dataframe
             Input data used for generation of the swarmplot.
-        ax : axes.Subplot
-            Matplotlib AxesSubplot object for which the plot would be drawn on.
+        ax : axes.Axes
+            Matplotlib axes.Axes object for which the plot would be drawn on.
         size : int | float
             scalar value determining size of dots of the swarmplot.
         side: str
@@ -977,9 +977,9 @@ def _check_errors(
         # Type enforcement
         if not isinstance(data, pd.DataFrame):
             raise ValueError("`data` must be a Pandas Dataframe.")
-        if not isinstance(ax, (axes._subplots.Subplot, axes._axes.Axes)):
+        if not isinstance(ax, axes.Axes):
             raise ValueError(
-                f"`ax` must be a Matplotlib AxesSubplot. The current `ax` is a {type(ax)}"
+                f"`ax` must be a Matplotlib axes.Axes. The current `ax` is a {type(ax)}"
             )
         if not isinstance(size, (int, float)):
             raise ValueError("`size` must be a scalar or float.")
@@ -1239,8 +1239,8 @@ def _adjust_gutter_points(
         return points_data
 
     def plot(
-        self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Subplot, **kwargs
-    ) -> axes.Subplot:
+        self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Axes, **kwargs
+    ) -> axes.Axes:
         """
         Generate a swarm plot.
 
@@ -1250,15 +1250,15 @@ def plot(
             If True, drop points that hit the gutters; otherwise, readjust them.
         gutter_limit : int | float
             The limit for points hitting the gutters.
-        ax : axes.Subplot
+        ax : axes.Axes
             The matplotlib figure object to which the swarm plot will be added.
         **kwargs:
             Additional keyword arguments to be passed to the scatter plot.
 
         Returns
         -------
-        axes.Subplot:
-            The matplotlib figure containing the swarm plot.
+        axes.Axes:
+            The matplotlib axes containing the swarm plot.
         """
         # Input validation
         if not isinstance(is_drop_gutter, bool):
diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb
index 7187025b..a6f4b536 100644
--- a/nbs/API/plot_tools.ipynb
+++ b/nbs/API/plot_tools.ipynb
@@ -846,7 +846,7 @@
     "    data: pd.DataFrame,\n",
     "    x: str,\n",
     "    y: str,\n",
-    "    ax: axes.Subplot,\n",
+    "    ax: axes.Axes,\n",
     "    order: List = None,\n",
     "    hue: str = None,\n",
     "    palette: Union[Iterable, str] = \"black\",\n",
@@ -869,8 +869,8 @@
     "        The column in the DataFrame to be used as the x-axis.\n",
     "    y : str\n",
     "        The column in the DataFrame to be used as the y-axis.\n",
-    "    ax : axes._subplots.Subplot | axes._axes.Axes\n",
-    "        Matplotlib AxesSubplot object for which the plot would be drawn on. Default is None.\n",
+    "    ax : axes.Axes\n",
+    "        Matplotlib axes.Axes object on which the plot will be drawn.\n",
     "    order : List\n",
     "        The order in which x-axis categories should be displayed. Default is None.\n",
     "    hue : str\n",
@@ -895,8 +895,8 @@
     "\n",
     "    Returns\n",
     "    -------\n",
-    "    axes._subplots.Subplot | axes._axes.Axes\n",
-    "        Matplotlib AxesSubplot object for which the swarm plot has been drawn on.\n",
+    "    axes.Axes\n",
+    "        Matplotlib axes.Axes object for which the swarm plot has been drawn on.\n",
     "    \"\"\"\n",
     "    s = SwarmPlot(data, x, y, ax, order, hue, palette, zorder, size, side, jitter)\n",
     "    ax = s.plot(is_drop_gutter, gutter_limit, ax, **kwargs)\n",
@@ -909,7 +909,7 @@
     "        data: pd.DataFrame,\n",
     "        x: str,\n",
     "        y: str,\n",
-    "        ax: axes.Subplot,\n",
+    "        ax: axes.Axes,\n",
     "        order: List = None,\n",
     "        hue: str = None,\n",
     "        palette: Union[Iterable, str] = \"black\",\n",
@@ -929,8 +929,8 @@
     "            The column in the DataFrame to be used as the x-axis.\n",
     "        y : str\n",
     "            The column in the DataFrame to be used as the y-axis.\n",
-    "        ax : axes.Subplot\n",
-    "            Matplotlib AxesSubplot object for which the plot would be drawn on.\n",
+    "        ax : axes.Axes\n",
+    "            Matplotlib axes.Axes object for which the plot would be drawn on.\n",
     "        order : List\n",
     "            The order in which x-axis categories should be displayed. Default is None.\n",
     "        hue : str\n",
@@ -1017,7 +1017,7 @@
     "        self.__dsize = dsize\n",
     "\n",
     "    def _check_errors(\n",
-    "        self, data: pd.DataFrame, ax: axes.Subplot, size: float, side: str\n",
+    "        self, data: pd.DataFrame, ax: axes.Axes, size: float, side: str\n",
     "    ) -> None:\n",
     "        \"\"\"\n",
     "        Check the validity of input parameters. Raises exceptions if detected.\n",
@@ -1026,8 +1026,8 @@
     "        ----------\n",
     "        data : pd.Dataframe\n",
     "            Input data used for generation of the swarmplot.\n",
-    "        ax : axes.Subplot\n",
-    "            Matplotlib AxesSubplot object for which the plot would be drawn on.\n",
+    "        ax : axes.Axes\n",
+    "            Matplotlib axes.Axes object for which the plot would be drawn on.\n",
     "        size : int | float\n",
     "            scalar value determining size of dots of the swarmplot.\n",
     "        side: str\n",
@@ -1040,9 +1040,9 @@
     "        # Type enforcement\n",
     "        if not isinstance(data, pd.DataFrame):\n",
     "            raise ValueError(\"`data` must be a Pandas Dataframe.\")\n",
-    "        if not isinstance(ax, (axes._subplots.Subplot, axes._axes.Axes)):\n",
+    "        if not isinstance(ax, axes.Axes):\n",
     "            raise ValueError(\n",
-    "                f\"`ax` must be a Matplotlib AxesSubplot. The current `ax` is a {type(ax)}\"\n",
+    "                f\"`ax` must be a Matplotlib axes.Axes. The current `ax` is a {type(ax)}\"\n",
     "            )\n",
     "        if not isinstance(size, (int, float)):\n",
     "            raise ValueError(\"`size` must be a scalar or float.\")\n",
@@ -1302,8 +1302,8 @@
     "        return points_data\n",
     "\n",
     "    def plot(\n",
-    "        self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Subplot, **kwargs\n",
-    "    ) -> axes.Subplot:\n",
+    "        self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Axes, **kwargs\n",
+    "    ) -> axes.Axes:\n",
     "        \"\"\"\n",
     "        Generate a swarm plot.\n",
     "\n",
@@ -1313,15 +1313,15 @@
     "            If True, drop points that hit the gutters; otherwise, readjust them.\n",
     "        gutter_limit : int | float\n",
     "            The limit for points hitting the gutters.\n",
-    "        ax : axes.Subplot\n",
+    "        ax : axes.Axes\n",
     "            The matplotlib figure object to which the swarm plot will be added.\n",
     "        **kwargs:\n",
     "            Additional keyword arguments to be passed to the scatter plot.\n",
     "\n",
     "        Returns\n",
     "        -------\n",
-    "        axes.Subplot:\n",
-    "            The matplotlib figure containing the swarm plot.\n",
+    "        axes.Axes:\n",
+    "            The matplotlib axes containing the swarm plot.\n",
     "        \"\"\"\n",
     "        # Input validation\n",
     "        if not isinstance(is_drop_gutter, bool):\n",
diff --git a/nbs/tests/test_plot_tools.py b/nbs/tests/test_plot_tools.py
index b47dba7f..0b40ee25 100644
--- a/nbs/tests/test_plot_tools.py
+++ b/nbs/tests/test_plot_tools.py
@@ -84,7 +84,7 @@ def test_check_data_matches_labels():
     ("data", None, "`data` must be a Pandas Dataframe.", ValueError),
     ("x", None, "`x` must be a string.", ValueError),
     ("y", None, "`y` must be a string.", ValueError),
-    ("ax", None, "`ax` must be a Matplotlib AxesSubplot. The current `ax` is a <class 'NoneType'>", ValueError),
+    ("ax", None, "`ax` must be a Matplotlib axes.Axes. The current `ax` is a <class 'NoneType'>", ValueError),
     ("order", 5, "`order` must be either an Iterable or None.", ValueError),
     ("hue", 5, "`hue` must be either a string or None.", ValueError),
     ("palette", None, "`palette` must be either a string indicating a color name or an Iterable.", ValueError),
diff --git a/settings.ini b/settings.ini
index 5c22d22d..d750d22e 100644
--- a/settings.ini
+++ b/settings.ini
@@ -37,7 +37,7 @@ language = English
 status = 3
 user = acclab
 
-requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.6.3 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt
+requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt
 dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1
 
 ### Optional ###

From 3164aee6b56e261c6e6f56d31727ff84ea2792c3 Mon Sep 17 00:00:00 2001
From: siemdejong <28396796+siemdejong@users.noreply.github.com>
Date: Mon, 12 Aug 2024 12:35:34 +0200
Subject: [PATCH 2/5] fix: detail matplotlib version number

---
 settings.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/settings.ini b/settings.ini
index d750d22e..1f64360f 100644
--- a/settings.ini
+++ b/settings.ini
@@ -37,7 +37,7 @@ language = English
 status = 3
 user = acclab
 
-requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt
+requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt
 dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1
 
 ### Optional ###

From a27f6c1eab3925261aaf30e257b12998f7a9bda8 Mon Sep 17 00:00:00 2001
From: Jacobluke- <javcobll@gmail.com>
Date: Mon, 16 Sep 2024 17:13:40 +0800
Subject: [PATCH 3/5] Fix Upgrading Error

---
 .github/workflows/test-pytest.yaml |  2 +-
 dabest/_modidx.py                  |  4 +++-
 dabest/misc_tools.py               | 16 +++++++++++++++-
 dabest/plot_tools.py               |  9 +++++----
 dabest/plotter.py                  | 18 ++++++++++--------
 nbs/API/misc_tools.ipynb           | 16 +++++++++++++++-
 nbs/API/plot_tools.ipynb           |  9 +++++----
 nbs/API/plotter.ipynb              | 18 ++++++++++--------
 settings.ini                       |  4 ++--
 9 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/test-pytest.yaml b/.github/workflows/test-pytest.yaml
index 599c62a6..344c88b7 100644
--- a/.github/workflows/test-pytest.yaml
+++ b/.github/workflows/test-pytest.yaml
@@ -8,7 +8,7 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: 3.9
           cache: "pip"
           cache-dependency-path: settings.ini
       - name: Run pytest
diff --git a/dabest/_modidx.py b/dabest/_modidx.py
index 14bfa3da..c18ab957 100644
--- a/dabest/_modidx.py
+++ b/dabest/_modidx.py
@@ -65,7 +65,9 @@
                                                                               'dabest/forest_plot.py'),
                                     'dabest.forest_plot.forest_plot': ('API/forest_plot.html#forest_plot', 'dabest/forest_plot.py'),
                                     'dabest.forest_plot.load_plot_data': ('API/forest_plot.html#load_plot_data', 'dabest/forest_plot.py')},
-            'dabest.misc_tools': { 'dabest.misc_tools.get_varname': ('API/misc_tools.html#get_varname', 'dabest/misc_tools.py'),
+            'dabest.misc_tools': { 'dabest.misc_tools.get_unique_categories': ( 'API/misc_tools.html#get_unique_categories',
+                                                                                'dabest/misc_tools.py'),
+                                   'dabest.misc_tools.get_varname': ('API/misc_tools.html#get_varname', 'dabest/misc_tools.py'),
                                    'dabest.misc_tools.merge_two_dicts': ('API/misc_tools.html#merge_two_dicts', 'dabest/misc_tools.py'),
                                    'dabest.misc_tools.print_greeting': ('API/misc_tools.html#print_greeting', 'dabest/misc_tools.py'),
                                    'dabest.misc_tools.unpack_and_add': ('API/misc_tools.html#unpack_and_add', 'dabest/misc_tools.py')},
diff --git a/dabest/misc_tools.py b/dabest/misc_tools.py
index 7c5b2020..0a5c90bb 100644
--- a/dabest/misc_tools.py
+++ b/dabest/misc_tools.py
@@ -1,10 +1,12 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/misc_tools.ipynb.
 
 # %% auto 0
-__all__ = ['merge_two_dicts', 'unpack_and_add', 'print_greeting', 'get_varname']
+__all__ = ['merge_two_dicts', 'unpack_and_add', 'print_greeting', 'get_varname', 'get_unique_categories']
 
 # %% ../nbs/API/misc_tools.ipynb 4
 import datetime as dt
+import numpy as np
+import pandas as pd
 from numpy import repeat
 
 # %% ../nbs/API/misc_tools.ipynb 5
@@ -68,3 +70,15 @@ def get_varname(obj):
     if len(matching_vars) > 0:
         return matching_vars[0]
     return ""
+
+def get_unique_categories(names):
+    """Return the group labels used as palette keys (categories when categorical)."""
+    if isinstance(names, np.ndarray):
+        return names  # returned unchanged: no de-duplication or sorting is applied
+    elif isinstance(names, pd.Categorical):
+        return names.categories  # a bare Categorical has no `.cat` accessor
+    elif isinstance(names, pd.Series):
+        return names.cat.categories if hasattr(names, 'cat') else names.unique()
+    else:
+        # For dict_keys and other iterables (np.unique returns a sorted array)
+        return np.unique(list(names))
diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py
index ece9b6e4..b6f0cfe0 100644
--- a/dabest/plot_tools.py
+++ b/dabest/plot_tools.py
@@ -1136,9 +1136,10 @@ def _swarm(
             raise ValueError("`dsize` must be a scalar or float.")
 
         # Sorting algorithm based off of: https://github.com/mgymrek/pybeeswarm
-        points_data = pd.DataFrame(
-            {"y": [yval * 1.0 / dsize for yval in values], "x": [0] * len(values)}
-        )
+        points_data = pd.DataFrame({
+            "y": [yval * 1.0 / dsize for yval in values],
+            "x": np.zeros(len(values), dtype=float)  # Initialize with float zeros
+        })
         for i in range(1, points_data.shape[0]):
             y_i = points_data["y"].values[i]
             points_placed = points_data[0:i]
@@ -1271,7 +1272,7 @@ def plot(
             0  # x-coordinate of center of each individual swarm of the swarm plot
         )
         x_tick_tabels = []
-        for group_i, values_i in self.__data_copy.groupby(self.__x):
+        for group_i, values_i in self.__data_copy.groupby(self.__x, observed=False):
             x_new = []
             values_i_y = values_i[self.__y]
             x_offset = self._swarm(
diff --git a/dabest/plotter.py b/dabest/plotter.py
index fcd65ee5..086db5b6 100644
--- a/dabest/plotter.py
+++ b/dabest/plotter.py
@@ -54,7 +54,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
         fontsize_contrastxlabel=12, fontsize_contrastylabel=12,
         fontsize_delta2label=12
     """
-    from .misc_tools import merge_two_dicts
+    from .misc_tools import merge_two_dicts, get_unique_categories
     from .plot_tools import (
         halfviolin,
         get_swarm_spans,
@@ -298,14 +298,16 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
                 raise ValueError(err1 + err2)
 
     if custom_pal is None and color_col is None:
+        categories = get_unique_categories(names)
+        
         swarm_colors = [sns.desaturate(c, swarm_desat) for c in unsat_colors]
-        plot_palette_raw = dict(zip(names.categories, swarm_colors))
-
         bar_color = [sns.desaturate(c, bar_desat) for c in unsat_colors]
-        plot_palette_bar = dict(zip(names.categories, bar_color))
-
         contrast_colors = [sns.desaturate(c, contrast_desat) for c in unsat_colors]
-        plot_palette_contrast = dict(zip(names.categories, contrast_colors))
+
+        
+        plot_palette_raw = dict(zip(categories, swarm_colors))
+        plot_palette_bar = dict(zip(categories, bar_color))
+        plot_palette_contrast = dict(zip(categories, contrast_colors))
 
         # For Sankey Diagram plot, no need to worry about the color, each bar will have the same two colors
         # default color palette will be set to "hls"
@@ -1081,10 +1083,10 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
             )
         elif effect_size_type == "median_diff":
             control_group_summary = (
-                plot_data.groupby(xvar).median().loc[current_control, yvar]
+                plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar]
             )
             test_group_summary = (
-                plot_data.groupby(xvar).median().loc[current_group, yvar]
+                plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar]
             )
 
         if swarm_ylim is None:
diff --git a/nbs/API/misc_tools.ipynb b/nbs/API/misc_tools.ipynb
index 0395a57c..3eec5de2 100644
--- a/nbs/API/misc_tools.ipynb
+++ b/nbs/API/misc_tools.ipynb
@@ -55,6 +55,8 @@
    "source": [
     "#| export\n",
     "import datetime as dt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
     "from numpy import repeat"
    ]
   },
@@ -125,7 +127,19 @@
     "    matching_vars = [k for k, v in globals().items() if v is obj]\n",
     "    if len(matching_vars) > 0:\n",
     "        return matching_vars[0]\n",
-    "    return \"\""
+    "    return \"\"\n",
+    "\n",
+    "def get_unique_categories(names):\n",
+    "    \"\"\"Return the group labels used as palette keys (categories when categorical).\"\"\"\n",
+    "    if isinstance(names, np.ndarray):\n",
+    "        return names  # returned unchanged: no de-duplication or sorting is applied\n",
+    "    elif isinstance(names, pd.Categorical):\n",
+    "        return names.categories  # a bare Categorical has no `.cat` accessor\n",
+    "    elif isinstance(names, pd.Series):\n",
+    "        return names.cat.categories if hasattr(names, 'cat') else names.unique()\n",
+    "    else:\n",
+    "        # For dict_keys and other iterables (np.unique returns a sorted array)\n",
+    "        return np.unique(list(names))"
    ]
   }
  ],
diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb
index a6f4b536..4932e7e9 100644
--- a/nbs/API/plot_tools.ipynb
+++ b/nbs/API/plot_tools.ipynb
@@ -1199,9 +1199,10 @@
     "            raise ValueError(\"`dsize` must be a scalar or float.\")\n",
     "\n",
     "        # Sorting algorithm based off of: https://github.com/mgymrek/pybeeswarm\n",
-    "        points_data = pd.DataFrame(\n",
-    "            {\"y\": [yval * 1.0 / dsize for yval in values], \"x\": [0] * len(values)}\n",
-    "        )\n",
+    "        points_data = pd.DataFrame({\n",
+    "            \"y\": [yval * 1.0 / dsize for yval in values],\n",
+    "            \"x\": np.zeros(len(values), dtype=float)  # Initialize with float zeros\n",
+    "        })\n",
     "        for i in range(1, points_data.shape[0]):\n",
     "            y_i = points_data[\"y\"].values[i]\n",
     "            points_placed = points_data[0:i]\n",
@@ -1334,7 +1335,7 @@
     "            0  # x-coordinate of center of each individual swarm of the swarm plot\n",
     "        )\n",
     "        x_tick_tabels = []\n",
-    "        for group_i, values_i in self.__data_copy.groupby(self.__x):\n",
+    "        for group_i, values_i in self.__data_copy.groupby(self.__x, observed=False):\n",
     "            x_new = []\n",
     "            values_i_y = values_i[self.__y]\n",
     "            x_offset = self._swarm(\n",
diff --git a/nbs/API/plotter.ipynb b/nbs/API/plotter.ipynb
index 7e054ea4..127fa24d 100644
--- a/nbs/API/plotter.ipynb
+++ b/nbs/API/plotter.ipynb
@@ -113,7 +113,7 @@
     "        fontsize_contrastxlabel=12, fontsize_contrastylabel=12,\n",
     "        fontsize_delta2label=12\n",
     "    \"\"\"\n",
-    "    from .misc_tools import merge_two_dicts\n",
+    "    from .misc_tools import merge_two_dicts, get_unique_categories\n",
     "    from .plot_tools import (\n",
     "        halfviolin,\n",
     "        get_swarm_spans,\n",
@@ -357,14 +357,16 @@
     "                raise ValueError(err1 + err2)\n",
     "\n",
     "    if custom_pal is None and color_col is None:\n",
+    "        categories = get_unique_categories(names)\n",
+    "        \n",
     "        swarm_colors = [sns.desaturate(c, swarm_desat) for c in unsat_colors]\n",
-    "        plot_palette_raw = dict(zip(names.categories, swarm_colors))\n",
-    "\n",
     "        bar_color = [sns.desaturate(c, bar_desat) for c in unsat_colors]\n",
-    "        plot_palette_bar = dict(zip(names.categories, bar_color))\n",
-    "\n",
     "        contrast_colors = [sns.desaturate(c, contrast_desat) for c in unsat_colors]\n",
-    "        plot_palette_contrast = dict(zip(names.categories, contrast_colors))\n",
+    "\n",
+    "        \n",
+    "        plot_palette_raw = dict(zip(categories, swarm_colors))\n",
+    "        plot_palette_bar = dict(zip(categories, bar_color))\n",
+    "        plot_palette_contrast = dict(zip(categories, contrast_colors))\n",
     "\n",
     "        # For Sankey Diagram plot, no need to worry about the color, each bar will have the same two colors\n",
     "        # default color palette will be set to \"hls\"\n",
@@ -1140,10 +1142,10 @@
     "            )\n",
     "        elif effect_size_type == \"median_diff\":\n",
     "            control_group_summary = (\n",
-    "                plot_data.groupby(xvar).median().loc[current_control, yvar]\n",
+    "                plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar]\n",
     "            )\n",
     "            test_group_summary = (\n",
-    "                plot_data.groupby(xvar).median().loc[current_group, yvar]\n",
+    "                plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar]\n",
     "            )\n",
     "\n",
     "        if swarm_ylim is None:\n",
diff --git a/settings.ini b/settings.ini
index 1f64360f..449f5aa8 100644
--- a/settings.ini
+++ b/settings.ini
@@ -3,7 +3,7 @@
 repo = DABEST-python
 lib_name = dabest
 version = 2024.03.29
-min_python = 3.8
+min_python = 3.9
 license = apache2
 
 ### nbdev ###
@@ -37,7 +37,7 @@ language = English
 status = 3
 user = acclab
 
-requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt
+requirements = fastcore pandas~=1.5.3 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt
 dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1
 
 ### Optional ###

From 17d10144d8690bda3bd69d65c54baf37a23f22bd Mon Sep 17 00:00:00 2001
From: Jacobluke- <javcobll@gmail.com>
Date: Mon, 16 Sep 2024 17:19:16 +0800
Subject: [PATCH 4/5] nbdev adding description for api files

---
 dabest/_api.py                             | 2 ++
 dabest/_dabest_object.py                   | 2 ++
 dabest/_delta_objects.py                   | 2 ++
 dabest/_effsize_objects.py                 | 2 ++
 dabest/_stats_tools/confint_1group.py      | 2 ++
 dabest/_stats_tools/confint_2group_diff.py | 2 ++
 dabest/_stats_tools/effsize.py             | 2 ++
 dabest/forest_plot.py                      | 2 ++
 dabest/misc_tools.py                       | 2 ++
 dabest/plot_tools.py                       | 2 ++
 dabest/plotter.py                          | 2 ++
 11 files changed, 22 insertions(+)

diff --git a/dabest/_api.py b/dabest/_api.py
index 7c8d0eac..a6399385 100644
--- a/dabest/_api.py
+++ b/dabest/_api.py
@@ -1,3 +1,5 @@
+"""Loading data and relevant groups"""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/load.ipynb.
 
 # %% auto 0
diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py
index 3f618a2a..ec917b03 100644
--- a/dabest/_dabest_object.py
+++ b/dabest/_dabest_object.py
@@ -1,3 +1,5 @@
+"""Main class for estimating statistics and generating plots."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/dabest_object.ipynb.
 
 # %% auto 0
diff --git a/dabest/_delta_objects.py b/dabest/_delta_objects.py
index 30c44895..1827c1b2 100644
--- a/dabest/_delta_objects.py
+++ b/dabest/_delta_objects.py
@@ -1,3 +1,5 @@
+"""Auxiliary delta classes for estimating statistics and generating plots."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/delta_objects.ipynb.
 
 # %% auto 0
diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py
index f8bf3846..355ef971 100644
--- a/dabest/_effsize_objects.py
+++ b/dabest/_effsize_objects.py
@@ -1,3 +1,5 @@
+"""The auxiliary classes involved in the computations of bootstrapped effect sizes."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/effsize_objects.ipynb.
 
 # %% auto 0
diff --git a/dabest/_stats_tools/confint_1group.py b/dabest/_stats_tools/confint_1group.py
index a9b0beb1..744a7142 100644
--- a/dabest/_stats_tools/confint_1group.py
+++ b/dabest/_stats_tools/confint_1group.py
@@ -1,3 +1,5 @@
+"""A range of functions to compute bootstraps for a single sample."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/confint_1group.ipynb.
 
 # %% auto 0
diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py
index 3b07eb96..c599e178 100644
--- a/dabest/_stats_tools/confint_2group_diff.py
+++ b/dabest/_stats_tools/confint_2group_diff.py
@@ -1,3 +1,5 @@
+"""A range of functions to compute bootstraps for the mean difference"""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/confint_2group_diff.ipynb.
 
 # %% auto 0
diff --git a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py
index 32f965b1..f5a0d4fc 100644
--- a/dabest/_stats_tools/effsize.py
+++ b/dabest/_stats_tools/effsize.py
@@ -1,3 +1,5 @@
+"""A range of functions to compute various effect sizes."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/effsize.ipynb.
 
 # %% ../../nbs/API/effsize.ipynb 4
diff --git a/dabest/forest_plot.py b/dabest/forest_plot.py
index 7d29464f..583ece0c 100644
--- a/dabest/forest_plot.py
+++ b/dabest/forest_plot.py
@@ -1,3 +1,5 @@
+"""Creating forest plots from contrast objects."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/forest_plot.ipynb.
 
 # %% auto 0
diff --git a/dabest/misc_tools.py b/dabest/misc_tools.py
index 0a5c90bb..cb3984fe 100644
--- a/dabest/misc_tools.py
+++ b/dabest/misc_tools.py
@@ -1,3 +1,5 @@
+"""Convenience functions that don't directly deal with plotting or bootstrap computations are placed here."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/misc_tools.ipynb.
 
 # %% auto 0
diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py
index b6f0cfe0..0d4a5991 100644
--- a/dabest/plot_tools.py
+++ b/dabest/plot_tools.py
@@ -1,3 +1,5 @@
+"""A set of convenience functions used for producing plots in `dabest`."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/plot_tools.ipynb.
 
 # %% ../nbs/API/plot_tools.ipynb 2
diff --git a/dabest/plotter.py b/dabest/plotter.py
index 086db5b6..a1de3589 100644
--- a/dabest/plotter.py
+++ b/dabest/plotter.py
@@ -1,3 +1,5 @@
+"""Creating estimation plots."""
+
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/plotter.ipynb.
 
 # %% auto 0

From 9a42975084ee6967c0f73c8afb23c6f076b61b2b Mon Sep 17 00:00:00 2001
From: Jacobluke- <javcobll@gmail.com>
Date: Tue, 17 Sep 2024 14:28:56 +0800
Subject: [PATCH 5/5] Update pandas to 2.1.4

---
 dabest/_dabest_object.py    |  2 +-
 dabest/plot_tools.py        | 10 +++++-----
 dabest/plotter.py           | 12 ++++++------
 nbs/API/dabest_object.ipynb |  2 +-
 nbs/API/plot_tools.ipynb    | 10 +++++-----
 nbs/API/plotter.ipynb       | 12 ++++++------
 settings.ini                |  2 +-
 7 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py
index ec917b03..035ef996 100644
--- a/dabest/_dabest_object.py
+++ b/dabest/_dabest_object.py
@@ -667,7 +667,7 @@ def _get_plot_data(self, x, y, all_plot_groups):
                 all_plot_groups, ordered=True, inplace=True
             )
         else:
-            plot_data.loc[:, self.__xvar] = pd.Categorical(
+            plot_data[self.__xvar] = pd.Categorical(
                 plot_data[self.__xvar], categories=all_plot_groups, ordered=True
             )
 
diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py
index 0d4a5991..af413e8f 100644
--- a/dabest/plot_tools.py
+++ b/dabest/plot_tools.py
@@ -117,15 +117,15 @@ def error_bar(
     else:
         group_order = pd.unique(data[x])
 
-    means = data.groupby(x)[y].mean().reindex(index=group_order)
+    means = data.groupby(x, observed=False)[y].mean().reindex(index=group_order)
 
     if method in ["proportional_error_bar", "sankey_error_bar"]:
         g = lambda x: np.sqrt(
             (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))
         )
-        sd = data.groupby(x)[y].apply(g)
+        sd = data.groupby(x, observed=False)[y].apply(g)
     else:
-        sd = data.groupby(x)[y].std().reindex(index=group_order)
+        sd = data.groupby(x, observed=False)[y].std().reindex(index=group_order)
 
     lower_sd = means - sd
     upper_sd = means + sd
@@ -133,9 +133,9 @@ def error_bar(
     if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any():
         kwargs["clip_on"] = True
 
-    medians = data.groupby(x)[y].median().reindex(index=group_order)
+    medians = data.groupby(x, observed=False)[y].median().reindex(index=group_order)
     quantiles = (
-        data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)
+        data.groupby(x, observed=False)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)
     )
     lower_quartiles = quantiles[0.25]
     upper_quartiles = quantiles[0.75]
diff --git a/dabest/plotter.py b/dabest/plotter.py
index a1de3589..e797c3fc 100644
--- a/dabest/plotter.py
+++ b/dabest/plotter.py
@@ -780,7 +780,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
             )
 
     # Add the counts to the rawdata axes xticks.
-    counts = plot_data.groupby(xvar).count()[yvar]
+    counts = plot_data.groupby(xvar, observed=False).count()[yvar]
     ticks_with_counts = []
     ticks_loc = rawdata_axes.get_xticks()
     rawdata_axes.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks_loc))
@@ -1076,19 +1076,19 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
         # Check that the effect size is within the swarm ylims.
         if effect_size_type in ["mean_diff", "cohens_d", "hedges_g", "cohens_h"]:
             control_group_summary = (
-                plot_data.groupby(xvar)
+                plot_data.groupby(xvar, observed=False)
                 .mean(numeric_only=True)
                 .loc[current_control, yvar]
             )
             test_group_summary = (
-                plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar]
+                plot_data.groupby(xvar, observed=False).mean(numeric_only=True).loc[current_group, yvar]
             )
         elif effect_size_type == "median_diff":
             control_group_summary = (
-                plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar]
+                plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_control, yvar]
             )
             test_group_summary = (
-                plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar]
+                plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_group, yvar]
             )
 
         if swarm_ylim is None:
@@ -1132,7 +1132,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
                 pooled_sd = stds[0]
 
             if effect_size_type == "hedges_g":
-                gby_count = plot_data.groupby(xvar).count()
+                gby_count = plot_data.groupby(xvar, observed=False).count()
                 len_control = gby_count.loc[current_control, yvar]
                 len_test = gby_count.loc[current_group, yvar]
 
diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb
index 776b4fb1..c51e480f 100644
--- a/nbs/API/dabest_object.ipynb
+++ b/nbs/API/dabest_object.ipynb
@@ -735,7 +735,7 @@
     "                all_plot_groups, ordered=True, inplace=True\n",
     "            )\n",
     "        else:\n",
-    "            plot_data.loc[:, self.__xvar] = pd.Categorical(\n",
+    "            plot_data[self.__xvar] = pd.Categorical(\n",
     "                plot_data[self.__xvar], categories=all_plot_groups, ordered=True\n",
     "            )\n",
     "\n",
diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb
index 4932e7e9..351c7dad 100644
--- a/nbs/API/plot_tools.ipynb
+++ b/nbs/API/plot_tools.ipynb
@@ -170,15 +170,15 @@
     "    else:\n",
     "        group_order = pd.unique(data[x])\n",
     "\n",
-    "    means = data.groupby(x)[y].mean().reindex(index=group_order)\n",
+    "    means = data.groupby(x, observed=False)[y].mean().reindex(index=group_order)\n",
     "\n",
     "    if method in [\"proportional_error_bar\", \"sankey_error_bar\"]:\n",
     "        g = lambda x: np.sqrt(\n",
     "            (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))\n",
     "        )\n",
-    "        sd = data.groupby(x)[y].apply(g)\n",
+    "        sd = data.groupby(x, observed=False)[y].apply(g)\n",
     "    else:\n",
-    "        sd = data.groupby(x)[y].std().reindex(index=group_order)\n",
+    "        sd = data.groupby(x, observed=False)[y].std().reindex(index=group_order)\n",
     "\n",
     "    lower_sd = means - sd\n",
     "    upper_sd = means + sd\n",
@@ -186,9 +186,9 @@
     "    if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any():\n",
     "        kwargs[\"clip_on\"] = True\n",
     "\n",
-    "    medians = data.groupby(x)[y].median().reindex(index=group_order)\n",
+    "    medians = data.groupby(x, observed=False)[y].median().reindex(index=group_order)\n",
     "    quantiles = (\n",
-    "        data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n",
+    "        data.groupby(x, observed=False)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n",
     "    )\n",
     "    lower_quartiles = quantiles[0.25]\n",
     "    upper_quartiles = quantiles[0.75]\n",
diff --git a/nbs/API/plotter.ipynb b/nbs/API/plotter.ipynb
index 127fa24d..75b81c4c 100644
--- a/nbs/API/plotter.ipynb
+++ b/nbs/API/plotter.ipynb
@@ -837,7 +837,7 @@
     "            )\n",
     "\n",
     "    # Add the counts to the rawdata axes xticks.\n",
-    "    counts = plot_data.groupby(xvar).count()[yvar]\n",
+    "    counts = plot_data.groupby(xvar, observed=False).count()[yvar]\n",
     "    ticks_with_counts = []\n",
     "    ticks_loc = rawdata_axes.get_xticks()\n",
     "    rawdata_axes.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks_loc))\n",
@@ -1133,19 +1133,19 @@
     "        # Check that the effect size is within the swarm ylims.\n",
     "        if effect_size_type in [\"mean_diff\", \"cohens_d\", \"hedges_g\", \"cohens_h\"]:\n",
     "            control_group_summary = (\n",
-    "                plot_data.groupby(xvar)\n",
+    "                plot_data.groupby(xvar, observed=False)\n",
     "                .mean(numeric_only=True)\n",
     "                .loc[current_control, yvar]\n",
     "            )\n",
     "            test_group_summary = (\n",
-    "                plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar]\n",
+    "                plot_data.groupby(xvar, observed=False).mean(numeric_only=True).loc[current_group, yvar]\n",
     "            )\n",
     "        elif effect_size_type == \"median_diff\":\n",
     "            control_group_summary = (\n",
-    "                plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar]\n",
+    "                plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_control, yvar]\n",
     "            )\n",
     "            test_group_summary = (\n",
-    "                plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar]\n",
+    "                plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_group, yvar]\n",
     "            )\n",
     "\n",
     "        if swarm_ylim is None:\n",
@@ -1189,7 +1189,7 @@
     "                pooled_sd = stds[0]\n",
     "\n",
     "            if effect_size_type == \"hedges_g\":\n",
-    "                gby_count = plot_data.groupby(xvar).count()\n",
+    "                gby_count = plot_data.groupby(xvar, observed=False).count()\n",
     "                len_control = gby_count.loc[current_control, yvar]\n",
     "                len_test = gby_count.loc[current_group, yvar]\n",
     "\n",
diff --git a/settings.ini b/settings.ini
index 449f5aa8..a6b36da8 100644
--- a/settings.ini
+++ b/settings.ini
@@ -37,7 +37,7 @@ language = English
 status = 3
 user = acclab
 
-requirements = fastcore pandas~=1.5.3 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt
+requirements = fastcore pandas~=2.1.4 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt
 dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1
 
 ### Optional ###