From a2eceafdd893707e112c331f95552cb4d18f590a Mon Sep 17 00:00:00 2001 From: siemdejong <28396796+siemdejong@users.noreply.github.com> Date: Mon, 12 Aug 2024 10:42:45 +0200 Subject: [PATCH 1/5] fix: change Subplot to Axes --- dabest/plot_tools.py | 36 ++++++++++++++++++------------------ nbs/API/plot_tools.ipynb | 36 ++++++++++++++++++------------------ nbs/tests/test_plot_tools.py | 2 +- settings.ini | 2 +- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index 65fea009..ece9b6e4 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -783,7 +783,7 @@ def swarmplot( data: pd.DataFrame, x: str, y: str, - ax: axes.Subplot, + ax: axes.Axes, order: List = None, hue: str = None, palette: Union[Iterable, str] = "black", @@ -806,8 +806,8 @@ def swarmplot( The column in the DataFrame to be used as the x-axis. y : str The column in the DataFrame to be used as the y-axis. - ax : axes._subplots.Subplot | axes._axes.Axes - Matplotlib AxesSubplot object for which the plot would be drawn on. Default is None. + ax : axes.Axes + Matplotlib axes.Axes object for which the plot would be drawn on. Default is None. order : List The order in which x-axis categories should be displayed. Default is None. hue : str @@ -832,8 +832,8 @@ def swarmplot( Returns ------- - axes._subplots.Subplot | axes._axes.Axes - Matplotlib AxesSubplot object for which the swarm plot has been drawn on. + axes.Axes + Matplotlib axes.Axes object for which the swarm plot has been drawn on. """ s = SwarmPlot(data, x, y, ax, order, hue, palette, zorder, size, side, jitter) ax = s.plot(is_drop_gutter, gutter_limit, ax, **kwargs) @@ -846,7 +846,7 @@ def __init__( data: pd.DataFrame, x: str, y: str, - ax: axes.Subplot, + ax: axes.Axes, order: List = None, hue: str = None, palette: Union[Iterable, str] = "black", @@ -866,8 +866,8 @@ def __init__( The column in the DataFrame to be used as the x-axis. y : str The column in the DataFrame to be used as the y-axis. - ax : axes.Subplot - Matplotlib AxesSubplot object for which the plot would be drawn on. + ax : axes.Axes + Matplotlib axes.Axes object for which the plot would be drawn on. order : List The order in which x-axis categories should be displayed. Default is None. hue : str @@ -954,7 +954,7 @@ def __init__( self.__dsize = dsize def _check_errors( - self, data: pd.DataFrame, ax: axes.Subplot, size: float, side: str + self, data: pd.DataFrame, ax: axes.Axes, size: float, side: str ) -> None: """ Check the validity of input parameters. Raises exceptions if detected. @@ -963,8 +963,8 @@ def _check_errors( ---------- data : pd.Dataframe Input data used for generation of the swarmplot. - ax : axes.Subplot - Matplotlib AxesSubplot object for which the plot would be drawn on. + ax : axes.Axes + Matplotlib axes.Axes object for which the plot would be drawn on. size : int | float scalar value determining size of dots of the swarmplot. side: str @@ -977,9 +977,9 @@ def _check_errors( # Type enforcement if not isinstance(data, pd.DataFrame): raise ValueError("`data` must be a Pandas Dataframe.") - if not isinstance(ax, (axes._subplots.Subplot, axes._axes.Axes)): + if not isinstance(ax, axes.Axes): raise ValueError( - f"`ax` must be a Matplotlib AxesSubplot. The current `ax` is a {type(ax)}" + f"`ax` must be a Matplotlib axes.Axes. The current `ax` is a {type(ax)}" ) if not isinstance(size, (int, float)): raise ValueError("`size` must be a scalar or float.") @@ -1239,8 +1239,8 @@ def _adjust_gutter_points( return points_data def plot( - self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Subplot, **kwargs - ) -> axes.Subplot: + self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Axes, **kwargs + ) -> axes.Axes: """ Generate a swarm plot. @@ -1250,15 +1250,15 @@ def plot( If True, drop points that hit the gutters; otherwise, readjust them. gutter_limit : int | float The limit for points hitting the gutters. - ax : axes.Subplot + ax : axes.Axes The matplotlib figure object to which the swarm plot will be added. **kwargs: Additional keyword arguments to be passed to the scatter plot. Returns ------- - axes.Subplot: - The matplotlib figure containing the swarm plot. + axes.Axes: + The matplotlib axes containing the swarm plot. """ # Input validation if not isinstance(is_drop_gutter, bool): diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index 7187025b..a6f4b536 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -846,7 +846,7 @@ " data: pd.DataFrame,\n", " x: str,\n", " y: str,\n", - " ax: axes.Subplot,\n", + " ax: axes.Axes,\n", " order: List = None,\n", " hue: str = None,\n", " palette: Union[Iterable, str] = \"black\",\n", @@ -869,8 +869,8 @@ " The column in the DataFrame to be used as the x-axis.\n", " y : str\n", " The column in the DataFrame to be used as the y-axis.\n", - " ax : axes._subplots.Subplot | axes._axes.Axes\n", - " Matplotlib AxesSubplot object for which the plot would be drawn on. Default is None.\n", + " ax : axes.Axes\n", + " Matplotlib axes.Axes object for which the plot would be drawn on. Default is None.\n", " order : List\n", " The order in which x-axis categories should be displayed. Default is None.\n", " hue : str\n", @@ -895,8 +895,8 @@ "\n", " Returns\n", " -------\n", - " axes._subplots.Subplot | axes._axes.Axes\n", - " Matplotlib AxesSubplot object for which the swarm plot has been drawn on.\n", + " axes.Axes\n", + " Matplotlib axes.Axes object for which the swarm plot has been drawn on.\n", " \"\"\"\n", " s = SwarmPlot(data, x, y, ax, order, hue, palette, zorder, size, side, jitter)\n", " ax = s.plot(is_drop_gutter, gutter_limit, ax, **kwargs)\n", @@ -909,7 +909,7 @@ " data: pd.DataFrame,\n", " x: str,\n", " y: str,\n", - " ax: axes.Subplot,\n", + " ax: axes.Axes,\n", " order: List = None,\n", " hue: str = None,\n", " palette: Union[Iterable, str] = \"black\",\n", @@ -929,8 +929,8 @@ " The column in the DataFrame to be used as the x-axis.\n", " y : str\n", " The column in the DataFrame to be used as the y-axis.\n", - " ax : axes.Subplot\n", - " Matplotlib AxesSubplot object for which the plot would be drawn on.\n", + " ax : axes.Axes\n", + " Matplotlib axes.Axes object for which the plot would be drawn on.\n", " order : List\n", " The order in which x-axis categories should be displayed. Default is None.\n", " hue : str\n", @@ -1017,7 +1017,7 @@ " self.__dsize = dsize\n", "\n", " def _check_errors(\n", - " self, data: pd.DataFrame, ax: axes.Subplot, size: float, side: str\n", + " self, data: pd.DataFrame, ax: axes.Axes, size: float, side: str\n", " ) -> None:\n", " \"\"\"\n", " Check the validity of input parameters. Raises exceptions if detected.\n", @@ -1026,8 +1026,8 @@ " ----------\n", " data : pd.Dataframe\n", " Input data used for generation of the swarmplot.\n", - " ax : axes.Subplot\n", - " Matplotlib AxesSubplot object for which the plot would be drawn on.\n", + " ax : axes.Axes\n", + " Matplotlib axes.Axes object for which the plot would be drawn on.\n", " size : int | float\n", " scalar value determining size of dots of the swarmplot.\n", " side: str\n", @@ -1040,9 +1040,9 @@ " # Type enforcement\n", " if not isinstance(data, pd.DataFrame):\n", " raise ValueError(\"`data` must be a Pandas Dataframe.\")\n", - " if not isinstance(ax, (axes._subplots.Subplot, axes._axes.Axes)):\n", + " if not isinstance(ax, axes.Axes):\n", " raise ValueError(\n", - " f\"`ax` must be a Matplotlib AxesSubplot. The current `ax` is a {type(ax)}\"\n", + " f\"`ax` must be a Matplotlib axes.Axes. The current `ax` is a {type(ax)}\"\n", " )\n", " if not isinstance(size, (int, float)):\n", " raise ValueError(\"`size` must be a scalar or float.\")\n", @@ -1302,8 +1302,8 @@ " return points_data\n", "\n", " def plot(\n", - " self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Subplot, **kwargs\n", - " ) -> axes.Subplot:\n", + " self, is_drop_gutter: bool, gutter_limit: float, ax: axes.Axes, **kwargs\n", + " ) -> axes.Axes:\n", " \"\"\"\n", " Generate a swarm plot.\n", "\n", @@ -1313,15 +1313,15 @@ " If True, drop points that hit the gutters; otherwise, readjust them.\n", " gutter_limit : int | float\n", " The limit for points hitting the gutters.\n", - " ax : axes.Subplot\n", + " ax : axes.Axes\n", " The matplotlib figure object to which the swarm plot will be added.\n", " **kwargs:\n", " Additional keyword arguments to be passed to the scatter plot.\n", "\n", " Returns\n", " -------\n", - " axes.Subplot:\n", - " The matplotlib figure containing the swarm plot.\n", + " axes.Axes:\n", + " The matplotlib axes containing the swarm plot.\n", " \"\"\"\n", " # Input validation\n", " if not isinstance(is_drop_gutter, bool):\n", diff --git a/nbs/tests/test_plot_tools.py b/nbs/tests/test_plot_tools.py index b47dba7f..0b40ee25 100644 --- a/nbs/tests/test_plot_tools.py +++ b/nbs/tests/test_plot_tools.py @@ -84,7 +84,7 @@ def test_check_data_matches_labels(): ("data", None, "`data` must be a Pandas Dataframe.", ValueError), ("x", None, "`x` must be a string.", ValueError), ("y", None, "`y` must be a string.", ValueError), - ("ax", None, "`ax` must be a Matplotlib AxesSubplot. The current `ax` is a ", ValueError), + ("ax", None, "`ax` must be a Matplotlib axes.Axes. The current `ax` is a ", ValueError), ("order", 5, "`order` must be either an Iterable or None.", ValueError), ("hue", 5, "`hue` must be either a string or None.", ValueError), ("palette", None, "`palette` must be either a string indicating a color name or an Iterable.", ValueError), diff --git a/settings.ini b/settings.ini index 5c22d22d..d750d22e 100644 --- a/settings.ini +++ b/settings.ini @@ -37,7 +37,7 @@ language = English status = 3 user = acclab -requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.6.3 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt +requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1 ### Optional ### From 3164aee6b56e261c6e6f56d31727ff84ea2792c3 Mon Sep 17 00:00:00 2001 From: siemdejong <28396796+siemdejong@users.noreply.github.com> Date: Mon, 12 Aug 2024 12:35:34 +0200 Subject: [PATCH 2/5] fix: detail matplotlib version number --- settings.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.ini b/settings.ini index d750d22e..1f64360f 100644 --- a/settings.ini +++ b/settings.ini @@ -37,7 +37,7 @@ language = English status = 3 user = acclab -requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt +requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1 ### Optional ### From a27f6c1eab3925261aaf30e257b12998f7a9bda8 Mon Sep 17 00:00:00 2001 From: Jacobluke- Date: Mon, 16 Sep 2024 17:13:40 +0800 Subject: [PATCH 3/5] Fix Upgrading Error --- .github/workflows/test-pytest.yaml | 2 +- dabest/_modidx.py | 4 +++- dabest/misc_tools.py | 16 +++++++++++++++- dabest/plot_tools.py | 9 +++++---- dabest/plotter.py | 18 ++++++++++-------- nbs/API/misc_tools.ipynb | 16 +++++++++++++++- nbs/API/plot_tools.ipynb | 9 +++++---- nbs/API/plotter.ipynb | 18 ++++++++++-------- settings.ini | 4 ++-- 9 files changed, 66 insertions(+), 30 deletions(-) diff --git a/.github/workflows/test-pytest.yaml b/.github/workflows/test-pytest.yaml index 599c62a6..344c88b7 100644 --- a/.github/workflows/test-pytest.yaml +++ b/.github/workflows/test-pytest.yaml @@ -8,7 +8,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.9 cache: "pip" cache-dependency-path: settings.ini - name: Run pytest diff --git a/dabest/_modidx.py b/dabest/_modidx.py index 14bfa3da..c18ab957 100644 --- a/dabest/_modidx.py +++ b/dabest/_modidx.py @@ -65,7 +65,9 @@ 'dabest/forest_plot.py'), 'dabest.forest_plot.forest_plot': ('API/forest_plot.html#forest_plot', 'dabest/forest_plot.py'), 'dabest.forest_plot.load_plot_data': ('API/forest_plot.html#load_plot_data', 'dabest/forest_plot.py')}, - 'dabest.misc_tools': { 'dabest.misc_tools.get_varname': ('API/misc_tools.html#get_varname', 'dabest/misc_tools.py'), + 'dabest.misc_tools': { 'dabest.misc_tools.get_unique_categories': ( 'API/misc_tools.html#get_unique_categories', + 'dabest/misc_tools.py'), + 'dabest.misc_tools.get_varname': ('API/misc_tools.html#get_varname', 'dabest/misc_tools.py'), 'dabest.misc_tools.merge_two_dicts': ('API/misc_tools.html#merge_two_dicts', 'dabest/misc_tools.py'), 'dabest.misc_tools.print_greeting': ('API/misc_tools.html#print_greeting', 'dabest/misc_tools.py'), 'dabest.misc_tools.unpack_and_add': ('API/misc_tools.html#unpack_and_add', 'dabest/misc_tools.py')}, diff --git a/dabest/misc_tools.py b/dabest/misc_tools.py index 7c5b2020..0a5c90bb 100644 --- a/dabest/misc_tools.py +++ b/dabest/misc_tools.py @@ -1,10 +1,12 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/misc_tools.ipynb. # %% auto 0 -__all__ = ['merge_two_dicts', 'unpack_and_add', 'print_greeting', 'get_varname'] +__all__ = ['merge_two_dicts', 'unpack_and_add', 'print_greeting', 'get_varname', 'get_unique_categories'] # %% ../nbs/API/misc_tools.ipynb 4 import datetime as dt +import numpy as np +import pandas as pd from numpy import repeat # %% ../nbs/API/misc_tools.ipynb 5 @@ -68,3 +70,15 @@ def get_varname(obj): if len(matching_vars) > 0: return matching_vars[0] return "" + +def get_unique_categories(names): + """ + Extract unique categories from various input types. + """ + if isinstance(names, np.ndarray): + return names # numpy.unique() returns a sorted array + elif isinstance(names, (pd.Categorical, pd.Series)): + return names.cat.categories if hasattr(names, 'cat') else names.unique() + else: + # For dict_keys and other iterables + return np.unique(list(names)) diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index ece9b6e4..b6f0cfe0 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -1136,9 +1136,10 @@ def _swarm( raise ValueError("`dsize` must be a scalar or float.") # Sorting algorithm based off of: https://github.com/mgymrek/pybeeswarm - points_data = pd.DataFrame( - {"y": [yval * 1.0 / dsize for yval in values], "x": [0] * len(values)} - ) + points_data = pd.DataFrame({ + "y": [yval * 1.0 / dsize for yval in values], + "x": np.zeros(len(values), dtype=float) # Initialize with float zeros + }) for i in range(1, points_data.shape[0]): y_i = points_data["y"].values[i] points_placed = points_data[0:i] @@ -1271,7 +1272,7 @@ def plot( 0 # x-coordinate of center of each individual swarm of the swarm plot ) x_tick_tabels = [] - for group_i, values_i in self.__data_copy.groupby(self.__x): + for group_i, values_i in self.__data_copy.groupby(self.__x, observed=False): x_new = [] values_i_y = values_i[self.__y] x_offset = self._swarm( diff --git a/dabest/plotter.py b/dabest/plotter.py index fcd65ee5..086db5b6 100644 --- a/dabest/plotter.py +++ b/dabest/plotter.py @@ -54,7 +54,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): fontsize_contrastxlabel=12, fontsize_contrastylabel=12, fontsize_delta2label=12 """ - from .misc_tools import merge_two_dicts + from .misc_tools import merge_two_dicts, get_unique_categories from .plot_tools import ( halfviolin, get_swarm_spans, @@ -298,14 +298,16 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): raise ValueError(err1 + err2) if custom_pal is None and color_col is None: + categories = get_unique_categories(names) + swarm_colors = [sns.desaturate(c, swarm_desat) for c in unsat_colors] - plot_palette_raw = dict(zip(names.categories, swarm_colors)) - bar_color = [sns.desaturate(c, bar_desat) for c in unsat_colors] - plot_palette_bar = dict(zip(names.categories, bar_color)) - contrast_colors = [sns.desaturate(c, contrast_desat) for c in unsat_colors] - plot_palette_contrast = dict(zip(names.categories, contrast_colors)) + + + plot_palette_raw = dict(zip(categories, swarm_colors)) + plot_palette_bar = dict(zip(categories, bar_color)) + plot_palette_contrast = dict(zip(categories, contrast_colors)) # For Sankey Diagram plot, no need to worry about the color, each bar will have the same two colors # default color palette will be set to "hls" @@ -1081,10 +1083,10 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): ) elif effect_size_type == "median_diff": control_group_summary = ( - plot_data.groupby(xvar).median().loc[current_control, yvar] + plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar] ) test_group_summary = ( - plot_data.groupby(xvar).median().loc[current_group, yvar] + plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar] ) if swarm_ylim is None: diff --git a/nbs/API/misc_tools.ipynb b/nbs/API/misc_tools.ipynb index 0395a57c..3eec5de2 100644 --- a/nbs/API/misc_tools.ipynb +++ b/nbs/API/misc_tools.ipynb @@ -55,6 +55,8 @@ "source": [ "#| export\n", "import datetime as dt\n", + "import numpy as np\n", + "import pandas as pd\n", "from numpy import repeat" ] }, @@ -125,7 +127,19 @@ " matching_vars = [k for k, v in globals().items() if v is obj]\n", " if len(matching_vars) > 0:\n", " return matching_vars[0]\n", - " return \"\"" + " return \"\"\n", + "\n", + "def get_unique_categories(names):\n", + " \"\"\"\n", + " Extract unique categories from various input types.\n", + " \"\"\"\n", + " if isinstance(names, np.ndarray):\n", + " return names # numpy.unique() returns a sorted array\n", + " elif isinstance(names, (pd.Categorical, pd.Series)):\n", + " return names.cat.categories if hasattr(names, 'cat') else names.unique()\n", + " else:\n", + " # For dict_keys and other iterables\n", + " return np.unique(list(names))" ] } ], diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index a6f4b536..4932e7e9 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -1199,9 +1199,10 @@ " raise ValueError(\"`dsize` must be a scalar or float.\")\n", "\n", " # Sorting algorithm based off of: https://github.com/mgymrek/pybeeswarm\n", - " points_data = pd.DataFrame(\n", - " {\"y\": [yval * 1.0 / dsize for yval in values], \"x\": [0] * len(values)}\n", - " )\n", + " points_data = pd.DataFrame({\n", + " \"y\": [yval * 1.0 / dsize for yval in values],\n", + " \"x\": np.zeros(len(values), dtype=float) # Initialize with float zeros\n", + " })\n", " for i in range(1, points_data.shape[0]):\n", " y_i = points_data[\"y\"].values[i]\n", " points_placed = points_data[0:i]\n", @@ -1334,7 +1335,7 @@ " 0 # x-coordinate of center of each individual swarm of the swarm plot\n", " )\n", " x_tick_tabels = []\n", - " for group_i, values_i in self.__data_copy.groupby(self.__x):\n", + " for group_i, values_i in self.__data_copy.groupby(self.__x, observed=False):\n", " x_new = []\n", " values_i_y = values_i[self.__y]\n", " x_offset = self._swarm(\n", diff --git a/nbs/API/plotter.ipynb b/nbs/API/plotter.ipynb index 7e054ea4..127fa24d 100644 --- a/nbs/API/plotter.ipynb +++ b/nbs/API/plotter.ipynb @@ -113,7 +113,7 @@ " fontsize_contrastxlabel=12, fontsize_contrastylabel=12,\n", " fontsize_delta2label=12\n", " \"\"\"\n", - " from .misc_tools import merge_two_dicts\n", + " from .misc_tools import merge_two_dicts, get_unique_categories\n", " from .plot_tools import (\n", " halfviolin,\n", " get_swarm_spans,\n", @@ -357,14 +357,16 @@ " raise ValueError(err1 + err2)\n", "\n", " if custom_pal is None and color_col is None:\n", + " categories = get_unique_categories(names)\n", + " \n", " swarm_colors = [sns.desaturate(c, swarm_desat) for c in unsat_colors]\n", - " plot_palette_raw = dict(zip(names.categories, swarm_colors))\n", - "\n", " bar_color = [sns.desaturate(c, bar_desat) for c in unsat_colors]\n", - " plot_palette_bar = dict(zip(names.categories, bar_color))\n", - "\n", " contrast_colors = [sns.desaturate(c, contrast_desat) for c in unsat_colors]\n", - " plot_palette_contrast = dict(zip(names.categories, contrast_colors))\n", + "\n", + " \n", + " plot_palette_raw = dict(zip(categories, swarm_colors))\n", + " plot_palette_bar = dict(zip(categories, bar_color))\n", + " plot_palette_contrast = dict(zip(categories, contrast_colors))\n", "\n", " # For Sankey Diagram plot, no need to worry about the color, each bar will have the same two colors\n", " # default color palette will be set to \"hls\"\n", @@ -1140,10 +1142,10 @@ " )\n", " elif effect_size_type == \"median_diff\":\n", " control_group_summary = (\n", - " plot_data.groupby(xvar).median().loc[current_control, yvar]\n", + " plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar]\n", " )\n", " test_group_summary = (\n", - " plot_data.groupby(xvar).median().loc[current_group, yvar]\n", + " plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar]\n", " )\n", "\n", " if swarm_ylim is None:\n", diff --git a/settings.ini b/settings.ini index 1f64360f..449f5aa8 100644 --- a/settings.ini +++ b/settings.ini @@ -3,7 +3,7 @@ repo = DABEST-python lib_name = dabest version = 2024.03.29 -min_python = 3.8 +min_python = 3.9 license = apache2 ### nbdev ### @@ -37,7 +37,7 @@ language = English status = 3 user = acclab -requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt +requirements = fastcore pandas~=1.5.3 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1 ### Optional ### From 17d10144d8690bda3bd69d65c54baf37a23f22bd Mon Sep 17 00:00:00 2001 From: Jacobluke- Date: Mon, 16 Sep 2024 17:19:16 +0800 Subject: [PATCH 4/5] nbdev adding description for api files --- dabest/_api.py | 2 ++ dabest/_dabest_object.py | 2 ++ dabest/_delta_objects.py | 2 ++ dabest/_effsize_objects.py | 2 ++ dabest/_stats_tools/confint_1group.py | 2 ++ dabest/_stats_tools/confint_2group_diff.py | 2 ++ dabest/_stats_tools/effsize.py | 2 ++ dabest/forest_plot.py | 2 ++ dabest/misc_tools.py | 2 ++ dabest/plot_tools.py | 2 ++ dabest/plotter.py | 2 ++ 11 files changed, 22 insertions(+) diff --git a/dabest/_api.py b/dabest/_api.py index 7c8d0eac..a6399385 100644 --- a/dabest/_api.py +++ b/dabest/_api.py @@ -1,3 +1,5 @@ +"""Loading data and relevant groups""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/load.ipynb. # %% auto 0 diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py index 3f618a2a..ec917b03 100644 --- a/dabest/_dabest_object.py +++ b/dabest/_dabest_object.py @@ -1,3 +1,5 @@ +"""Main class for estimating statistics and generating plots.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/dabest_object.ipynb. # %% auto 0 diff --git a/dabest/_delta_objects.py b/dabest/_delta_objects.py index 30c44895..1827c1b2 100644 --- a/dabest/_delta_objects.py +++ b/dabest/_delta_objects.py @@ -1,3 +1,5 @@ +"""Auxiliary delta classes for estimating statistics and generating plots.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/delta_objects.ipynb. # %% auto 0 diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py index f8bf3846..355ef971 100644 --- a/dabest/_effsize_objects.py +++ b/dabest/_effsize_objects.py @@ -1,3 +1,5 @@ +"""The auxiliary classes involved in the computations of bootstrapped effect sizes.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/effsize_objects.ipynb. # %% auto 0 diff --git a/dabest/_stats_tools/confint_1group.py b/dabest/_stats_tools/confint_1group.py index a9b0beb1..744a7142 100644 --- a/dabest/_stats_tools/confint_1group.py +++ b/dabest/_stats_tools/confint_1group.py @@ -1,3 +1,5 @@ +"""A range of functions to compute bootstraps for a single sample.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/confint_1group.ipynb. # %% auto 0 diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index 3b07eb96..c599e178 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -1,3 +1,5 @@ +"""A range of functions to compute bootstraps for the mean difference""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/confint_2group_diff.ipynb. # %% auto 0 diff --git a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py index 32f965b1..f5a0d4fc 100644 --- a/dabest/_stats_tools/effsize.py +++ b/dabest/_stats_tools/effsize.py @@ -1,3 +1,5 @@ +"""A range of functions to compute various effect sizes.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/effsize.ipynb. # %% ../../nbs/API/effsize.ipynb 4 diff --git a/dabest/forest_plot.py b/dabest/forest_plot.py index 7d29464f..583ece0c 100644 --- a/dabest/forest_plot.py +++ b/dabest/forest_plot.py @@ -1,3 +1,5 @@ +"""Creating forest plots from contrast objects.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/forest_plot.ipynb. # %% auto 0 diff --git a/dabest/misc_tools.py b/dabest/misc_tools.py index 0a5c90bb..cb3984fe 100644 --- a/dabest/misc_tools.py +++ b/dabest/misc_tools.py @@ -1,3 +1,5 @@ +"""Convenience functions that don't directly deal with plotting or bootstrap computations are placed here.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/misc_tools.ipynb. # %% auto 0 diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index b6f0cfe0..0d4a5991 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -1,3 +1,5 @@ +"""A set of convenience functions used for producing plots in `dabest`.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/plot_tools.ipynb. # %% ../nbs/API/plot_tools.ipynb 2 diff --git a/dabest/plotter.py b/dabest/plotter.py index 086db5b6..a1de3589 100644 --- a/dabest/plotter.py +++ b/dabest/plotter.py @@ -1,3 +1,5 @@ +"""Creating estimation plots.""" + # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/plotter.ipynb. # %% auto 0 From 9a42975084ee6967c0f73c8afb23c6f076b61b2b Mon Sep 17 00:00:00 2001 From: Jacobluke- Date: Tue, 17 Sep 2024 14:28:56 +0800 Subject: [PATCH 5/5] Update pandas to 2.1.4 --- dabest/_dabest_object.py | 2 +- dabest/plot_tools.py | 10 +++++----- dabest/plotter.py | 12 ++++++------ nbs/API/dabest_object.ipynb | 2 +- nbs/API/plot_tools.ipynb | 10 +++++----- nbs/API/plotter.ipynb | 12 ++++++------ settings.ini | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py index ec917b03..035ef996 100644 --- a/dabest/_dabest_object.py +++ b/dabest/_dabest_object.py @@ -667,7 +667,7 @@ def _get_plot_data(self, x, y, all_plot_groups): all_plot_groups, ordered=True, inplace=True ) else: - plot_data.loc[:, self.__xvar] = pd.Categorical( + plot_data[self.__xvar] = pd.Categorical( plot_data[self.__xvar], categories=all_plot_groups, ordered=True ) diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index 0d4a5991..af413e8f 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -117,15 +117,15 @@ def error_bar( else: group_order = pd.unique(data[x]) - means = data.groupby(x)[y].mean().reindex(index=group_order) + means = data.groupby(x, observed=False)[y].mean().reindex(index=group_order) if method in ["proportional_error_bar", "sankey_error_bar"]: g = lambda x: np.sqrt( (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x)) ) - sd = data.groupby(x)[y].apply(g) + sd = data.groupby(x, observed=False)[y].apply(g) else: - sd = data.groupby(x)[y].std().reindex(index=group_order) + sd = data.groupby(x, observed=False)[y].std().reindex(index=group_order) lower_sd = means - sd upper_sd = means + sd @@ -133,9 +133,9 @@ def error_bar( if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any(): kwargs["clip_on"] = True - medians = data.groupby(x)[y].median().reindex(index=group_order) + medians = data.groupby(x, observed=False)[y].median().reindex(index=group_order) quantiles = ( - data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order) + data.groupby(x, observed=False)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order) ) lower_quartiles = quantiles[0.25] upper_quartiles = quantiles[0.75] diff --git a/dabest/plotter.py b/dabest/plotter.py index a1de3589..e797c3fc 100644 --- a/dabest/plotter.py +++ b/dabest/plotter.py @@ -780,7 +780,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): ) # Add the counts to the rawdata axes xticks. - counts = plot_data.groupby(xvar).count()[yvar] + counts = plot_data.groupby(xvar, observed=False).count()[yvar] ticks_with_counts = [] ticks_loc = rawdata_axes.get_xticks() rawdata_axes.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks_loc)) @@ -1076,19 +1076,19 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): # Check that the effect size is within the swarm ylims. if effect_size_type in ["mean_diff", "cohens_d", "hedges_g", "cohens_h"]: control_group_summary = ( - plot_data.groupby(xvar) + plot_data.groupby(xvar, observed=False) .mean(numeric_only=True) .loc[current_control, yvar] ) test_group_summary = ( - plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar] + plot_data.groupby(xvar, observed=False).mean(numeric_only=True).loc[current_group, yvar] ) elif effect_size_type == "median_diff": control_group_summary = ( - plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar] + plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_control, yvar] ) test_group_summary = ( - plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar] + plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_group, yvar] ) if swarm_ylim is None: @@ -1132,7 +1132,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): pooled_sd = stds[0] if effect_size_type == "hedges_g": - gby_count = plot_data.groupby(xvar).count() + gby_count = plot_data.groupby(xvar, observed=False).count() len_control = gby_count.loc[current_control, yvar] len_test = gby_count.loc[current_group, yvar] diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb index 776b4fb1..c51e480f 100644 --- a/nbs/API/dabest_object.ipynb +++ b/nbs/API/dabest_object.ipynb @@ -735,7 +735,7 @@ " all_plot_groups, ordered=True, inplace=True\n", " )\n", " else:\n", - " plot_data.loc[:, self.__xvar] = pd.Categorical(\n", + " plot_data[self.__xvar] = pd.Categorical(\n", " plot_data[self.__xvar], categories=all_plot_groups, ordered=True\n", " )\n", "\n", diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index 4932e7e9..351c7dad 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -170,15 +170,15 @@ " else:\n", " group_order = pd.unique(data[x])\n", "\n", - " means = data.groupby(x)[y].mean().reindex(index=group_order)\n", + " means = data.groupby(x, observed=False)[y].mean().reindex(index=group_order)\n", "\n", " if method in [\"proportional_error_bar\", \"sankey_error_bar\"]:\n", " g = lambda x: np.sqrt(\n", " (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))\n", " )\n", - " sd = data.groupby(x)[y].apply(g)\n", + " sd = data.groupby(x, observed=False)[y].apply(g)\n", " else:\n", - " sd = data.groupby(x)[y].std().reindex(index=group_order)\n", + " sd = data.groupby(x, observed=False)[y].std().reindex(index=group_order)\n", "\n", " lower_sd = means - sd\n", " upper_sd = means + sd\n", @@ -186,9 +186,9 @@ " if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any():\n", " kwargs[\"clip_on\"] = True\n", "\n", - " medians = data.groupby(x)[y].median().reindex(index=group_order)\n", + " medians = data.groupby(x, observed=False)[y].median().reindex(index=group_order)\n", " quantiles = (\n", - " data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n", + " data.groupby(x, observed=False)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n", " )\n", " lower_quartiles = quantiles[0.25]\n", " upper_quartiles = quantiles[0.75]\n", diff --git a/nbs/API/plotter.ipynb b/nbs/API/plotter.ipynb index 127fa24d..75b81c4c 100644 --- a/nbs/API/plotter.ipynb +++ b/nbs/API/plotter.ipynb @@ -837,7 +837,7 @@ " )\n", "\n", " # Add the counts to the rawdata axes xticks.\n", - " counts = plot_data.groupby(xvar).count()[yvar]\n", + " counts = plot_data.groupby(xvar, observed=False).count()[yvar]\n", " ticks_with_counts = []\n", " ticks_loc = rawdata_axes.get_xticks()\n", " rawdata_axes.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks_loc))\n", @@ -1133,19 +1133,19 @@ " # Check that the effect size is within the swarm ylims.\n", " if effect_size_type in [\"mean_diff\", \"cohens_d\", \"hedges_g\", \"cohens_h\"]:\n", " control_group_summary = (\n", - " plot_data.groupby(xvar)\n", + " plot_data.groupby(xvar, observed=False)\n", " .mean(numeric_only=True)\n", " .loc[current_control, yvar]\n", " )\n", " test_group_summary = (\n", - " plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar]\n", + " plot_data.groupby(xvar, observed=False).mean(numeric_only=True).loc[current_group, yvar]\n", " )\n", " elif effect_size_type == \"median_diff\":\n", " control_group_summary = (\n", - " plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar]\n", + " plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_control, yvar]\n", " )\n", " test_group_summary = (\n", - " plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar]\n", + " plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_group, yvar]\n", " )\n", "\n", " if swarm_ylim is None:\n", @@ -1189,7 +1189,7 @@ " pooled_sd = stds[0]\n", "\n", " if effect_size_type == \"hedges_g\":\n", - " gby_count = plot_data.groupby(xvar).count()\n", + " gby_count = plot_data.groupby(xvar, observed=False).count()\n", " len_control = gby_count.loc[current_control, yvar]\n", " len_test = gby_count.loc[current_group, yvar]\n", "\n", diff --git a/settings.ini b/settings.ini index 449f5aa8..a6b36da8 100644 --- a/settings.ini +++ b/settings.ini @@ -37,7 +37,7 @@ language = English status = 3 user = acclab -requirements = fastcore pandas~=1.5.3 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt +requirements = fastcore pandas~=2.1.4 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1 ### Optional ###