From 59bd92a7ee96be0e6daa5f122dc4f470d66e9a97 Mon Sep 17 00:00:00 2001 From: Johannes Hentschel Date: Sun, 3 Dec 2023 23:44:42 +0100 Subject: [PATCH] fix: do not convert "count" column to "Int64" by default (because of Plotly bug); instead convert integer columns when making ranking tables to prevent counts coming as floats --- src/dimcat/data/resources/results.py | 16 ++++++++++++++-- src/dimcat/steps/analyzers/counters.py | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/dimcat/data/resources/results.py b/src/dimcat/data/resources/results.py index 0a3ac95e..b1eb711d 100644 --- a/src/dimcat/data/resources/results.py +++ b/src/dimcat/data/resources/results.py @@ -558,7 +558,11 @@ def make_ranking_table( """ - def make_table(df, drop_columns=None): + def make_table( + df, + drop_columns: Optional[List[str]] = None, + make_int_nullable: bool = False, + ): if top_k and top_k > 0: ranking = df.nlargest(top_k, sort_column, keep=keep) else: @@ -567,6 +571,14 @@ def make_table(df, drop_columns=None): if drop_columns: ranking = ranking.drop(columns=drop_columns) ranking.index = (ranking.index + 1).rename("rank") + if make_int_nullable: + conversion = { + col: "Int64" + for col, dtype in ranking.dtypes.items() + if pd.api.types.is_integer_dtype(dtype) + } + if conversion: + ranking = ranking.astype(conversion) return ranking if sort_order == SortOrder.DESCENDING: @@ -586,7 +598,7 @@ def make_table(df, drop_columns=None): if not group_cols: return make_table(df) ranking_groups = { - group: make_table(df, group_cols + drop_cols) + group: make_table(df, group_cols + drop_cols, make_int_nullable=True) for group, df in df.groupby(group_cols) } return pd.concat(ranking_groups, names=group_cols, axis=1) diff --git a/src/dimcat/steps/analyzers/counters.py b/src/dimcat/steps/analyzers/counters.py index 437d8325..697872a8 100644 --- a/src/dimcat/steps/analyzers/counters.py +++ b/src/dimcat/steps/analyzers/counters.py @@ -47,7 +47,8 @@ def groupby_apply(self, feature: Feature, groupby: SomeSeries = None, **kwargs): ): groupby.append(feature.formatted_column) result = feature.groupby(groupby).size() - result = result.astype("Int64").to_frame(self._dimension_column_name) + result = result.to_frame(self._dimension_column_name) + return result def resource_name_factory(self, resource: DimcatResource) -> str: