Skip to content

Commit

Permalink
fix: do not convert "count" column to "Int64" by default (because of Plotly bug); instead convert integer columns when making ranking tables to prevent counts coming as floats
Browse files Browse the repository at this point in the history
  • Loading branch information
johentsch committed Dec 3, 2023
1 parent 55c6d54 commit 59bd92a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
16 changes: 14 additions & 2 deletions src/dimcat/data/resources/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,11 @@ def make_ranking_table(
"""

def make_table(df, drop_columns=None):
def make_table(
df,
drop_columns: Optional[List[str]] = None,
make_int_nullable: bool = False,
):
if top_k and top_k > 0:
ranking = df.nlargest(top_k, sort_column, keep=keep)
else:
Expand All @@ -567,6 +571,14 @@ def make_table(df, drop_columns=None):
if drop_columns:
ranking = ranking.drop(columns=drop_columns)
ranking.index = (ranking.index + 1).rename("rank")
if make_int_nullable:
conversion = {
col: "Int64"
for col, dtype in ranking.dtypes.items()
if pd.api.types.is_integer_dtype(dtype)
}
if conversion:
ranking = ranking.astype(conversion)
return ranking

if sort_order == SortOrder.DESCENDING:
Expand All @@ -586,7 +598,7 @@ def make_table(df, drop_columns=None):
if not group_cols:
return make_table(df)
ranking_groups = {
group: make_table(df, group_cols + drop_cols)
group: make_table(df, group_cols + drop_cols, make_int_nullable=True)
for group, df in df.groupby(group_cols)
}
return pd.concat(ranking_groups, names=group_cols, axis=1)
Expand Down
3 changes: 2 additions & 1 deletion src/dimcat/steps/analyzers/counters.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def groupby_apply(self, feature: Feature, groupby: SomeSeries = None, **kwargs):
):
groupby.append(feature.formatted_column)
result = feature.groupby(groupby).size()
result = result.astype("Int64").to_frame(self._dimension_column_name)
result = result.to_frame(self._dimension_column_name)

return result

def resource_name_factory(self, resource: DimcatResource) -> str:
Expand Down

0 comments on commit 59bd92a

Please sign in to comment.