diff --git a/activitysim/abm/models/util/mode.py b/activitysim/abm/models/util/mode.py index 49776684a..d82dd09ea 100644 --- a/activitysim/abm/models/util/mode.py +++ b/activitysim/abm/models/util/mode.py @@ -88,7 +88,8 @@ def mode_choice_simulate( choices[mode_column_name] = choices[mode_column_name].map( dict(list(zip(list(range(len(alts))), alts))) ) - cat_type = pd.api.types.CategoricalDtype([""] + alts.tolist(), ordered=True) + # note: do we want to order the mode categories? + cat_type = pd.api.types.CategoricalDtype([""] + alts.tolist()) choices[mode_column_name] = choices[mode_column_name].astype(cat_type) return choices diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index 4a9f2ef17..665844023 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -767,7 +767,9 @@ def merge_school_escort_trips_into_pipeline(state: workflow.State): if isinstance(school_escort_trips[c].dtype, pd.api.types.CategoricalDtype): from pandas.api.types import union_categoricals - uc = union_categoricals([trips[c], school_escort_trips[c]]) + uc = union_categoricals( + [trips[c], school_escort_trips[c]], sort_categories=True + ) trips[c] = pd.Categorical(trips[c], categories=uc.categories) school_escort_trips[c] = pd.Categorical( school_escort_trips[c], categories=uc.categories diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 940796e4b..d7d0766a0 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -340,6 +340,24 @@ def assign_in_place(df, df2, downcast_int=False, downcast_float=False): common_columns = df2.columns.intersection(df.columns) if len(common_columns) > 0: old_dtypes = [df[c].dtype for c in common_columns] + # in pandas 2.x, update a categorical column with any new categories will cause TypeError + # so we need to add the new categories first + # this is a workaround for pandas 2.x, see discussion in + # https://github.com/ActivitySim/activitysim/discussions/946 + for c in common_columns: + if isinstance(df[c].dtype, pd.CategoricalDtype): + if not isinstance(df2[c].dtype, pd.CategoricalDtype): + # if df column is categorical, but df2 column is not + # convert df2 column to categorical then union categories + df2[c] = df2[c].astype("category") + + # when df and df2 column are both categorical, union categories + from pandas.api.types import union_categoricals + + uc = union_categoricals([df[c], df2[c]], sort_categories=True) + df[c] = pd.Categorical(df[c], categories=uc.categories) + df2[c] = pd.Categorical(df2[c], categories=uc.categories) + df.update(df2) # avoid needlessly changing int columns to float @@ -426,7 +444,7 @@ def auto_opt_pd_dtypes( else: df[col] = pd.to_numeric(df[col], downcast="float") # Skip if the column is already categorical - if pd.api.types.is_categorical_dtype(dtype): + if isinstance(dtype, pd.CategoricalDtype): continue # Handle integer types if pd.api.types.is_integer_dtype(dtype): diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py index 45aa7a1f7..50ba0342a 100644 --- a/activitysim/core/workflow/state.py +++ b/activitysim/core/workflow/state.py @@ -1099,7 +1099,9 @@ def extend_table(self, table_name, df, axis=0): if isinstance(df[c].dtype, pd.api.types.CategoricalDtype): from pandas.api.types import union_categoricals - uc = union_categoricals([table_df[c], df[c]]) + uc = union_categoricals( + [table_df[c], df[c]], sort_categories=True + ) table_df[c] = pd.Categorical( table_df[c], categories=uc.categories )