You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
...
sd=SmartDrift(
    df_current=df_current,  # with datetime column
    df_baseline=df_baseline  # with datetime column
)
sd.compile(full_validation=True)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File _catboost.pyx:1130, in _catboost._FloatOrNan()

TypeError: float() argument must be a string or a number, not 'Timestamp'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
File _catboost.pyx:2275, in _catboost.get_float_feature()
File _catboost.pyx:1132, in _catboost._FloatOrNan()

TypeError: Cannot convert obj 2022-01-01 00:00:00 to float

During handling of the above exception, another exception occurred:

CatBoostError                             Traceback (most recent call last)
Cell In [25], line 1
----> 1 sd.compile(full_validation=True)
File~/github/eurybia/eurybia/core/smartdrift.py:305, inSmartDrift.compile(self, full_validation, ignore_cols, sampling, sample_size, datadrift_file, date_compile_auc, hyperparameter, attr_importance)
302x_test=test[varz]
303y_test=test[self._datadrift_target]
-->305xpl.compile(x=x_test)
306xpl.compute_features_import(force=True)
308self.xpl=xplFile~/anaconda3/envs/eurybia/lib/python3.8/site-packages/shapash/explainer/smart_explainer.py:267, inSmartExplainer.compile(self, x, contributions, y_pred)
264self.x_init=inverse_transform(self.x_encoded, self.preprocessing)
265self.y_pred=check_ypred(self.x_init, y_pred)
-->267self._get_contributions_from_backend_or_user(x, contributions)
268self.check_contributions()
270self.columns_dict= {i: colfori, colinenumerate(self.x_init.columns)}
File~/anaconda3/envs/eurybia/lib/python3.8/site-packages/shapash/explainer/smart_explainer.py:288, inSmartExplainer._get_contributions_from_backend_or_user(self, x, contributions)
285def_get_contributions_from_backend_or_user(self, x, contributions):
286# Computing contributions using backend287ifcontributionsisNone:
-->288self.explain_data=self.backend.run_explainer(x=x)
289self.contributions=self.backend.get_local_contributions(x=x, explain_data=self.explain_data)
290else:
File~/anaconda3/envs/eurybia/lib/python3.8/site-packages/shapash/backend/shap_backend.py:34, inShapBackend.run_explainer(self, x)
20defrun_explainer(self, x: pd.DataFrame) ->dict:
21""" 22 Computes and returns local contributions using Shap explainer 23 (...) 32 local contributions 33 """--->34contributions=self.explainer(x, **self.explainer_compute_args)
35explain_data=dict(contributions=contributions.values)
36returnexplain_dataFile~/anaconda3/envs/eurybia/lib/python3.8/site-packages/shap/explainers/_tree.py:217, inTree.__call__(self, X, y, interactions, check_additivity)
214feature_names=getattr(self, "data_feature_names", None)
216ifnotinteractions:
-->217v=self.shap_values(X, y=y, from_call=True, check_additivity=check_additivity, approximate=self.approximate)
218iftype(v) islist:
219v=np.stack(v, axis=-1) # put outputs at the endFile~/anaconda3/envs/eurybia/lib/python3.8/site-packages/shap/explainers/_tree.py:367, inTree.shap_values(self, X, y, tree_limit, approximate, check_additivity, from_call)
365importcatboost366iftype(X) !=catboost.Pool:
-->367X=catboost.Pool(X, cat_features=self.model.cat_feature_indices)
368phi=self.model.original_model.get_feature_importance(data=X, fstr_type='ShapValues')
370# note we pull off the last column and keep it as our expected_valueFile~/anaconda3/envs/eurybia/lib/python3.8/site-packages/catboost/core.py:790, inPool.__init__(self, data, label, cat_features, text_features, embedding_features, embedding_features_data, column_description, pairs, delimiter, has_header, ignore_csv_quoting, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count, log_cout, log_cerr)
784ifisinstance(feature_names, PATH_TYPES):
785raiseCatBoostError(
786"feature_names must be None or have non-string type when the pool is created from "787"python objects."788 )
-->790self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
791group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
792super(Pool, self).__init__()
File~/anaconda3/envs/eurybia/lib/python3.8/site-packages/catboost/core.py:1411, inPool._init(self, data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
1409iffeature_tagsisnotNone:
1410feature_tags=self._check_transform_tags(feature_tags, feature_names)
->1411self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
1412group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
File_catboost.pyx:3941, in_catboost._PoolBase._init_pool()
File_catboost.pyx:4008, in_catboost._PoolBase._init_pool()
File_catboost.pyx:3914, in_catboost._PoolBase._init_objects_order_layout_pool()
File_catboost.pyx:3422, in_catboost._set_data()
File_catboost.pyx:3405, in_catboost._set_data_from_generic_matrix()
File_catboost.pyx:2277, in_catboost.get_float_feature()
CatBoostError: Bad value for num_feature[non_default_doc_idx=0,feature_idx=0]="2022-01-01 00:00:00": Cannot convert obj 2022-01-01 00:00:00 to float
However, in some use cases Eurybia would be useful for analysing the differences between two datasets that contain temporal information (such as seasonal information). If users only want an analysis of the differences between the two datasets, this should be possible (via the AUC). But if users want to reuse a deployed model to get feature importance, this should raise an error (and invite them to drop the datetime columns, since it cannot be done).
Overview of the Solution:
If there are datetime columns in the datasets, automatically create year / month / day features from each of these columns and drop the original column.
If `deployed_model` is filled in `SmartDrift`, then raise an error.
Examples:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from eurybia import SmartDrift

# Create random dataset
date_list = pd.date_range(start='01/01/2022', end='01/30/2022')
X1 = np.random.rand(len(date_list))
X2 = np.random.rand(len(date_list))
df_current = pd.DataFrame(date_list, columns=['date'])
df_current['col1'] = X1
df_baseline = pd.DataFrame(date_list, columns=['date'])
df_baseline['col1'] = X2

sd = SmartDrift(df_current=df_current,
                df_baseline=df_baseline)
# Datetime columns will be transformed in df_current
# Datetime columns will be transformed in df_baseline
sd.compile(full_validation=True)

# Block the user when using a model
# Random model
regressor = LGBMRegressor(n_estimators=2).fit(df_baseline[['col1']],
                                               df_baseline[['col1']])
sd = SmartDrift(df_current=df_current,
                df_baseline=df_baseline,
                deployed_model=regressor)
sd.compile(full_validation=True)
# Error
# Raising error
Blockers:
Definition of Done:
Some tests
The text was updated successfully, but these errors were encountered:
Description of Problem:
You can't pass datetime columns to Eurybia.
However, in some use cases Eurybia would be useful for analysing the differences between two datasets that contain temporal information (such as seasonal information). If users only want an analysis of the differences between the two datasets, this should be possible (via the AUC). But if users want to reuse a deployed model to get feature importance, this should raise an error (and invite them to drop the datetime columns, since it cannot be done).
Overview of the Solution:
If `deployed_model` is filled in `SmartDrift`, then raise an error.
Examples:
Blockers:
Definition of Done:
Some tests
The text was updated successfully, but these errors were encountered: