Skip to content

Commit

Permalink
[python] dalex v1.6.1
Browse files Browse the repository at this point in the history
  • Loading branch information
hbaniecki committed Feb 28, 2024
1 parent 2b8a828 commit b1548c4
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 21 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
.vscode/settings.json
**.DS_Store

.Rproj.user
.Rhistory
.RData
Expand Down
10 changes: 7 additions & 3 deletions python/dalex/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
## Changelog

### development

* Fix an error occurring in `predict_profile()` when a DataFrame has MultiIndex in `pandas >= 1.3.0` ([#550](https://github.com/ModelOriented/DALEX/pull/550))
### v1.6.1 (2024-02-28)

* Added `keras.src.models.sequential.Sequential` to classes with a known `predict_function`; this should restore compatibility after the changes in `keras==3.0.0` and `tensorflow==2.16.0`
* Turn off `verbose` in the predict method of tensorflow/keras models that [changed](https://stackoverflow.com/a/73244830) in `tensorflow>=2.9.0`
* Fix an error occurring in `predict_profile()` when a DataFrame has MultiIndex in `pandas>=1.3.0` ([#550](https://github.com/ModelOriented/DALEX/pull/550))
* Fix Gaussian `norm()` calculation in `model_profile()` from `pi*sqrt(2)` to `sqrt(2*pi)`
* Potential fix for a warning (future error) between `prepare_numerical_categorical()` and `prepare_x()` with `pandas == 2.1.0`
* Fix a warning (future error) between `prepare_numerical_categorical()` and `prepare_x()` with `pandas==2.1.0`
* Fix a warning (future error) concerning the default value of `numeric_only` in `pandas.DataFrame.corr()` in `dalex.aspect.calculate_assoc_matrix()`


### v1.6.0 (2023-02-16)
Expand Down
2 changes: 1 addition & 1 deletion python/dalex/dalex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .aspect import Aspect


__version__ = '1.6.0.9000'
__version__ = '1.6.1'

__all__ = [
"Arena",
Expand Down
10 changes: 6 additions & 4 deletions python/dalex/dalex/_explainer/checks.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# check functions for Explainer.__init__
import numpy as np
import pandas as pd
import warnings
from copy import deepcopy
from warnings import warn

from .helper import verbose_cat, is_y_in_data, get_model_info
from .yhat import get_predict_function_and_model_type
Expand Down Expand Up @@ -199,7 +199,9 @@ def check_predict_function_and_model_type(predict_function, model_type,
# check if predict_function accepts arrays
try:
data_values = data.values[[0]]
predict_function(model, data_values)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # ignore warnings about feature names in scikit-learn
predict_function(model, data_values)
model_info_['arrays_accepted'] = True
verbose_cat(" -> predict function : Accepts pandas.DataFrame and numpy.ndarray.",
verbose=verbose)
Expand All @@ -219,11 +221,11 @@ def check_predict_function_and_model_type(predict_function, model_type,
# verbose_cat(" -> predicted values : the predict_function returns an error when executed \n",
# verbose=verbose)

warn("\n -> predicted values : 'predict_function' returns an Error when executed: \n" +
warnings.warn("\n -> predicted values : 'predict_function' returns an Error when executed: \n" +
str(error), stacklevel=2)

if not isinstance(y_hat, np.ndarray) or y_hat.shape != (data.shape[0], ):
warn("\n -> predicted values : 'predict_function' must return numpy.ndarray (1d)", stacklevel=2)
warnings.warn("\n -> predicted values : 'predict_function' must return numpy.ndarray (1d)", stacklevel=2)

if model_type is None:
# model_type not specified
Expand Down
8 changes: 5 additions & 3 deletions python/dalex/dalex/_explainer/yhat.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def yhat_xgboost(m, d):

def get_tf_yhat(model):
if not (str(type(model)).startswith("<class 'tensorflow.python.keras.engine") or
str(type(model)).startswith("<class 'keras.engine")):
str(type(model)).startswith("<class 'keras.engine") or
str(type(model)).startswith("<class 'keras.src.models")):
return None

if model.output_shape[1] == 1:
Expand All @@ -30,11 +31,11 @@ def get_tf_yhat(model):


def yhat_tf_regression(m, d):
return m.predict(np.array(d)).reshape(-1, )
return m.predict(np.array(d), verbose=0).reshape(-1, )


def yhat_tf_classification(m, d):
return m.predict(np.array(d))[:, 1]
return m.predict(np.array(d), verbose=0)[:, 1]


def get_h2o_yhat(model):
Expand Down Expand Up @@ -80,6 +81,7 @@ def get_predict_function_and_model_type(model, model_class):
"keras.engine.sequential.Sequential": prep_tf,
"keras.engine.training.Model": prep_tf,
"keras.engine.functional.Functional": prep_tf,
"keras.src.models.sequential.Sequential": prep_tf,
"h2o.estimators.coxph.H2OCoxProportionalHazardsEstimator": prep_h2o,
"h2o.estimators.deeplearning.H2ODeepLearningEstimator": prep_h2o,
"h2o.estimators.gam.H2OGeneralizedAdditiveEstimator": prep_h2o,
Expand Down
2 changes: 1 addition & 1 deletion python/dalex/dalex/aspect/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def calculate_cat_num_assoc_matrix(data, categorical_variables, numerical_variab


def calculate_assoc_matrix(data, corr_method):
corr_matrix = abs(data.corr(corr_method)) # get submatrix for only numerical variables
corr_matrix = abs(data.corr(corr_method, numeric_only=True)) # get submatrix for only numerical variables
numerical_variables = corr_matrix.columns
categorical_variables = list(set(data.columns) - set(numerical_variables))
n = len(data)
Expand Down
6 changes: 3 additions & 3 deletions python/dalex/test/test_ceteris_paribus.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,15 @@ def test_single_variable_profile(self):

def test_calculate_variable_profile(self):
splits = utils.calculate_variable_split(self.X, ['age', 'gender'], 121)
vp = utils.calculate_variable_profile(self.exp.predict_function, self.exp.model, self.X.iloc[[0], :], splits, 1)
vp = utils.calculate_variable_profile(self.exp.predict_function, self.exp.model, self.X.iloc[[0], :], splits, 1, verbose=False)
self.assertIsInstance(vp, pd.DataFrame)

splits = utils.calculate_variable_split(self.X, ['gender'], 5)
vp = utils.calculate_variable_profile(self.exp.predict_function, self.exp.model, self.X.iloc[[0], :], splits, 1)
vp = utils.calculate_variable_profile(self.exp.predict_function, self.exp.model, self.X.iloc[[0], :], splits, 1, verbose=False)
self.assertIsInstance(vp, pd.DataFrame)

splits = utils.calculate_variable_split(self.X, self.X.columns, 15)
vp = utils.calculate_variable_profile(self.exp.predict_function, self.exp.model, self.X.iloc[[0], :], splits, 2)
vp = utils.calculate_variable_profile(self.exp.predict_function, self.exp.model, self.X.iloc[[0], :], splits, 2, verbose=False)
self.assertIsInstance(vp, pd.DataFrame)

def test_calculate_ceteris_paribus(self):
Expand Down
10 changes: 6 additions & 4 deletions python/dalex/test/test_fairness.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import unittest
from copy import deepcopy
from copy import copy, deepcopy
import warnings

import numpy as np
import pandas as pd
from copy import copy
from plotly.graph_objs import Figure
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
Expand Down Expand Up @@ -61,7 +61,7 @@ def setUp(self):
reg = DecisionTreeRegressor()
reg.fit(data2, target)

self.exp_reg = dx.Explainer(reg, data2, target)
self.exp_reg = dx.Explainer(reg, data2, target, verbose=False)
self.mgf_reg = self.exp_reg.model_fairness(self.protected_reg, 'A')

def test_fairness_check(self):
Expand Down Expand Up @@ -160,7 +160,9 @@ def test_calculate_ratio(self):
b = list(scf_metrics.subgroup_confusion_matrix_metrics.get('b').values())
a = list(scf_metrics.subgroup_confusion_matrix_metrics.get('a').values())

ratio = np.array(b) / np.array(a)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # ignore warning about division by zero
ratio = np.array(b) / np.array(a)
ratio[np.isinf(ratio)] = np.nan
ratio[ratio == 0] = np.nan

Expand Down
4 changes: 2 additions & 2 deletions python/dalex/test/test_model_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def test_plot(self):
self.assertIsInstance(case3, dx.model_explanations.ResidualDiagnostics)

fig1 = case1.plot(title="test1", variable="fare", N=1000, show=False)
fig2 = case2.plot(case3, variable="sibsp", yvariable="abs_residuals", N=None, show=False)
fig3 = case2.plot(smooth=False, line_width=6, marker_size=1, variable="age", show=False)
fig2 = case2.plot(case3, variable="age", yvariable="abs_residuals", N=None, show=False)
fig3 = case2.plot(smooth=False, line_width=6, marker_size=1, variable="sibsp", show=False)

self.assertIsInstance(fig1, Figure)
self.assertIsInstance(fig2, Figure)
Expand Down

0 comments on commit b1548c4

Please sign in to comment.