In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd
import dalex as dx
import plotly.express as px

# Wczytanie danych 
iris = load_iris()
# Rozdzielenie cech (X) i etykiet (y)
X, y = iris.data, iris.target
X_df = pd.DataFrame(X, columns=iris.feature_names)      

# Podział danych na zbiór treningowy i testowy                                       
X_train, X_test, y_train, y_test = train_test_split(X_df, y, test_size=0.3, random_state=42)    

# Tworzenie modelu RandomForestClassifier
rf_model = RandomForestClassifier(random_state=42)                                              
rf_model.fit(X_train, y_train)

# Tworzenie explainera za pomocą Dalex
explainer = dx.Explainer(rf_model, X_test, y_test, label=", ".join(iris.target_names))           
# Predykcje dla danych testowych
predictions = explainer.predict(X_test)     
                                                    
# Wyświetlenie wykresu Break Down dla pierwszej obserwacji
bd = explainer.predict_parts(X_test.iloc[0], type='break_down')                                 
bd.plot()

# Wyświetlenie progów klasyfikacyjnych dla pierwszej obserwacji
probabilities = rf_model.predict_proba(X_test.iloc[[0]])[0]
thresholds = {iris.target_names[i]: prob for i, prob in enumerate(probabilities)}
print("Classification thresholds for the first observation:", thresholds)

# Wyświetlenie wykresu Feature Importance
fi = explainer.model_parts()                                                                   
fi.plot()

# Wyświetlenie wykresu Partial Dependence Plot (PDP) dla jednej z cech
print(" Partial Dependence Plots")
pdp = explainer.model_profile(variables=['sepal length (cm)'])                                  
pdp.plot()

# Wyświetlenie wykresu Ceteris Paribus dla pierwszej obserwacji
print("Ceteris Paribus")
cp = explainer.predict_profile(X_test.iloc[0])                                                  
cp.plot()
# Wyświetlenie wykresu Accumulated Local Effects (ALE) dla jednej z cech
print("Accumulated Local Effects")
ale = explainer.model_profile(type='accumulated', variables=['sepal length (cm)'])             
ale.plot()

Preparation of a new explainer is initiated

  -> data              : 45 rows 4 cols
  -> target variable   : 45 values
  -> model_class       : sklearn.ensemble._forest.RandomForestClassifier (default)
  -> label             : setosa, versicolor, virginica
  -> predict function  : <function yhat_proba_default at 0x000002BC9FEA37E0> will be used (default)
  -> predict function  : Accepts pandas.DataFrame and numpy.ndarray.
  -> predicted values  : min = 0.0, mean = 0.287, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -0.05, mean = 0.579, max = 2.0
  -> model_info        : package sklearn

A new explainer has been created!


Classification thresholds for the first observation: {'setosa': 0.0, 'versicolor': 0.97, 'virginica': 0.03}


 Partial Dependence Plots


Calculating ceteris paribus: 100%|██████████| 1/1 [00:00<00:00, 51.25it/s]


Ceteris Paribus


Calculating ceteris paribus: 100%|██████████| 4/4 [00:00<00:00, 118.18it/s]


Accumulated Local Effects


Calculating ceteris paribus: 100%|██████████| 1/1 [00:00<00:00, 48.16it/s]
Calculating accumulated dependency: 100%|██████████| 1/1 [00:00<00:00, 17.98it/s]
