In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from dalex import Explainer

# Wczytaj dane z pliku
df = pd.read_csv("data.csv")

# Przypisz nazwy cech
feature_names = [f"feature_{i+1}" for i in range(20)]
df.columns = feature_names + ['class']

# Przekonwertuj dane na macierz
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Nazwa klas
class_names = ["class_0","class_1", "class_2", "class_3", "class_4"]

# Podziel dane na zbiór treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Utwórz i wytrenuj model k-NN
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train, y_train)

# Utwórz funkcję predict dla DALEX
def knn_predict(model, X):
    return np.argmax(model.predict_proba(X), axis=1) 


# Utwórz instancję explainera DALEX
explainer_dalex = Explainer(knn_model, X_train, y_train, predict_function=knn_predict, label="KNN")

# Wybierz przykładową próbkę do wyjaśnienia
sample_idx = 0
sample = X_test[sample_idx]

# Wyjaśnij predykcję dla próbki przy użyciu DALEX
explanation_dalex = explainer_dalex.predict_parts(sample)

# Wyświetl wyjaśnienie
print(explanation_dalex.result)


Preparation of a new explainer is initiated

  -> data              : numpy.ndarray converted to pandas.DataFrame. Columns are set as string numbers.
  -> data              : 800 rows 20 cols
  -> target variable   : 800 values
  -> model_class       : sklearn.neighbors._classification.KNeighborsClassifier (default)
  -> label             : KNN
  -> predict function  : <function knn_predict at 0x000001F4E7702340> will be used
  -> predict function  : Accepts pandas.DataFrame and numpy.ndarray.
  -> predicted values  : min = 0.0, mean = 1.38, max = 4.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -4.0, mean = 0.644, max = 4.0
  -> model_info        : package sklearn

A new explainer has been created!


  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[

   variable_name variable_value               variable  cumulative  \
0      intercept                             intercept     1.37500   
1           18:7   0.2166:1.005    18:7 = 0.2166:1.005     1.35250   
2           19:1  0.7368:0.7041   19:1 = 0.7368:0.7041     1.28000   
3          14:12  0.9748:0.5746  14:12 = 0.9748:0.5746     1.34000   
4           17:8    1.026:1.064     17:8 = 1.026:1.064     1.90750   
5             13         0.8221            13 = 0.8221     1.79625   
6           15:5  0.1843:0.4793   15:5 = 0.1843:0.4793     1.81875   
7            6:2  0.6164:0.5073    6:2 = 0.6164:0.5073     1.73625   
8           16:0  0.4724:0.3187   16:0 = 0.4724:0.3187     2.38750   
9              9         0.1644             9 = 0.1644     2.06375   
10         11:10   0.2578:0.149   11:10 = 0.2578:0.149     2.95250   
11           4:3  0.3293:0.8081    4:3 = 0.3293:0.8081     2.00000   
12                                          prediction     2.00000   

    contribution  s

  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
  average_yhats_norm[i] = average_yhats[i] - diffs_1d[inds.iloc[i, 0]] - diffs_1d.iloc[inds.iloc[i, 1]]
