In [1]:
import numpy as np
import shap
from sklearn.ensemble import RandomForestClassifier
from classifiers import LocalClassifierPerLevel, LocalClassifierPerNode, LocalClassifierPerParentNode
from classifiers import Explainer
from classifiers import datasets, metrics as hmetrics
from sklearn import metrics as metrics

In [2]:
# Load train and test splits
X_train, X_test, Y_train, Y_test = datasets.load_platypus()

# Use random forest classifiers for every node
rfc = RandomForestClassifier()

# Local classifier per node approach

In [3]:
lcpn_classifier = LocalClassifierPerNode(
    local_classifier=rfc, 
    replace_classifiers=False, 
    binary_policy="inclusive",
    edge_list="./hierarchy.csv",
    verbose=-1,
)

# Train local classifier per node
lcpn_classifier.fit(X_train, Y_train)

# Predict
predictions = lcpn_classifier.predict(X_test)

INFO:LCPN:Creating digraph from 572 2D labels
INFO:LCPN:Writing edge list to file ./hierarchy.csv
INFO:LCPN:Detected 3 roots
INFO:LCPN:Initializing local classifiers
INFO:LCPN:Initializing inclusive binary policy
INFO:LCPN:Fitting local classifiers
INFO:LCPN:Training local classifier Allergy


INFO:LCPN:Training local classifier Allergy::HiClass::Separator::External
INFO:LCPN:Training local classifier Allergy::HiClass::Separator::External::HiClass::Separator::Plastic Allergy
INFO:LCPN:Training local classifier Gastrointestinal
INFO:LCPN:Training local classifier Gastrointestinal::HiClass::Separator::Norovirus
INFO:LCPN:Training local classifier Allergy::HiClass::Separator::External::HiClass::Separator::Bee Allergy
INFO:LCPN:Training local classifier Respiratory
INFO:LCPN:Training local classifier Respiratory::HiClass::Separator::Cold
INFO:LCPN:Training local classifier Gastrointestinal::HiClass::Separator::Food Poisoning
INFO:LCPN:Training local classifier Allergy::HiClass::Separator::Food Allergy
INFO:LCPN:Training local classifier Allergy::HiClass::Separator::Food Allergy::HiClass::Separator::Fish Allergy
INFO:LCPN:Training local classifier Respiratory::HiClass::Separator::Flu
INFO:LCPN:Training local classifier Allergy::HiClass::Separator::External::HiClass::Separator::Po

In [11]:
precision = hmetrics.hprecision_score(Y_test, predictions)
print("hprecision:", precision)

recall = hmetrics.hrecall_score(Y_test, predictions)
print("hrecall:", recall)

f1 = hmetrics.hf1_score(Y_test, predictions)
print("hf1:", f1)

hprecision: 0.7821428571428571
hrecall: 0.7697715289982425
hf1: 0.7759078830823738


In [22]:
true_labels = np.array(Y_test.to_list())

# Flatten the true and predicted labels to calculate micro/macro/weighted metrics
flat_true_labels = true_labels.flatten()
flat_predicted_labels = predictions.flatten()

# Calculate micro, macro, and weighted precision, recall, and F1 score
micro_precision = metrics.precision_score(flat_true_labels, flat_predicted_labels, average='micro')
macro_precision = metrics.precision_score(flat_true_labels, flat_predicted_labels, average='macro')
weighted_precision = metrics.precision_score(flat_true_labels, flat_predicted_labels, average='weighted')

micro_recall = metrics.recall_score(flat_true_labels, flat_predicted_labels, average='micro')
macro_recall = metrics.recall_score(flat_true_labels, flat_predicted_labels, average='macro')
weighted_recall = metrics.recall_score(flat_true_labels, flat_predicted_labels, average='weighted')

micro_f1 = metrics.f1_score(flat_true_labels, flat_predicted_labels, average='micro')
macro_f1 = metrics.f1_score(flat_true_labels, flat_predicted_labels, average='macro')
weighted_f1 = metrics.f1_score(flat_true_labels, flat_predicted_labels, average='weighted')

print("Micro Precision:", micro_precision)
print("Macro Precision:", macro_precision)
print("Weighted Precision:", weighted_precision)

print("Micro Recall:", micro_recall)
print("Macro Recall:", macro_recall)
print("Weighted Recall:", weighted_recall)

print("Micro F1 Score:", micro_f1)
print("Macro F1 Score:", macro_f1)
print("Weighted F1 Score:", weighted_f1)

Micro Precision: 0.8116531165311653
Macro Precision: 0.7653563707870122
Weighted Precision: 0.8152383692136904
Micro Recall: 0.8116531165311653
Macro Recall: 0.7121150186006335
Weighted Recall: 0.8116531165311653
Micro F1 Score: 0.8116531165311653
Macro F1 Score: 0.7312806709790624
Weighted F1 Score: 0.8092144014012496


In [None]:
# Flatten the true and predicted labels
flat_true_labels = [set(labels) for labels in true_labels]
flat_predicted_labels = [set(labels) for labels in predictions]

# Calculate subset accuracy
subset_accuracy = metrics.accuracy_score(flat_true_labels, flat_predicted_labels)

print("Subset Accuracy (Exact Match Ratio):", subset_accuracy)

In [27]:
# Flatten the true and predicted labels to calculate micro/macro/weighted metrics
flat_true_labels = true_labels.flatten()
flat_predicted_labels = predictions.flatten()

# Calculate Hamming Loss
loss = metrics.hamming_loss(flat_true_labels, flat_predicted_labels)

print("Hamming Loss:", loss)

Hamming Loss: 0.18834688346883469


In [28]:
# Flatten the true and predicted labels
flat_true_labels = true_labels.flatten()
flat_predicted_labels = predictions.flatten()

# Calculate Micro-F1 Score
micro_f1 = metrics.f1_score(flat_true_labels, flat_predicted_labels, average='micro')

# Calculate Macro-F1 Score
# precision_recall_fscore_support returns precision, recall, F1 score, and support for each label
precision, recall, macro_f1, _ = metrics.precision_recall_fscore_support(flat_true_labels, flat_predicted_labels, average='macro')

print("Micro-F1 Score:", micro_f1)
print("Macro-F1 Score:", macro_f1)

Micro-F1 Score: 0.8116531165311653
Macro-F1 Score: 0.7312806709790624


In [32]:
# Calculate Example-wise F1 Score
example_wise_f1_scores = []

for true_instance, predicted_instance in zip(true_labels, predictions):
    f1_instance = metrics.f1_score(true_instance, predicted_instance, average=None)
    example_wise_f1_scores.append(f1_instance)


# Calculate the average Example-wise F1 Score
average_example_wise_f1 = sum(example_wise_f1_scores) / len(example_wise_f1_scores)

print("Example-wise F1 Scores:", example_wise_f1_scores)
print("Average Example-wise F1 Score:", average_example_wise_f1)

[array([1., 0., 0., 1.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 0., 0., 1.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([0., 0., 0., 0., 0., 0.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 0., 1., 0.]), array([1., 0., 0., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.]), array([1., 1., 1.])

ValueError: operands could not be broadcast together with shapes (4,) (3,) 

In [35]:
# Define Explainer
explainer = Explainer(lcpn_classifier, data=X_train.values, mode="tree")
explanations = explainer.explain(X_test.values)
print(explanations)

# Filter samples which only predicted "Respiratory" at first level
respiratory_idx = lcpn_classifier.predict(X_test)[:, 0] == "Respiratory"

# Specify additional filters to obtain only level 0
shap_filter = {"level": 0, "class": "Respiratory_1", "sample": respiratory_idx}

# Use .sel() method to apply the filter and obtain filtered results
shap_val_respiratory = explanations.sel(shap_filter)

# Plot feature importance on test set
shap.plots.violin(
    shap_val_respiratory.shap_values,
    feature_names=X_train.columns.values,
    plot_size=(13, 8),
)

INFO:LCPN:Predicting
INFO:LCPN:Predicting for node 'Allergy'
INFO:LCPN:Predicting for node 'Gastrointestinal'
INFO:LCPN:Predicting for node 'Respiratory'
INFO:LCPN:Predicting for node 'Cold'
INFO:LCPN:Predicting for node 'Flu'
INFO:LCPN:Predicting for node 'Covid'


ValueError: conflicting sizes for dimension 'class': length 1 on the data but length 2 on coordinate 'class'

# Local classifier per parent node approach

In [4]:
lcppn_classifier = LocalClassifierPerParentNode(
    local_classifier=rfc, 
    replace_classifiers=False,
    # edge_list="./hierarchy.csv",
)

# Train local classifier per node
lcppn_classifier.fit(X_train, Y_train)

# Predict
predictions = lcppn_classifier.predict(X_test)
print(predictions)

INFO:LCPPN:Creating digraph from 572 2D labels
INFO:LCPPN:Detected 3 roots
INFO:LCPPN:Initializing local classifiers
INFO:LCPPN:Fitting local classifiers
INFO:LCPPN:Training local classifier Allergy


INFO:LCPPN:Training local classifier Allergy::HiClass::Separator::External
INFO:LCPPN:Training local classifier Gastrointestinal
INFO:LCPPN:Training local classifier Respiratory
INFO:LCPPN:Training local classifier Allergy::HiClass::Separator::Food Allergy
INFO:LCPPN:Training local classifier hiclass::root
INFO:LCPPN:Cleaning up variables that can take a lot of disk space
INFO:LCPPN:Predicting


[['Respiratory' 'Cold' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Covid' '']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Cold' '']
 ['Allergy' 'Food Allergy' 'Milk Allergy']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Cold' '']
 ['Gastrointestinal' 'Food Poisoning' '']
 ['Gastrointestinal' 'Norovirus' '']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Cold' '']
 ['Allergy' 'External' 'Pollen Allergy']
 ['Respiratory' 'Covid' '']
 ['Allergy' 'Food Allergy' 'Fish Allergy']
 ['Gastrointestinal' 'Norovirus' '']
 ['Allergy' 'External' 'Bee Allergy']
 ['Respiratory' 'Flu' '']
 ['Allergy' 'Food Allergy' 'Fish Allergy']
 ['Allergy' 'Food Allergy' 'Fish Allergy']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Cold' '']
 ['Allergy' 'Externa

# Local classifier per level approach

In [5]:
lcpl_classifier = LocalClassifierPerLevel(
    local_classifier=rfc, 
    replace_classifiers=False,
    # edge_list="./hierarchy.csv",
)

# Train local classifier per level
lcpl_classifier.fit(X_train, Y_train)

# Predict
predictions = lcpl_classifier.predict(X_test)
print(predictions)

INFO:LCPL:Creating digraph from 572 2D labels
INFO:LCPL:Detected 3 roots
INFO:LCPL:Initializing local classifiers
INFO:LCPL:Fitting local classifiers
INFO:LCPL:Training local classifier 0
INFO:LCPL:Training local classifier 1
INFO:LCPL:Training local classifier 2
INFO:LCPL:Cleaning up variables that can take a lot of disk space
INFO:LCPL:Predicting


[['Respiratory' 'Covid' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Cold' '']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Cold' '']
 ['Allergy' 'Food Allergy' 'Milk Allergy']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Cold' '']
 ['Gastrointestinal' 'Food Poisoning' '']
 ['Gastrointestinal' 'Food Poisoning' '']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Flu' '']
 ['Allergy' 'External' 'Pollen Allergy']
 ['Respiratory' 'Covid' '']
 ['Allergy' 'Food Allergy' 'Fish Allergy']
 ['Gastrointestinal' 'Norovirus' '']
 ['Allergy' 'External' 'Bee Allergy']
 ['Respiratory' 'Flu' '']
 ['Allergy' 'Food Allergy' 'Fish Allergy']
 ['Allergy' 'Food Allergy' 'Fish Allergy']
 ['Gastrointestinal' 'Norovirus' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Covid' '']
 ['Respiratory' 'Cold' '']
 ['Respiratory' 'Flu' '']
 ['Respiratory' 'Cold' '']
 ['Allergy' 'Exter