# Explainability Metrics


### Algorithm Class Score 

In [1]:
from difflib import get_close_matches

alg_score = {
    "RandomForestClassifier": 4,
    "KNeighborsClassifier": 3,
    "SVC": 2,
    "GaussianProcessClassifier": 3,
    "DecisionTreeClassifier": 5,
    "MLPClassifier": 1,
    "AdaBoostClassifier": 3,
    "GaussianNB": 3.5,
    "QuadraticDiscriminantAnalysis": 3,
    "LogisticRegression": 4,
    "LinearRegression": 3.5,
    "Sequential":1
}

def algorithm_class_score(clf):

    clf_name = type(clf).__name__

    ''' 
    exp_score = clf_type_score.get(clf_name,np.nan)
    properties= {"dep" :info('Depends on','Model'),
        "clf_name": info("model type",clf_name)}
    '''

    # Check if the clf_name is in the dictionary
    if clf_name in alg_score:
        exp_score = alg_score[clf_name]

        #properties= {"dep" :info('Depends on','Model'),
        #"clf_name": info("model type",clf_name)}
        #return  result(score=exp_score, properties=properties)
        return exp_score 
    
    # If not, try to find a close match
    close_matches = get_close_matches(clf_name, alg_score.keys(), n=1, cutoff=0.6)
    if close_matches:
        exp_score = alg_score[close_matches[0]]

        #properties= {"dep" :info('Depends on','Model'),
        #"clf_name": info("model type",clf_name)}
        return exp_score
    
    # If no close match found 
    raise Exception(f"No matching score found for '{clf_name}'")

In [2]:
# Example Decision Tree Classifer and Regressor
from sklearn import tree

Classifier = tree.DecisionTreeClassifier()
Regressor = tree.DecisionTreeRegressor()

print(type(Classifier).__name__)
print(algorithm_class_score(Classifier))

print(type(Regressor).__name__)
print(algorithm_class_score(Regressor))

DecisionTreeClassifier
5
DecisionTreeRegressor
5


In [3]:
# Example Neural Network Tensorflow 
import tensorflow as tf

TFNN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, input_dim=128, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

print(type(TFNN).__name__)
print(algorithm_class_score(TFNN))

Sequential
1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Custom non-sequential NN using keras
class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(32, activation="relu")
        self.dense2 = tf.keras.layers.Dense(5, activation="softmax")
        self.dropout = tf.keras.layers.Dropout(0.5)

    def call(self, inputs, training=False):
        x = self.dense1(inputs)
        x = self.dropout(x, training=training)
        return self.dense2(x)

model = MyModel()

print(type(model).__name__)
print(algorithm_class_score(model))

MyModel


Exception: No matching score found for 'MyModel'

In [4]:
# Example Neural Network Pytoch
import torch
import torch.nn as nn

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(128, 64)  
        self.fc2 = nn.Linear(64, 32)  
        self.fc3 = nn.Linear(32, 1)    

    def forward(self, x):
        x = torch.relu(self.fc1(x))   
        x = torch.relu(self.fc2(x))    
        x = torch.sigmoid(self.fc3(x)) 
        return x
    
TOCHNN = NeuralNetwork()

print(type(TOCHNN).__name__)
print(algorithm_class_score(TOCHNN))

NeuralNetwork


Exception: No matching score found for 'NeuralNetwork'

'NeuralNetwork', 'Sequential', 'MLPClassifier', custom Keras Neural Networks 
- whats in common, how can we identidy them?

### Feature Correlation Score

In [2]:
import numpy as np
import pandas as pd

def correlated_features_score(train_data, test_data, thresholds=[0.05, 0.16, 0.28, 0.4], target_column=None, verbose=False):
    
    test_data = test_data.copy()
    train_data = train_data.copy()
     
    if target_column:
        X_test = test_data.drop(target_column, axis=1)
        X_train = train_data.drop(target_column, axis=1)
    else:
        X_test = test_data.iloc[:,:-1]
        X_train = train_data.iloc[:,:-1]
        
    
    df_comb = pd.concat([X_test, X_train])
    corr_matrix = df_comb.corr().abs()

    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
    
    # Compute average and standar deviation from upper correlation matrix 
    avg_corr = upper.values[np.triu_indices_from(upper.values,1)].mean()
    std_corr = upper.values[np.triu_indices_from(upper.values,1)].std()

    # Find features with correlation greater than avg_corr + std_corr
    to_drop = [column for column in upper.columns if any(upper[column] > (avg_corr+std_corr))]
    if verbose: print(to_drop)
    
    pct_drop = len(to_drop)/len(df_comb.columns)
    
    bins = thresholds
    score = 5-np.digitize(pct_drop, bins, right=True) 

    #properties= {"dep" :info('Depends on','Training Data'),
    #    "pct_drop" : info("Percentage of highly correlated features", "{:.2f}%".format(100*pct_drop))}
    #return  result(score=int(score), properties=properties)
    
    return score

For experimental purposes there will be used the following datasets:

- [Healthcare Diabetes Dataset](https://www.kaggle.com/datasets/nanditapore/healthcare-diabetes)
- [Iris Dataset](https://www.kaggle.com/datasets/uciml/iris)

In [3]:
# Example with Healthcare Diabetes Dataset
import pandas as pd
from sklearn.model_selection import train_test_split

health = pd.read_csv('Data/Healthcare-Diabetes.csv')

health_X = health[health.columns[1:9]]
health_y = health[health.columns[-1]]

X_train, X_test, y_train, y_test = train_test_split(health_X, health_y, test_size=0.33, random_state=42)


print(correlated_features_score(X_train, X_test, verbose=True))

['Insulin', 'BMI']
2


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))


In [5]:
# Example with Iris Dataset
iris = pd.read_csv('Data/iris.csv')

iris_X = iris[iris.columns[:5]]
iris_y = iris['class']

X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.33, random_state=42)
print(correlated_features_score(X_train, X_test, verbose=True))


['petallength']
3


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
