In [1]:
import explainability as exp
import pandas as pd

### Regression Models

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=1000, n_features=10, n_informative=10, n_targets=1, random_state=123)
X = pd.DataFrame(X)
reg = LinearRegression()

reg.fit(X,y)

print(f"Algorithm Class: {exp.algorithm_class_score(reg)}")
print(f"Model Size Score: {exp.model_size_score(reg, X)}")

print(f"Correlated Features Score: {exp.correlated_features_score(X)}")
print(f"Feature Importance Score: {exp.feature_importance_score(reg)}")
print(f"Shap Coefficient of variance: {exp.cv_shap_score(reg, X)}")


Algorithm Class: 0.7
Model Size Score: 9


  upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))


AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

### Classification Models

In [4]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=1000, n_features=10, n_informative=10,flip_y=0, n_redundant=0, n_repeated=0, n_clusters_per_class=2, n_classes=5, random_state=42)
X = pd.DataFrame(X)
clf = RandomForestClassifier(random_state=123)

clf.fit(X,y)


print(f"Algorithm Class: {exp.algorithm_class_score(clf)}")
print(f"Model Size Score: {exp.model_size_score(clf, X)}")

print(f"Correlated Features Score: {exp.correlated_features_score(X)}")
print(f"Feature Importance Score: {exp.feature_importance_score(clf)}")
print(f"Shap Coefficient of variance: {exp.cv_shap_score(clf, X)}")

Algorithm Class: 0.8
Model Size Score: 9
Correlated Features Score: 0.5555555555555556
Feature Importance Score: 0.4
Shap Coefficient of variance: 0.6969347183053155


### Keras Models

In [7]:
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

X, y = make_classification(n_samples=1000, n_features=10, n_informative=10,flip_y=0, n_redundant=0, n_repeated=0, n_clusters_per_class=2, n_classes=5, random_state=42)
X = pd.DataFrame(X)

encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y.reshape(-1, 1)) 

TFmodel = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(5, activation="softmax")
])

TFmodel.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
TFmodel.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)

print(f"Algorithm Class: {exp.algorithm_class_score(TFmodel)}")
print(f"Model Size Score: {exp.model_size_score(TFmodel, X)}")

print(f"Correlated Features Score: {exp.correlated_features_score(X)}")
print(f"Feature Importance Score: {exp.feature_importance_score(TFmodel)}")
print(f"Shap Coefficient of variance: {exp.cv_shap_score(TFmodel, X)}")

TypeError: __init__() got an unexpected keyword argument 'sparse'

### Torch Models

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import DataLoader, TensorDataset, random_split

# Generate synthetic dataset
X, y = make_classification(
    n_samples=1000, n_features=10, n_informative=10, flip_y=0, n_redundant=0, 
    n_repeated=0, n_clusters_per_class=2, n_classes=5, random_state=42
)
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.int64)

# One-hot encode the labels
encoder = OneHotEncoder(sparse=False)
y_onehot = torch.tensor(encoder.fit_transform(y.reshape(-1, 1)), dtype=torch.float32)

# Create a dataset and dataloaders
dataset = TensorDataset(X, y_onehot)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# Define the model
class TorchModel(nn.Module):
    def __init__(self):
        super(TorchModel, self).__init__()
        self.dense1 = nn.Linear(10, 32)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.dense2 = nn.Linear(32, 5)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.dense1(x))
        x = self.dropout(x)
        x = self.softmax(self.dense2(x))
        return x

    def predict(self, x):
        # x is numpy not tensor, return is numpy
        xx = torch.tensor(x, dtype=torch.float32)
        
        with torch.no_grad():
            probs = torch.exp(self.forward(xx))
        return probs.numpy()

# Create an instance of the model
Tmodel = TorchModel()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Tmodel.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    Tmodel.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = Tmodel(inputs)
        loss = criterion(outputs, labels.argmax(dim=1))
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    
    epoch_loss = running_loss / train_size
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
    
    # Validation loop
    Tmodel.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = Tmodel(inputs)
            loss = criterion(outputs, labels.argmax(dim=1))
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels.argmax(dim=1)).sum().item()
    
    val_loss /= val_size
    accuracy = correct / val_size
    print(f'Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}')


print(f"Algorithm Class: {exp.algorithm_class_score(Tmodel)}")
print(f"Model Size Score: {exp.model_size_score(X)}")

print(f"Correlated Features Score: {exp.correlated_features_score(X)}")
print(f"Feature Importance Score: {exp.feature_importance_score(Tmodel)}")
#print(f"Shap Coefficient of variance: {exp.cv_shap_score(Tmodel, X)}")


TypeError: __init__() got an unexpected keyword argument 'sparse'

In [17]:
import shap
background = shap.sample(X, 100)
explainer = shap.KernelExplainer(Tmodel, background)


TypeError: Unknown type passed as data object: <class 'torch.Tensor'>

In [3]:
print(f"Shap Coefficient of variance: {exp.cv_shap_score(Tmodel, X)}")

TypeError: Unknown type passed as data object: <class 'torch.Tensor'>