In [1]:
# ! pip uninstall /Path/to/the/whl/file/torchlogic-0.0.1-py3-none-any.whl -y

In [2]:
# ! pip install /Path/to/the/whl/file/torchlogic-0.0.1-py3-none-any.whl

In [3]:
# ! pip install optuna

In [35]:
import warnings
warnings.filterwarnings("ignore", message="Choices for a categorical distribution should be a tuple")
warnings.filterwarnings("ignore", message="To copy construct from a tensor, it is recommended")
warnings.filterwarnings("ignore", message="IProgress not found")
warnings.filterwarnings("ignore", message="Precision is ill-defined")

import copy

import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, SubsetRandomSampler

import optuna
import numpy as np
import pandas as pd
from scipy.special import softmax

from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, roc_auc_score

from torchlogic.models import BanditNRNClassifier
from torchlogic.utils.trainers import BanditNRNTrainer

In [5]:
# If we want the logs from Bandit-RRN training

# from carrot.logger import Logger

# log_config = 'configs/logging.yaml'
# log_dir = 'logs'
# logger = Logger.get(log_config, log_dir)

# Load Data

In [6]:
# Load the iris dataset; the cells below read `data.data` (features),
# `data.target` (class labels), `data.target_names` and `data.feature_names`.
data = load_iris()

# Show the class names.  Only the last expression of a cell is displayed, so
# the earlier bare `data.target[[10, 25, 50]]` expression (whose value was
# silently discarded) has been removed.
data.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

# Prepare Bandit-RRN Data

A dataset for the Bandit-RRN algorithm in torchlogic must return a dictionary of the following form:

```python
{
    'features': [N_FEATURES], 'target': [N_TARGETS], 'sample_idx': [1]
}
```

- The `features` key contains a tensor of the features used for prediction.  Features must be numeric and scaled between 0 and 1.

- The `target` key must contain a tensor of the targets, with the values of 0 or 1 for each target.

- The `sample_idx` key must contain a tensor of the row number in the data corresponding to that sample.

## Preprocess Data

In [7]:
# Fit a min-max scaler on the raw measurements and map every feature into
# [0, 1], as the Bandit-RRN dataset format requires.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/validation/test split, so held-out rows influence the scaling --
# confirm this is acceptable outside of a demo.
mms = MinMaxScaler().fit(data.data)
X = mms.transform(data.data)

In [8]:
# One-hot encode the class labels: one 0/1 integer column per class.
class_indicators = pd.get_dummies(data.target).astype(int)
y = class_indicators.to_numpy()

In [9]:
# Hold out 10% of the rows as the final test set.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# NOTE: The iris dataset is very small, so the validation set carved out below
# is deliberately kept small to leave enough rows for training.  Real-world
# applications will likely need a larger held-out fraction.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.11, random_state=42
)

## Define PyTorch Dataset

In [10]:
class BanditRRNDataset(Dataset):
    def __init__(
            self,
            X: np.ndarray,
            y: np.ndarray
    ):
        """
        Dataset suitable for BanditRRN model from torchlogic

        Args:
            X (np.ndarray): features data scaled to [0, 1]; indexed by row as X[idx, :]
            y (np.ndarray): target data of classes 0, 1; indexed by row as y[idx, :]

        Raises:
            ValueError: if X and y do not have the same number of rows
        """
        super(BanditRRNDataset, self).__init__()
        # The dataset length is taken from X only, so a mismatched y would either
        # be silently truncated (y longer) or fail mid-epoch (y shorter).
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have the same number of rows, "
                f"got {X.shape[0]} and {y.shape[0]}"
            )
        self.X = X
        self.y = y
        self.sample_idx = np.arange(X.shape[0])  # index of samples

    def __len__(self):
        """Return the number of samples, i.e. the number of rows of X."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """
        Return one sample in the dictionary format described above.

        Args:
            idx: row position of the sample

        Returns:
            dict: 'features' (float tensor built from row ``idx`` of X),
            'target' (tensor built from row ``idx`` of y) and 'sample_idx'
            (``idx`` itself, passed through unchanged)
        """
        features = torch.from_numpy(self.X[idx, :]).float()
        target = torch.from_numpy(self.y[idx, :])
        return {'features': features, 'target': target, 'sample_idx': idx}

## Instantiate Datasets and Data Loaders

In [11]:
# Wrap each split in the dataset class defined above.
train_dataset, val_dataset, test_dataset = (
    BanditRRNDataset(X=features, y=labels)
    for features, labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

In [12]:
# Seeded torch RNG that is passed to every DataLoader created in this cell.
g = torch.Generator().manual_seed(42)

def create_holdout_samplers(train_dataset, pct=0.2, seed=0):
    """
    Split the indices of ``train_dataset`` into a training and a hold-out subset.

    The indices ``0 .. len(train_dataset) - 1`` are shuffled once with a
    fixed seed; the first ``floor(pct * len(train_dataset))`` shuffled indices
    form the hold-out subset and the rest form the training subset.

    Args:
        train_dataset: any object supporting ``len()``
        pct (float): fraction of samples assigned to the hold-out subset,
            between 0 and 1
        seed (int): seed of the shuffle that decides the split; the default 0
            reproduces the split of the previous ``np.random.seed(0)`` version

    Returns:
        tuple: ``(train_sampler, train_holdout_sampler)``, two
        ``SubsetRandomSampler`` objects over disjoint index sets

    Raises:
        ValueError: if ``pct`` is outside ``[0, 1]``
    """
    if not 0.0 <= pct <= 1.0:
        raise ValueError(f"pct must be between 0 and 1, got {pct}")

    train_size = len(train_dataset)
    indices = list(range(train_size))
    # A private RandomState yields the same permutation as seeding the global
    # NumPy RNG, but does not reset that global RNG as a hidden side effect.
    np.random.RandomState(seed).shuffle(indices)

    train_holdout_split_index = int(np.floor(pct * train_size))
    train_holdout_idx = indices[:train_holdout_split_index]
    train_idx = indices[train_holdout_split_index:]

    train_sampler = SubsetRandomSampler(train_idx)
    train_holdout_sampler = SubsetRandomSampler(train_holdout_idx)

    return train_sampler, train_holdout_sampler

# Disjoint samplers over the training dataset: one for fitting, one held out
# for monitoring during training.
train_sampler, train_holdout_sampler = create_holdout_samplers(train_dataset)

# Settings shared by all four loaders.  It is very important to optimize
# pin_memory / persistent_workers / num_workers in production.
# NOTE(review): the samplers above are built without a generator, and PyTorch
# documents DataLoader's `generator` as used by RandomSampler and worker
# seeding -- confirm the batch order is as reproducible as intended.
loader_kwargs = dict(
    batch_size=32,
    generator=g,
    pin_memory=False,
    persistent_workers=False,
    num_workers=0,
)

train_dl = DataLoader(train_dataset, sampler=train_sampler, **loader_kwargs)
train_holdout_dl = DataLoader(train_dataset, sampler=train_holdout_sampler, **loader_kwargs)
val_dl = DataLoader(val_dataset, **loader_kwargs)
test_dl = DataLoader(test_dataset, **loader_kwargs)

# Train Bandit-RRN Model

## Prepare the Feature Names

To aid in the explanations of our model, we can set our feature names to natural language that represents the values of the feature.
In the current data, each feature is a measurement in centimeters that has been min-max scaled to lie between 0 and 1, so each value represents the measurement's relative position between the feature's minimum and maximum rather than a raw length.
We rename each feature to describe this representation of our data, which is then used when extracting explanations from the model.

In [13]:
# Display the feature names as loaded, before the next cell replaces them.
data.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [14]:
# Rephrase each feature name as the start of a sentence so that extracted
# explanations read naturally (e.g. "the petal width in cm was >= ...").
data.feature_names = [
    "the sepal length in cm was",
    "the sepal width in cm was",
    "the petal length in cm was",
    "the petal width in cm was",
]

## Tune Hyper-parameters

In [15]:
# Seed the global NumPy and torch RNGs when this cell runs.
np.random.seed(0)
torch.manual_seed(0)

class TuneParameters:
    """
    Optuna-driven hyper-parameter search for a ``BanditNRNClassifier``.

    Each trial builds a fresh model, trains it with ``BanditNRNTrainer`` on the
    notebook-level ``train_dl`` / ``train_holdout_dl`` loaders and scores it on
    ``val_dl``.  The model of the best-scoring trial is kept in ``best_model``.

    The class reads the notebook globals ``data``, ``train_dl``,
    ``train_holdout_dl`` and ``val_dl``, so the cells defining them must have
    been run first.
    """

    def __init__(self, n_trials=10):
        """
        Args:
            n_trials (int): number of Optuna trials run by ``tune``
        """
        self.best_model = None
        self.best_rn_val_performance = 0.0
        self.n_trials = n_trials

    def _objective(self, trial):
        """
        Train and score one model for the hyper-parameters suggested by ``trial``.

        Args:
            trial: the Optuna trial used to sample every hyper-parameter

        Returns:
            the validation performance computed by ``model.evaluate`` on the
            predictions for ``val_dl``; Optuna maximizes this value
        """

        ########################################################################################################################
        # NOTE: These hyper-parameter settings are specific to the iris flower dataset.  For information on generally useful
        # ranges of hyper-parameters and their descriptions see the torchlogic documentation.
        ########################################################################################################################

        # Set Parameters

        ## Reinforced Reasoning Network Parameters
        layer_sizes = trial.suggest_categorical('layer_sizes', [(2, ), (3, ), (5, ),
                                                                (2, 2), (3, 3), (5, 5),
                                                                (2, 2, 2), (3, 3, 3), (5, 5, 5)])
        n_selected_features_input = trial.suggest_int('n_selected_features_input', low=2, high=3)
        # The upper bounds below depend on the sampled layer sizes.
        n_selected_features_internal = trial.suggest_int('n_selected_features_internal', low=2, high=min(3, min(layer_sizes)))
        n_selected_features_output = trial.suggest_int('n_selected_features_output', low=2, high=min(3, layer_sizes[-1]))
        perform_prune_plateau_count = trial.suggest_int('perform_prune_plateau_count', low=1, high=1)
        perform_prune_quantile = trial.suggest_float('perform_prune_quantile', low=0.1, high=0.9)
        increase_prune_plateau_count = trial.suggest_int('increase_prune_plateau_count', low=0, high=20)
        increase_prune_plateau_count_plateau_count = trial.suggest_int('increase_prune_plateau_count_plateau_count', low=10, high=30)
        ucb_scale = trial.suggest_float('ucb_scale', low=1.0, high=2.0)
        normal_form = trial.suggest_categorical('normal_form', ['dnf', 'cnf'])
        prune_strategy = trial.suggest_categorical('prune_strategy', ['class', 'logic'])
        delta = trial.suggest_float('delta', low=2.0, high=2.0)
        bootstrap = trial.suggest_categorical('bootstrap', [True, False])
        swa = trial.suggest_categorical('swa', [True, False])
        add_negations = trial.suggest_categorical('add_negations', [True, False])
        weight_init = trial.suggest_float('weight_init', low=0.01, high=1.0)

        ## Optimizer Parameters

        ### Learning Rate
        learning_rate = trial.suggest_float('learning_rate', low=0.01, high=0.2)

        ### L1 Regularization
        use_l1 = trial.suggest_categorical('use_l1', [True, False])
        if use_l1:
            l1_lambda = trial.suggest_float('l1_lambda', low=0.00001, high=0.1)
        else:
            l1_lambda = 0

        ### Weight Decay Regularization
        use_weight_decay = trial.suggest_categorical('use_weight_decay', [True, False])
        if use_weight_decay:
            weight_decay = trial.suggest_float('weight_decay', low=0.00001, high=0.1)
        else:
            weight_decay = 0

        ### Lookahead Optimization
        use_lookahead = trial.suggest_categorical('use_lookahead', [True, False])
        if use_lookahead:
            lookahead_steps = trial.suggest_int('lookahead_steps', low=5, high=10, step=1)
            lookahead_steps_size = trial.suggest_float('lookahead_steps_size', low=0.5, high=0.8)
        else:
            lookahead_steps = 0
            lookahead_steps_size = 0

        ### Data Augmentation
        # augment = trial.suggest_categorical('augment', ['CM', 'MU', 'AT', None])
        augment = trial.suggest_categorical('augment', ['CM', 'MU', None])  # excluding Adversarial Learning because it fails on Jupyter Notebooks
        if augment is not None:
            augment_alpha = trial.suggest_float('augment_alpha', low=0.0, high=1.0)
        else:
            augment_alpha = 0

        ### Early Stopping
        early_stopping_plateau_count = trial.suggest_int('early_stopping_plateau_count', low=20, high=50, step=1)

        ## Scheduler parameters
        t_0 = trial.suggest_int('T_0', low=2, high=10, step=1)
        t_mult = trial.suggest_int('T_mult', low=1, high=3, step=1)

        # init model
        model = BanditNRNClassifier(
            target_names=[x + '_label' for x in data.target_names],
            feature_names=data.feature_names,
            input_size=len(data.feature_names),
            output_size=len(data.target_names),
            layer_sizes=layer_sizes,
            n_selected_features_input=n_selected_features_input,
            n_selected_features_internal=n_selected_features_internal,
            n_selected_features_output=n_selected_features_output,
            perform_prune_quantile=perform_prune_quantile,
            ucb_scale=ucb_scale,
            normal_form=normal_form,
            delta=delta,
            prune_strategy=prune_strategy,
            bootstrap=bootstrap,
            swa=swa,
            add_negations=add_negations,
            weight_init=weight_init
        )

        epochs = 100
        accumulation_steps = 1
        optimizer = optim.AdamW(model.rn.parameters(), lr=learning_rate, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=t_0, T_mult=t_mult)
        trainer = BanditNRNTrainer(
            model=model,
            loss_func=nn.BCELoss(),
            optimizer=optimizer,
            scheduler=scheduler,
            epochs=epochs,
            accumulation_steps=accumulation_steps,
            l1_lambda=l1_lambda,
            early_stopping_plateau_count=early_stopping_plateau_count,
            perform_prune_plateau_count=perform_prune_plateau_count,
            increase_prune_plateau_count=increase_prune_plateau_count,
            increase_prune_plateau_count_plateau_count=increase_prune_plateau_count_plateau_count,
            lookahead_steps=lookahead_steps,
            lookahead_steps_size=lookahead_steps_size,
            augment=augment,
            augment_alpha=augment_alpha,
            class_independent=True
        )

        # train model
        # The trainer defaults to optimizing the validation roc_auc_score.  To optimize against a different metric pass the sklearn metric to the 'evaluation_metric' parameter
        trainer.train(train_dl, train_holdout_dl, evaluation_metric=roc_auc_score, multi_class=True)
        trainer.set_best_state()

        # evaluate model
        predictions, targets = trainer.model.predict(val_dl)
        rn_val_performance = trainer.model.evaluate(
            predictions=predictions,
            labels=targets
        )

        # Record a new best score only on a strict improvement, as before.
        improved = rn_val_performance > self.best_rn_val_performance
        if improved:
            self.best_rn_val_performance = rn_val_performance

        # Snapshot the model on improvement, and also for the first finished
        # trial even if it did not beat the initial 0.0 (a score of 0.0 or NaN).
        # Without this fallback tune() returned None when no trial scored above
        # 0.0, and the following cells failed on `None.predict`.
        if improved or self.best_model is None:
            self.best_model = copy.copy(trainer.model)
            self.best_model.rn = copy.deepcopy(trainer.model.rn)

        return rn_val_performance

    def tune(self):
        """
        Run ``n_trials`` Optuna trials that maximize the value of ``_objective``.

        Returns:
            the model snapshot stored in ``best_model``
        """
        # Create a study object with a seeded TPE sampler and optimize the objective function.
        sampler = optuna.samplers.TPESampler(multivariate=True, group=True, seed=42)
        study = optuna.create_study(direction='maximize', sampler=sampler)
        study.optimize(self._objective, n_trials=self.n_trials)
        return self.best_model

In [None]:
# Run a 50-trial hyper-parameter search and keep the model it returns.
tuner = TuneParameters(n_trials=50)
best_model = tuner.tune()

In [None]:
# Score the tuned model on the validation loader.
predictions, targets = best_model.predict(val_dl)

# With no output_metric given, evaluate computes the AUC.
# NOTE(review): multi_class=False here, whereas the training call and the
# test-set cell pass multi_class=True -- confirm which one is intended.
rn_val_performance = best_model.evaluate(predictions=predictions, labels=targets, multi_class=False)

# Mark the highest-scoring class of every row with 1 and all others with 0.
row_max = predictions.max(axis=1)
class_predictions = predictions.eq(row_max, axis=0).astype(int)

# Row-wise softmax so that each row sums to 1.
# NOTE(review): the trainer above uses nn.BCELoss, so the predictions are
# presumably already per-class probabilities rather than logits -- confirm
# that a softmax (instead of a plain normalisation) is what is wanted here.
row_softmax = softmax(predictions, axis=1)
predictions_probs = pd.DataFrame(row_softmax, columns=data.target_names)
print("Validation AUC:\n\n", rn_val_performance)

# Same predictions, scored with a different sklearn metric.
rn_val_performance = best_model.evaluate(
    predictions=predictions, labels=targets, output_metric=precision_score, multi_class=True,
)
print("\n\nValidation Precision Score:\n\n", rn_val_performance)

In [None]:
# Score the tuned model on the held-out test loader.
predictions, targets = best_model.predict(test_dl)
# Evaluate defaults to compute AUC
rn_test_performance = best_model.evaluate(
    predictions=predictions,
    labels=targets,
    multi_class=True
)
# Mark the highest-scoring class of every row with 1 and all others with 0.
class_predictions = predictions.eq(predictions.max(axis=1), axis=0).astype(int)
# Row-wise softmax so that each row sums to 1.
# NOTE(review): the trainer above uses nn.BCELoss, so the predictions are
# presumably already per-class probabilities rather than logits -- confirm
# that a softmax is what is wanted here.
predictions_probs = pd.DataFrame(softmax(predictions, axis=1), columns=data.target_names)
print("Test AUC:\n\n", rn_test_performance)

# Evaluate with a different metric.  The result is stored under its own
# test-specific name; it used to overwrite `rn_val_performance`, replacing the
# validation metric with a test-set precision.
rn_test_precision = best_model.evaluate(
    predictions=predictions,
    labels=targets,
    output_metric=precision_score,
    multi_class=True
)
print("\n\nTest Precision Score:\n\n", rn_test_precision)

In [19]:
# Raw per-class outputs returned by the most recent best_model.predict(...) call.
predictions

Unnamed: 0,probs_setosa,probs_versicolor,probs_virginica
0,0.657724,0.414511,0.441087
1,0.723561,0.394362,0.368766
2,0.637375,0.412647,0.50808
3,0.66806,0.404046,0.450843
4,0.659075,0.410299,0.453163
5,0.710396,0.393695,0.379084
6,0.676111,0.399303,0.437005
7,0.679505,0.388562,0.487621
8,0.638697,0.410612,0.465825
9,0.662932,0.406849,0.438732


In [20]:
# Row-wise softmax of the model outputs, one column per class name.
predictions_probs

Unnamed: 0,setosa,versicolor,virginica
0,0.386201,0.302822,0.310978
1,0.413084,0.297214,0.289703
2,0.37349,0.298319,0.328191
3,0.388694,0.298503,0.312804
4,0.385556,0.300639,0.313806
5,0.408742,0.297789,0.293469
6,0.392845,0.297855,0.309299
7,0.388657,0.290544,0.320799
8,0.379175,0.301845,0.31898
9,0.388616,0.300819,0.310565


In [21]:
# 1 marks the class with the highest output in each row, 0 everywhere else.
class_predictions

Unnamed: 0,probs_setosa,probs_versicolor,probs_virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,1,0,0


In [22]:
# Ground-truth labels returned by predict alongside the predictions.
targets

Unnamed: 0,setosa_label,versicolor_label,virginica_label
0,0,1,0
1,1,0,0
2,0,0,1
3,0,1,0
4,0,1,0
5,1,0,0
6,0,1,0
7,0,0,1
8,0,1,0
9,0,1,0


# Inspecting the Model

### Global Explain

A global explanation prints the logic learned for each class.  The `quantile` parameter is the percent of the model you would like to be explained.

We represent our features as values scaled between 0 and 1.  Therefore, we interpret the explanations to mean that large values for a particular feature represent `truthiness` of a predicate, while small values represent `falseness` of a predicate.

For example, the following logic for the class `setosa`:

```
A flower is in the setosa class because: 
AND(
    NOT(AND(
            sepal width (cm) >= 0.77405,
            petal length (cm) >= 0.4397)),
    NOT(OR(
            AND(
            sepal width (cm) >= 0.67788,
            petal length (cm) >= 0.20122),
            NOT(sepal width (cm) >= 0.48579))))
```

The `logic` from above is interpreted as:

```
When BOTH of the following are true the class is "setosa":
    1. The flower has a sepal width below the transformed value of 0.77, OR has a petal length below the transformed value of 0.44 (i.e. it is not the case that both are above their thresholds).
    2. The flower has a sepal width below the transformed value of 0.68 OR a petal length below the transformed value of 0.20; AND the flower has a sepal width of at least the transformed value of 0.49.
```

#### Print Types

The explanation and printing methods can all be set to use different `print_type` parameters.  Each will produce a different style of explanation that may be used for different end-users.

`logical`: produces a logic statement format such as:

    AND(x1, x2)
    
`logical-natural`: produces a natural language formatted nested tree format, such as:

    the following are TRUE:
        - x1
        - x2
        
`natural`: produces a natural language paragraph listing the logics from the most important to the least important, such as:

    Each one of the following must be met.  x1, and x2.


### Controlling explanation outputs

There are several options to control the output of our explanations that enable us to produce the right type of explanation output for our particular use case.  Some dimensions of control include:

- quantile: When less than 1.0, the most important (i.e. predictive) logic in the model is exposed.  The value corresponds to the quantile of weights that should be included, e.g. 0.4 corresponds to including logic with weights in the top 0.4 quantile or above.
- explain_type: Can be set to 'both', 'positive' or 'negative'.  Both includes both positive and negative relations, i.e. X and NOT(X).  Optionally, we can subset the explanation to only those logics that are positively or negatively related to our target.
- print_type: Can be set to 'logical', 'logical-natural' or 'natural' (see "Print Types" above).  Natural explanations are produced using more natural language, while learned logical representations show the structure of the logic in a concise logical form.
- ignore_uninformative: Can be True or False.  If set to True, then logics that don't provide information about why a prediction is made are excluded.  This could be a logic like, the value of X is greater than the minimum value of X -- all data points will be true.
- rounding_precision: This integer controls how to round the decision boundaries of each logic.
- show_bounds: Can be True or False.  In some cases it may be uninformative or unnecessary to show the decision boundaries of the logic.  For example, if all features are boolean then decision boundaries for each logic are unnecessary.
- inverse_transform: This argument takes the inverse transformation for input features.  If supplied, the bounds from logic will show in the untransformed space, otherwise the bounds will be in the transformed space (i.e. between 0 and 1).

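To demonstrate an example of this control, we first print the full model explanation (`quantile=1.0`) and then reduce its size using the quantile parameter (`quantile=0.4`).  We see that we can explain each class using only its most heavily weighted logic.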

In [25]:
# Full explanation: quantile=1.0 keeps all of the learned logic, and the
# bounds are mapped back to centimeters through the scaler's inverse transform.
full_explanation = best_model.explain(
    quantile=1.0,
    required_output_thresholds=np.array(0.5),
    explain_type='both',
    print_type='logical',
    explanation_prefix="A flower is in the",
    target_names=data.target_names,
    ignore_uninformative=True,
    rounding_precision=5,
    inverse_transform=mms.inverse_transform,
)
print(full_explanation)

A flower is in the setosa because: 


AND 
	NOT 
		OR 
			The petal length in cm was >= 6.9
			NOT 
				The petal width in cm was >= 0.1
	OR 
		NOT 
			The petal length in cm was >= 6.9
		NOT 
			The petal width in cm was >= 2.5
		NOT 
			The sepal width in cm was >= 4.4

A flower is in the versicolor because: 


OR 
	NOT 
		The petal width in cm was >= 0.1
	NOT 
		The sepal length in cm was >= 4.3
	NOT 
		The sepal width in cm was >= 2.0

A flower is in the virginica because: 


AND 
	NOT 
		The sepal width in cm was >= 2.0
	OR 
		The petal length in cm was >= 5.58695
		The petal width in cm was >= 0.8709
		The petal width in cm was >= 1.48097


In [26]:
# Shorter explanation: quantile=0.4 restricts the output to the most
# important logic, printed in the nested natural-language format.
top_explanation = best_model.explain(
    quantile=0.4,
    required_output_thresholds=np.array(0.5),
    explain_type='both',
    print_type='logical-natural',
    explanation_prefix="A flower is in the",
    target_names=data.target_names,
    ignore_uninformative=True,
    rounding_precision=5,
    show_bounds=True,
    inverse_transform=mms.inverse_transform,
)
print(top_explanation)

A flower is in the setosa because: 


It was not true that 
	The petal length in cm was >= 6.9

A flower is in the versicolor because: 


It was not true that 
	The petal width in cm was >= 0.1

A flower is in the virginica because: 


It was not true that 
	The sepal width in cm was >= 2.0


### Printing the weights

We can inspect the weights the model learned for each logic to see how important they are to the overall prediction.

In [31]:
# Position of the validation sample to explain.  `idx` was not defined in any
# cell of the notebook, so this cell raised NameError on a fresh kernel.  0 (the
# first validation row) is an arbitrary choice; change it to inspect another
# sample.
idx = 0

# Print the learned logic, weights and outputs for that single sample.  The
# features get a leading batch dimension via unsqueeze(0).
best_model.print_samples(
    val_dataset[idx]['features'].unsqueeze(0),
    quantile=1.0,
    target_names=data.target_names, 
    explain_type='both',
    print_type='logical',
    ignore_uninformative=False,
    rounding_precision=5,
    # inverse_transform=mms.inverse_transform
)

REASONING NETWORK MODEL FOR: setosa
Logic at depth 2: NOT(the petal width in cm was >= 0.0), NOT(the sepal width in cm was >= 0.43456), the petal length in cm was >= 0.14846
output: tensor([0.0339, 0.0417, 0.5000])

Logic at depth 1: ['NOT(OR(NOT(the petal width in cm was >= 0.0), NOT(the sepal width in cm was >= 0.43456), the petal length in cm was >= 0.14846))']
weights: tensor([ 0.1858, -0.1280, -0.3253])
output: 0.7083703279495239
required_threshold: 0.6870833039283752

Logic at depth 2: NOT(the petal length in cm was >= 0.91488), NOT(the petal width in cm was >= 0.62991), NOT(the sepal width in cm was >= 1.0)
output: tensor([0.0417, 0.0339, 0.5000])

Logic at depth 1: ['OR(NOT(the petal length in cm was >= 0.91488), NOT(the petal width in cm was >= 0.62991), NOT(the sepal width in cm was >= 1.0))']
weights: tensor([-0.0535, -0.0357, -0.0303])
output: 0.10098778456449509
required_threshold: 0.06949496269226074

Logic at depth 0: ['AND(NOT(OR(NOT(the petal width in cm was >= 0.0), N

In [34]:
# Print the learned logic and weights of the whole model.  No inverse
# transform is passed, so the bounds stay in the scaled [0, 1] space.
print_kwargs = dict(
    quantile=1.0,
    required_output_thresholds=torch.tensor(0.5),
    explain_type='both',
    print_type='logical',
    target_names=data.target_names,
    ignore_uninformative=False,
    rounding_precision=5,
    # inverse_transform=mms.inverse_transform
)
best_model.print(**print_kwargs)

REASONING NETWORK MODEL FOR: setosa
Logic at depth 2: NOT(the petal width in cm was >= 0.0), the petal length in cm was >= 1.0
output: tensor([0.1855, 0.7307, 0.0000])

Logic at depth 1: ['NOT(OR(NOT(the petal width in cm was >= 0.0), the petal length in cm was >= 1.0))']
weights: tensor([ 0.1858, -0.1280, -0.3253])
output: 0.16345000267028809
required_threshold: 0.16192841529846191

Logic at depth 2: NOT(the petal length in cm was >= 1.0), NOT(the petal width in cm was >= 1.0), NOT(the sepal width in cm was >= 1.0)
output: tensor([1., 1., 1.])

Logic at depth 1: ['OR(NOT(the petal length in cm was >= 1.0), NOT(the petal width in cm was >= 1.0), NOT(the sepal width in cm was >= 1.0))']
weights: tensor([-0.0535, -0.0357, -0.0303])
output: 0.0
required_threshold: 0.0

Logic at depth 0: ['AND(NOT(OR(NOT(the petal width in cm was >= 0.0), the petal length in cm was >= 1.0)), OR(NOT(the petal length in cm was >= 1.0), NOT(the petal width in cm was >= 1.0), NOT(the sepal width in cm was >= 1