In [1]:
from lore_sa.dataset import TabularDataset
import pandas as pd
import json
import numpy as np

In [2]:
# Load the iris CSV; the 'variety' column is passed as the dataset's class_name.
dataset = TabularDataset.from_csv('iris.csv', class_name = "variety")
# Drop rows containing missing values, mutating the dataset's frame in place.
dataset.df.dropna(inplace = True)

In [3]:
# Show the column labels of the loaded frame.
dataset.df.columns

Index(['sepal.length', 'sepal.width', 'petal.length', 'petal.width',
       'variety'],
      dtype='object')

In [4]:
# NOTE(review): presumably rebuilds dataset.descriptor from the current frame
# (rows with missing values were dropped from dataset.df earlier) — confirm
# against the lore_sa documentation.
dataset.update_descriptor()

In [7]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from lore_sa.bbox import sklearn_classifier_bbox


def train_model(dataset: TabularDataset):
    """Fit a preprocessing + random-forest pipeline and wrap it as a black box.

    Columns listed under the descriptor's 'numeric' group are standardised and
    those under 'categorical' are ordinal-encoded (both selected by their
    'index' entry) before a 100-tree random forest is fitted on a stratified
    70% training split.

    Args:
        dataset: dataset whose ``descriptor`` provides the 'numeric' and
            'categorical' column groups and whose ``df`` holds the feature
            columns 'sepal.length' through 'petal.width' plus the 'variety'
            labels.

    Returns:
        The fitted pipeline wrapped in ``sklearn_classifier_bbox.sklearnBBox``.
    """
    descriptor = dataset.descriptor

    # One (name, transformer, column indices) triple per descriptor group.
    column_steps = [
        ('num', StandardScaler(), [meta['index'] for meta in descriptor['numeric'].values()]),
        ('cat', OrdinalEncoder(), [meta['index'] for meta in descriptor['categorical'].values()]),
    ]
    pipeline = make_pipeline(
        ColumnTransformer(transformers=column_steps),
        RandomForestClassifier(n_estimators=100, random_state=42),
    )

    # Features and labels as plain arrays; the labels also drive stratification.
    features = dataset.df.loc[:, 'sepal.length':'petal.width'].values
    labels = dataset.df['variety'].values
    X_train, _X_test, y_train, _y_test = train_test_split(
        features,
        labels,
        test_size=0.3,
        random_state=42,
        stratify=labels,
    )

    # Only the training portion is used here; the held-out split is discarded.
    pipeline.fit(X_train, y_train)

    return sklearn_classifier_bbox.sklearnBBox(pipeline)

In [8]:
# Train the pipeline on the dataset and keep the wrapped model as the black box.
bbox = train_model(dataset)

In [9]:
from lore_sa.lore import TabularGeneticGeneratorLore

# Build the explainer from the black-box model and the dataset it was trained on.
tabularLore = TabularGeneticGeneratorLore(bbox, dataset)

In [18]:
# Flatten the feature names of every non-target descriptor group, then show
# them alongside the descriptor's group names.
out = [
    feature
    for group, columns in dataset.descriptor.items()
    if group != 'target'
    for feature in columns.keys()
]
out, dataset.descriptor.keys()

(['sepal.length', 'sepal.width', 'petal.length', 'petal.width'],
 dict_keys(['numeric', 'categorical', 'ordinal', 'target']))

In [11]:
# Explain a single record: take one row by position and strip its final
# entry (the 'variety' label) so only feature values reach the explainer.
num_row = 10
x = dataset.df.iloc[num_row].iloc[:-1]
tabularLore.explain_instance(x)

{'rule': {'premises': [{'attr': 'petal.length',
    'val': 2.43836772441864,
    'op': '<='}],
  'consequence': {'attr': 'variety', 'val': 'Setosa', 'op': '='}},
 'counterfactuals': [{'premises': [{'attr': 'petal.length',
     'val': 2.5992462635040283,
     'op': '<='},
    {'attr': 'petal.length', 'val': 2.43836772441864, 'op': '>'},
    {'attr': 'petal.width', 'val': 1.9408769607543945, 'op': '<='}],
   'consequence': {'attr': 'variety', 'val': 'Versicolor', 'op': '='}}],
 'fidelity': 1.0,
 'deltas': [[{'att': 'petal.length', 'op': '>', 'thr': 2.43836772441864}]],
 'counterfactual_samples': [[4.978399200848428,
   3.7,
   5.859611315634264,
   2.436098127833387],
  [7.829862548036328, 3.7, 5.662503558674611, 0.2],
  [6.3253298302237635, 3.7, 4.365627869045396, 1.1609794478889635],
  [6.214391623721293,
   4.397652440853118,
   2.463139479140674,
   1.0519201965449232],
  [6.214391623721293,
   4.397652440853118,
   2.463139479140674,
   1.0519201965449232],
  [6.214391623721293,
   