In [1]:
from lore_sa.dataset import TabularDataset
from lore_sa.lore import TabularGeneticGeneratorLore

In [2]:
# Name of the class column that the model predicts and LORE explains.
target = "income"

In [3]:
# Read the CSV into a LORE TabularDataset, marking `target` as the class column.
dataset = TabularDataset.from_csv('adult.csv', class_name=target)
# Discard every row that has at least one missing value (in place on dataset.df).
dataset.df.dropna(axis=0, how='any', inplace=True)

In [4]:
# List the column labels available before any feature is removed.
dataset.df.columns

Index(['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
       'marital-status', 'occupation', 'relationship', 'race', 'gender',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'income'],
      dtype='object')

In [5]:
# Remove the columns that are not used as features in this example.
# `errors="ignore"` keeps the cell re-runnable: without it, executing the cell a
# second time raises KeyError because the columns have already been dropped.
dataset.df.drop(
    columns=["marital-status", "fnlwgt", "educational-num", "occupation", "native-country"],
    inplace=True,
    errors="ignore",
)

In [6]:
# Refresh the dataset descriptor after the in-place edits to dataset.df above,
# so that the descriptor used further down matches the reduced set of columns.
dataset.update_descriptor()

In [7]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from lore_sa.bbox import sklearn_classifier_bbox

def train_model(dataset: TabularDataset):
    """Fit a random-forest pipeline on ``dataset`` and wrap it as a LORE black box.

    A ``ColumnTransformer`` standardises the numeric columns and ordinal-encodes
    the categorical ones; its output feeds a ``RandomForestClassifier``. The
    pipeline is fitted on a stratified 70% split of the rows.

    Args:
        dataset: Dataset whose ``descriptor`` is in sync with ``dataset.df`` and
            exposes ``'numeric'``, ``'categorical'`` and ``'target'`` entries.

    Returns:
        The fitted pipeline wrapped in ``sklearn_classifier_bbox.sklearnBBox``.
    """
    # NOTE(review): the descriptor indices look like column positions in
    # dataset.df; they are applied to the feature matrix below, which only lines
    # up when the class column comes after every feature column - confirm.
    numeric_indices = [v['index'] for v in dataset.descriptor['numeric'].values()]
    categorical_indices = [v['index'] for v in dataset.descriptor['categorical'].values()]

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_indices),
            # Map categories that were not seen during fit to -1 instead of
            # raising, so predicting on rows outside the training split (or on
            # generated neighbours) cannot crash on an unseen category.
            ('cat',
             OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
             categorical_indices),
        ]
    )
    model = make_pipeline(preprocessor, RandomForestClassifier(n_estimators=100, random_state=42))

    # Take the class column name from the dataset itself rather than from a
    # notebook-level global, so the function works for any dataset passed in.
    class_name = next(iter(dataset.descriptor['target']))
    X = dataset.df.drop(columns=[class_name])
    y = dataset.df[class_name].values

    # The 30% hold-out is not used here; the split is kept (same seed and
    # stratification) so the training subset stays the same as before.
    X_train, _, y_train, _ = train_test_split(
        X.values, y, test_size=0.3, random_state=42, stratify=y)
    model.fit(X_train, y_train)

    return sklearn_classifier_bbox.sklearnBBox(model)

In [8]:
# Train the classifier on the prepared dataset; `bbox` is the wrapped model
# returned by train_model and is the black box that LORE will explain.
bbox = train_model(dataset)

In [9]:
# Build the LORE explainer from the black box and the dataset it was trained on.
tabularLore = TabularGeneticGeneratorLore(bbox, dataset)

In [10]:
# Flatten the descriptor into a single list of feature names, visiting every
# descriptor group except the one that describes the class column.
out = [
    feature
    for group, columns in dataset.descriptor.items()
    if group != 'target'
    for feature in columns.keys()
]
out, dataset.descriptor.keys()

(['age',
  'capital-gain',
  'capital-loss',
  'hours-per-week',
  'workclass',
  'education',
  'relationship',
  'race',
  'gender'],
 dict_keys(['numeric', 'categorical', 'ordinal', 'target']))

In [11]:
# Show the descriptor entry for the class column: its index, its distinct
# values and how many rows carry each value.
dataset.descriptor["target"]

{'income': {'index': 9,
  'distinct_values': ['>50K', '<=50K'],
  'count': {'>50K': 559, '<=50K': 1697}}}

In [12]:
# Instance to explain: the second row of the data, with the class column removed.
x = dataset.df.drop(columns=[target]).iloc[1]

In [13]:
# Ask LORE to explain the black box's prediction for the instance `x`.
# NOTE(review): no seed is passed here; if the neighbourhood generation is
# stochastic, the explanation shown below may differ between runs - confirm.
explanation = tabularLore.explain(x)

In [14]:
# Display the whole explanation dictionary (rule, counterfactuals, fidelity,
# deltas, counterfactual samples, ...).
explanation

{'rule': {'premises': [{'attr': 'capital-gain', 'val': 6958.5, 'op': '>'}],
  'consequence': {'attr': 'income', 'val': '>50K', 'op': '='}},
 'counterfactuals': [{'premises': [{'attr': 'capital-gain',
     'val': 6958.5,
     'op': '<='},
    {'attr': 'age', 'val': 40.0, 'op': '>'},
    {'attr': 'relationship', 'val': 'Husband', 'op': '!='}],
   'consequence': {'attr': 'income', 'val': '<=50K', 'op': '='}}],
 'fidelity': 1.0,
 'deltas': [[{'att': 'capital-gain', 'op': '<=', 'thr': 6958.5}]],
 'counterfactual_samples': [[49,
   'Private',
   'Doctorate',
   'Not-in-family',
   'White',
   'Male',
   1369,
   0,
   40],
  [20,
   'Self-emp-not-inc',
   '1st-4th',
   'Unmarried',
   'White',
   'Male',
   6427,
   0,
   8],
  [19, 'Private', 'Doctorate', 'Unmarried', 'White', 'Male', 6427, 2714, 23],
  [20,
   'Self-emp-not-inc',
   'Doctorate',
   'Not-in-family',
   'White',
   'Female',
   6427,
   0,
   8],
  [20,
   'Self-emp-not-inc',
   'Doctorate',
   'Not-in-family',
   'White',
 

In [15]:
# Top-level sections of the explanation (iterating a dict yields its keys).
list(explanation)

['rule',
 'counterfactuals',
 'fidelity',
 'deltas',
 'counterfactual_samples',
 'counterfactual_predictions',
 'feature_importances']

In [16]:
# Show only the "deltas" section: lists of conditions, each given as an
# attribute ('att'), an operator ('op') and a threshold ('thr').
explanation["deltas"]

[[{'att': 'capital-gain', 'op': '<=', 'thr': 6958.5}]]

In [17]:
# Print the explained instance one feature per line, for comparison with the
# thresholds in the explanation above.
for feature, value in x.items():
    print(feature, value)

age 49
workclass Private
education Doctorate
relationship Unmarried
race White
gender Male
capital-gain 7430
capital-loss 0
hours-per-week 40


In [None]:
# Launch the LORE_sa explanation webapp for the instance `x` outside the
# notebook (inJupyter=False). The captured log shows an API server starting on
# port 8000 and a client on port 8080 being opened in the default browser.
# NOTE(review): this cell has no execution count (In [None]), so the call seems
# to have been still running or interrupted when the notebook was saved. The
# server started here appears to keep port 8000 busy for the later launches in
# this notebook - confirm, and stop it before launching again.
tabularLore.interactive_explanation(x, inJupyter=False)

INFO:     Started server process [1788]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Launching LORE_sa explanation viz webapp
Starting API server on 0.0.0.0:8000
INFO:     127.0.0.1:59916 - "GET /api/get-datasets HTTP/1.1" 200 OK
API server is ready at http://localhost:8000
NPM version detected: 10.9.3
Dependencies already installed, skipping npm install
Application started successfully!
API: http://localhost:8000/docs#/
Client: http://localhost:8080
Opening http://localhost:8080 in your default browser...
Browser opened successfully!


INFO:     127.0.0.1:64374 - "OPTIONS /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:64374 - "GET /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:50198 - "GET /api/get-datasets HTTP/1.1" 200 OK
INFO:     127.0.0.1:50209 - "GET /api/get-datasets HTTP/1.1" 200 OK
INFO:     127.0.0.1:54007 - "GET /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:54007 - "GET /api/get-datasets HTTP/1.1" 200 OK
INFO:     127.0.0.1:61307 - "OPTIONS /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:61307 - "GET /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:61307 - "GET /api/get-datasets HTTP/1.1" 200 OK
INFO:     127.0.0.1:52535 - "OPTIONS /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:52535 - "GET /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:52535 - "GET /api/get-datasets HTTP/1.1" 200 OK


In [19]:
# Open the explanation webapp again, this time for display inside the notebook
# (inJupyter=True) and without passing an instance.
# NOTE(review): the captured output reports a bind failure on 0.0.0.0:8000
# (Errno 10048), presumably because a server started earlier in the same kernel
# is still listening. The "API server is ready" line that follows may therefore
# refer to that earlier server - confirm.
tabularLore.interactive_explanation(inJupyter=True)

Launching LORE_sa explanation viz webapp
Starting API server on 0.0.0.0:8000


INFO:     Started server process [1788]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 8000): only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


API server is ready at http://localhost:8000
NPM version detected: 10.9.3
Dependencies already installed, skipping npm install
Application started successfully!
API: http://localhost:8000/docs#/
Client: http://localhost:8080


In [20]:
import os

# Inspect the two environment flags (presumably set by the webapp launch above).
# `.get` is used so that this diagnostic cell shows None for a flag that is not
# set instead of raising KeyError, e.g. on a kernel where the webapp has not
# been launched yet.
os.environ.get("INSTANCE_PROVIDED"), os.environ.get("CUSTOM_DATA_LOADED")

('false', 'true')

In [21]:
from lore_sa.webapp.routes.webapp_api_state import webapp_state

In [22]:
# Look at the DataFrame of the dataset currently held in the webapp's state
# object, to check what data the running webapp is using.
webapp_state.dataset.df

Unnamed: 0,age,workclass,education,relationship,race,gender,capital-gain,capital-loss,hours-per-week,income
0,44,Federal-gov,Assoc-acdm,Husband,White,Male,0,0,40,>50K
1,49,Private,Doctorate,Unmarried,White,Male,7430,0,40,>50K
2,26,Local-gov,Bachelors,Own-child,Black,Female,0,0,42,<=50K
3,31,Private,Bachelors,Husband,White,Male,0,0,50,>50K
4,43,Private,Bachelors,Unmarried,Black,Female,0,0,35,>50K
...,...,...,...,...,...,...,...,...,...,...
2438,41,Local-gov,Prof-school,Husband,White,Male,0,0,50,>50K
2439,51,Private,Masters,Husband,White,Male,7298,0,50,>50K
2440,56,Private,HS-grad,Husband,White,Male,0,0,36,>50K
2441,35,Private,HS-grad,Husband,White,Male,0,0,40,<=50K


In [23]:
from lore_sa.webapp import Webapp
# Create a standalone Webapp object; it is used below to launch the demo.
webapp = Webapp()

In [24]:
# Start the LORE_sa demo application.
# NOTE(review): the captured output again shows a bind failure on 0.0.0.0:8000
# (Errno 10048) in the same server process, so the demo is presumably being
# served by a server started by an earlier cell - confirm, and stop the earlier
# server or restart the kernel before launching.
webapp.launch_demo()

Launching LORE_sa Demo Application
Starting API server on 0.0.0.0:8000


INFO:     Started server process [1788]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 8000): only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


API server is ready at http://localhost:8000
NPM version detected: 10.9.3
Dependencies already installed, skipping npm install
Application started successfully!
API: http://localhost:8000/docs#/
Client: http://localhost:8080
Opening http://localhost:8080 in your default browser...
Browser opened successfully!
