In [1]:
from lore_sa.dataset import TabularDataset
from lore_sa.lore import TabularGeneticGeneratorLore

In [2]:
# Name of the label column; reused when loading the CSV and when slicing features.
target = "income"

In [3]:
# Load the CSV into a TabularDataset, declaring `target` as the class column.
dataset = TabularDataset.from_csv("adult.csv", class_name=target)

# Discard rows with missing values directly on the frame wrapped by the dataset.
dataset.df.dropna(inplace=True)

In [4]:
# Show the column labels currently present in the wrapped DataFrame.
dataset.df.columns

Index(['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
       'marital-status', 'occupation', 'relationship', 'race', 'gender',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'income'],
      dtype='object')

In [5]:
# Remove the columns that are not used in this demo.
# errors="ignore" keeps the cell re-runnable: on a second execution the columns
# are already gone and a plain drop would raise KeyError.
dataset.df.drop(
    columns=["marital-status", "fnlwgt", "educational-num", "occupation", "native-country"],
    inplace=True,
    errors="ignore",
)

In [6]:
# Refresh dataset.descriptor after the in-place edits to dataset.df; the
# descriptor printed further down lists only the columns that were kept.
dataset.update_descriptor()

In [7]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from lore_sa.bbox import sklearn_classifier_bbox

def train_model(dataset: TabularDataset, class_name=None):
    """Fit a random-forest pipeline on ``dataset`` and wrap it as a LORE black box.

    Numeric columns are standardised and categorical columns are ordinal-encoded
    by a ``ColumnTransformer`` placed in front of a ``RandomForestClassifier``.
    The pipeline is fitted on the training part of a stratified 70/30 split.

    Parameters
    ----------
    dataset : TabularDataset
        Dataset whose ``df`` holds the feature columns plus the label column and
        whose ``descriptor`` has ``'numeric'``, ``'categorical'`` and ``'target'``
        groups; every numeric/categorical entry carries an ``'index'``.
    class_name : str, optional
        Name of the label column. When omitted it is read from the single key
        of ``dataset.descriptor['target']`` instead of relying on a notebook
        global.

    Returns
    -------
    The fitted pipeline wrapped in ``sklearn_classifier_bbox.sklearnBBox``.

    Raises
    ------
    ValueError
        If ``class_name`` is omitted and the descriptor does not list exactly
        one target column.
    """
    if class_name is None:
        target_columns = list(dataset.descriptor['target'])
        if len(target_columns) != 1:
            raise ValueError(
                "Expected exactly one target column in dataset.descriptor['target'], "
                f"found {target_columns!r}; pass class_name explicitly."
            )
        class_name = target_columns[0]

    # NOTE(review): the descriptor indices are used as column positions of the
    # feature matrix built below (label column removed). That lines up as long
    # as the label is the last column of dataset.df - confirm for other datasets.
    numeric_indices = [v['index'] for v in dataset.descriptor['numeric'].values()]
    categorical_indices = [v['index'] for v in dataset.descriptor['categorical'].values()]

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_indices),
            # Categories that are absent from the training split would make the
            # default encoder raise at predict time; map them to -1 instead.
            ('cat',
             OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
             categorical_indices)
        ]
    )
    model = make_pipeline(preprocessor, RandomForestClassifier(n_estimators=100, random_state=42))

    X = dataset.df.drop([class_name], axis=1)
    y = dataset.df[class_name].values
    # Only the training part is needed here; the held-out 30% is discarded.
    X_train, _, y_train, _ = train_test_split(
        X.values, y, test_size=0.3, random_state=42, stratify=y)
    model.fit(X_train, y_train)

    return sklearn_classifier_bbox.sklearnBBox(model)

In [8]:
# Train the random-forest pipeline and keep the wrapped black box for the explainer.
bbox = train_model(dataset)

In [9]:
# Build the LORE explainer from the black box and the dataset.
# NOTE(review): going by the class name this variant uses a genetic
# neighbourhood generator - confirm against the lore_sa documentation.
tabularLore = TabularGeneticGeneratorLore(bbox, dataset)

In [10]:
# Flatten the feature names of every descriptor group except the target,
# keeping the order in which the groups and their entries are stored.
out = [
    feature_name
    for group_name, group in dataset.descriptor.items()
    if group_name != 'target'
    for feature_name in group.keys()
]
out, dataset.descriptor.keys()

(['age',
  'capital-gain',
  'capital-loss',
  'hours-per-week',
  'workclass',
  'education',
  'relationship',
  'race',
  'gender'],
 dict_keys(['numeric', 'categorical', 'ordinal', 'target']))

In [11]:
# Inspect the descriptor entry of the label column.
dataset.descriptor["target"]

{'income': {'index': 9,
  'distinct_values': ['>50K', '<=50K'],
  'count': {'>50K': 559, '<=50K': 1697}}}

In [12]:
# Pick the row at position 1 of the feature frame (label column removed)
# as the instance to inspect and explain.
x = dataset.df.drop(columns=[target]).iloc[1]

In [13]:
# tabularLore.explain(x)

In [14]:
# Print the selected instance as one "feature value" pair per line.
for feature, value in x.items():
    print(feature, value)

age 49
workclass Private
education Doctorate
relationship Unmarried
race White
gender Male
capital-gain 7430
capital-loss 0
hours-per-week 40


In [15]:
# tabularLore.interactive_explanation(x, inJupyter=False)

In [16]:
# tabularLore.interactive_explanation(inJupyter=False)

In [17]:
from lore_sa.webapp import Webapp
# Create the LORE_sa demo web application object.
webapp = Webapp()

In [None]:
# Start the demo. According to the captured log below this serves the API on
# 0.0.0.0:8000 (i.e. all network interfaces), the client on localhost:8080,
# and opens the client in the default browser.
# NOTE(review): the cell has no execution count, so the call appears to keep
# running until interrupted - confirm before using "Run All".
webapp.launch_demo()

INFO:     Started server process [6580]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Launching LORE_sa Demo Application
Starting API server on 0.0.0.0:8000
INFO:     127.0.0.1:54657 - "GET /api/get-datasets HTTP/1.1" 200 OK
API server is ready at http://localhost:8000
NPM version detected: 10.9.3
Dependencies already installed, skipping npm install
Application started successfully!
API: http://localhost:8000/docs#/
Client: http://localhost:8080
Opening http://localhost:8080 in your default browser...
Browser opened successfully!


INFO:     127.0.0.1:53946 - "OPTIONS /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "GET /api/check-custom-data HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "GET /api/get-datasets HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "GET /api/get-dataset-info/iris HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "GET /api/get-classifiers HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "GET /api/get-classifiers HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "OPTIONS /api/train-model HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "POST /api/train-model HTTP/1.1" 200 OK
INFO:     127.0.0.1:53946 - "OPTIONS /api/explain HTTP/1.1" 200 OK
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.8               3.0               4.35               1.3 instance_df
[[5.8  3.   4.35 1.3 ]] encoded_instance
OrderedDict([('sepal length (cm)', 5.8), ('sepal width (cm)', 3), ('petal length (cm)', 4.35), ('petal width (cm)', 1.3)]) instance
[5.8  3.   4.35 1.3 