# Credit Prediction

## Load Data
This dataset classifies people described by a set of attributes as good or bad credit risks.

In [None]:
from xautoml.util.datasets import openml_task

# Load the training portion of the data set through the xautoml helper.
# NOTE(review): the positional arguments are presumably the OpenML task id (31)
# and the fold index (0) — confirm against the `openml_task` signature.
X_train, y_train = openml_task(31, 0, train=True)
# Bare last expression so the notebook renders the training features.
X_train

## Start the Model Building

Use `auto-sklearn` to train a classifier on the training data.

By default, `auto-sklearn` only keeps the models used in the ensemble in memory and deletes all other models after completing the optimization. If you want to analyse all constructed models, you have to provide `delete_tmp_folder_after_terminate`, `max_models_on_disc` and `tmp_folder`, as done in the following cell.

In [None]:
import shutil
import os
import autosklearn.classification
from autosklearn.metrics import accuracy

# Remove any working directory left over from a previous run so that this
# run starts from a clean folder.
workdir = './_auto-sklearn_/'
if os.path.exists(workdir):
    shutil.rmtree(workdir)

# Constructor options, collected in one place so they are easy to tune.
automl_settings = dict(
    # Time budgets passed to auto-sklearn: whole task / single run.
    time_left_for_this_task=60,
    per_run_time_limit=20,
    metric=accuracy,
    # Optional: set the following three parameters to analyse all models
    # generated by auto-sklearn. Otherwise, you can only inspect the top 50 models.
    delete_tmp_folder_after_terminate=False,
    max_models_on_disc=None,
    tmp_folder=workdir,
)

automl = autosklearn.classification.AutoSklearnClassifier(**automl_settings)
automl.fit(X_train, y_train, dataset_name='credit-g')

## Visualize the Optimization Run in XAutoML

In [None]:
from xautoml.main import XAutoML
from xautoml.adapter import import_auto_sklearn

# Load the test portion using the same first two arguments (31, 0) as for the training data.
X_test, y_test = openml_task(31, 0, test=True)

# Hand the auto-sklearn classifier to the xautoml adapter; its result is what
# XAutoML receives as first argument.
# NOTE(review): `rh` presumably stands for "run history" — confirm in xautoml.
rh = import_auto_sklearn(automl)
main = XAutoML(rh, X_test, y_test)
# Bare last expression so the notebook displays the XAutoML object.
main