# Credit Prediction

## Load Data
This dataset classifies people described by a set of attributes as good or bad credit risks.

In [1]:
# Fetch the training portion of OpenML task 31, the credit-risk data set
# described in the text above.
# NOTE(review): the second positional argument (0) is presumably the fold
# index of the task's split — confirm against `openml_task`.
from xautoml.util.datasets import openml_task

X_train, y_train = openml_task(31, 0, train=True)
# Leave `X_train` as the last expression so the notebook displays it.
X_train

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,>=200,18,existing paid,furniture/equipment,3049.0,<100,<1,1,female div/dep/mar,none,1,life insurance,45,stores,own,1,unskilled resident,1,none,yes
1,no checking,24,critical/other existing credit,education,1927.0,no known savings,1<=X<4,3,female div/dep/mar,none,2,car,33,none,own,2,skilled,1,yes,yes
2,no checking,12,existing paid,new car,640.0,<100,1<=X<4,4,male div/sep,none,2,real estate,49,none,own,1,unskilled resident,1,none,yes
3,0<=X<200,6,existing paid,radio/tv,2063.0,<100,<1,4,male mar/wid,none,3,car,30,none,rent,1,high qualif/self emp/mgmt,1,yes,yes
4,<0,12,critical/other existing credit,used car,1526.0,<100,>=7,4,male single,none,4,no known property,66,none,for free,2,high qualif/self emp/mgmt,1,none,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,0<=X<200,30,critical/other existing credit,new car,4249.0,<100,unemployed,4,male mar/wid,none,2,car,28,none,own,2,high qualif/self emp/mgmt,1,none,yes
896,0<=X<200,24,critical/other existing credit,radio/tv,1216.0,100<=X<500,<1,4,male single,none,4,no known property,38,bank,own,2,skilled,2,none,yes
897,<0,36,existing paid,furniture/equipment,2712.0,<100,>=7,2,male single,none,2,life insurance,41,bank,own,1,skilled,2,none,yes
898,<0,24,all paid,furniture/equipment,3552.0,<100,4<=X<7,3,male single,none,4,car,27,bank,own,1,skilled,1,none,yes


## Start the Model Building

Use `auto-sklearn` to train a classifier on the training data.

By default, `auto-sklearn` keeps only the models used in the final ensemble in memory and deletes all other models once the optimization has completed. If you want to analyse all constructed models, you have to provide `delete_tmp_folder_after_terminate`, `max_models_on_disc` and `tmp_folder`, as done in the following cell.

In [2]:
import os
import shutil

import autosklearn.classification
from autosklearn.metrics import accuracy

# Start every execution of this cell from a clean slate: drop the working
# directory a previous run may have left behind.
workdir = './_auto-sklearn_/'
if os.path.exists(workdir):
    shutil.rmtree(workdir)

automl = autosklearn.classification.AutoSklearnClassifier(
    metric=accuracy,
    time_left_for_this_task=60,
    per_run_time_limit=20,
    # Optional: the three parameters below make all models generated by
    # auto-sklearn available for analysis. Without them, only the top 50
    # models can be inspected.
    tmp_folder=workdir,
    delete_tmp_folder_after_terminate=False,
    max_models_on_disc=None,
)
# `fit` is the last expression, so the notebook shows the fitted estimator.
automl.fit(X_train, y_train, dataset_name='credit-g')

AutoSklearnClassifier(delete_tmp_folder_after_terminate=False,
                      max_models_on_disc=None, metric=accuracy,
                      per_run_time_limit=20, time_left_for_this_task=60,
                      tmp_folder='./_auto-sklearn_/')

## Visualize the Optimization Run in XAutoML

In [3]:
from xautoml.main import XAutoML
from xautoml.adapter import import_auto_sklearn

# Load the test portion using the same arguments (31, 0) as the training cell.
# `openml_task` is the function imported in the "Load Data" cell.
X_test, y_test = openml_task(31, 0, test=True)

# Hand the fitted `automl` object from the previous cell to the auto-sklearn
# adapter and pass the result, together with the test data, to XAutoML.
# NOTE(review): `rh` presumably stands for "run history" — confirm against
# `import_auto_sklearn`.
rh = import_auto_sklearn(automl)
main = XAutoML(rh, X_test, y_test)
# Leave `main` as the last expression so the notebook displays it.
main

<xautoml.main.XAutoML at 0x7fee0c5d0ee0>

In [None]:
# Import `gcx` in this cell so that it runs on a fresh kernel instead of
# depending on state left over from an earlier session.
from xautoml._helper import gcx

# Request the class report for the entry identified by '00:03:04'.
# NOTE(review): this id presumably refers to one candidate of the
# optimization run above and may differ between runs — confirm before
# re-executing.
xautoml_report = gcx().get_class_report('00:03:04')
xautoml_report

In [None]:
# Import `gcx` in this cell so that it runs on a fresh kernel instead of
# depending on state left over from an earlier session.
from xautoml._helper import gcx

# Retrieve the data and the pipeline object for the entry identified by
# '00:03:04'; the call returns three values that are unpacked here.
# NOTE(review): this id presumably refers to one candidate of the
# optimization run above and may differ between runs — confirm before
# re-executing.
xautoml_X, xautoml_y, xautoml_pipeline = gcx().get_pipeline('00:03:04')
xautoml_pipeline

In [None]:
# Import `gcx` in this cell so that it runs on a fresh kernel instead of
# depending on state left over from an earlier session.
from xautoml._helper import gcx

# Show the fANOVA overview for the arguments '00:03' and 'SOURCE'.
# NOTE(review): '00:03' presumably identifies a pipeline structure and
# 'SOURCE' a step within it — confirm against `fanova_overview`.
gcx().fanova_overview('00:03', 'SOURCE')