# Credit Prediction

## Load Data
This dataset classifies people described by a set of attributes as good or bad credit risks.

In [None]:
from xautoml.util.datasets import openml_task

# Request the training portion of OpenML task 31, fold 0 (the `train=True`
# flag presumably restricts the result to the training split -- confirm
# against xautoml.util.datasets).
train_split = openml_task(31, 0, train=True)
X_train, y_train = train_split

# Keep the feature table as the cell's final expression so the notebook
# renders it.
X_train

## Start the Model Building

You load the dataset into an AutoML tool you have found on the internet to create a predictive model. After starting the optimization, the AutoML tool tests various possible models and evaluates how good each candidate is. In the meantime, you have to wait for the program to finish its optimization.

In [None]:
import numpy as np
from scipy.stats import uniform, randint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler

# Specify search space
param_distribution = {
    # scipy's randint draws from the half-open interval [low, high), so the
    # upper bound needs "+ 1" for "keep every feature" to be a reachable value.
    'k_best__k': randint(1, X_train.shape[1] + 1),
    # uniform(loc, scale) covers [loc, loc + scale]; scikit-learn interprets a
    # float min_samples_split as a fraction of the training samples.
    'clf__min_samples_split': uniform(loc=0, scale=0.5),
    'clf__criterion': ['gini', 'entropy'],
}

# Split the columns by dtype so each group gets a suitable preprocessor:
# everything non-numeric is treated as categorical.
cat_columns = make_column_selector(dtype_exclude=np.number)(X_train)
num_columns = make_column_selector(dtype_include=np.number)(X_train)

# Creating the classifier
pipeline = Pipeline(steps=[
    # Every column is either numeric or non-numeric, so the two transformers
    # together cover all input columns and the feature count is preserved.
    ('enc', ColumnTransformer([
        ('ordinal', OrdinalEncoder(), cat_columns),
        ('scaler', MinMaxScaler(), num_columns),
    ])),
    ('k_best', SelectKBest()),
    # Seed the forest as well: the search below is seeded, but an unseeded
    # forest would still make the cross-validation scores vary between runs.
    ('clf', RandomForestClassifier(random_state=0)),
])

# Evaluate 100 randomly sampled configurations with 3-fold cross-validation,
# ranking them by accuracy.
random_search = RandomizedSearchCV(
    pipeline,
    param_distributions=param_distribution,
    cv=3,
    scoring='accuracy',
    n_iter=100,
    random_state=0,
)
random_search.fit(X_train, y_train)

## Visualize the Optimization Run in XAutoML

In [None]:
from xautoml.main import XAutoML
from xautoml.adapter import import_sklearn
from xautoml.util.datasets import openml_task

# Hand the fitted randomized search to XAutoML's scikit-learn adapter
# (presumably this yields the run history XAutoML visualizes -- confirm
# against xautoml.adapter).
rh = import_sklearn(random_search)

# Request the test portion of the same OpenML task and fold that the
# training cell used.
X_test, y_test = openml_task(31, 0, test=True)

main = XAutoML(rh, X_test, y_test)

# Keep the XAutoML object as the cell's final expression so the notebook
# renders it.
main