# Credit Prediction

## Load Data
This dataset classifies people described by a set of attributes as good or bad credit risks.

In [None]:
from xautoml.util.datasets import openml_task

# Load the training features and labels via the `openml_task` helper.
# NOTE(review): the positional arguments are presumably the OpenML task id (31)
# and the fold index (0) — confirm against `openml_task`'s signature.
X_train, y_train = openml_task(31, 0, train=True)
# Bare expression as the last statement of the cell so the notebook displays the features.
X_train

## Install Optuna
If you haven't installed Optuna yet, you can install it with pip:

```
pip install optuna
```

## Start the Model Building

You load the dataset into an AutoML tool you found on the internet to create a predictive model. After you start the optimization, the AutoML tool tests various candidate models and evaluates how good each one is. In the meantime, you have to wait for the program to finish its optimization.

In [None]:
import numpy as np
import optuna
import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm
from optuna import Trial
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler

# Fitted pipelines keyed by Optuna trial number; filled in by `objective`
# and later handed to `import_optuna` together with the study.
models = {}


def objective(trial: Trial):
    """Evaluate one sampled pipeline configuration and return its CV score.

    Samples hyperparameters for a ``SelectKBest`` + ``RandomForestClassifier``
    pipeline, scores it with 3-fold cross-validation on the module-level
    ``X_train`` / ``y_train``, and stores the pipeline, refitted on the full
    training data, in the module-level ``models`` dict under ``trial.number``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the three cross-validation scores. No ``scoring`` is passed,
        so the pipeline's default score is used, which for a classifier is
        mean accuracy.
    """
    # Fractional `min_samples_split` and `max_features` must be strictly
    # positive in scikit-learn, so the search ranges start just above zero
    # rather than at 0, a value the classifier would reject.
    smallest_fraction = 1e-6

    # Keys follow the `<step>__<param>` convention so they can be passed
    # straight to `Pipeline.set_params`.
    hyperparameters = {
        'k_best__k': trial.suggest_int('k_best__k', 1, X_train.shape[1]),
        'clf__min_samples_split': trial.suggest_float('clf__min_samples_split', smallest_fraction, 0.5),
        'clf__max_features': trial.suggest_float('clf__max_features', smallest_fraction, 1),
        'clf__criterion': trial.suggest_categorical('clf__criterion', ['gini', 'entropy'])
    }

    # Split the columns by dtype: non-numeric columns are ordinal-encoded,
    # numeric columns are min-max scaled.
    cat_columns = make_column_selector(dtype_exclude=np.number)(X_train)
    num_columns = make_column_selector(dtype_include=np.number)(X_train)

    pipeline = Pipeline(steps=[
        ('enc', ColumnTransformer([
            ('ordinal', OrdinalEncoder(), cat_columns),
            ('scaler', MinMaxScaler(), num_columns)
        ])),
        ('k_best', SelectKBest()),
        ('clf', RandomForestClassifier())
    ])
    pipeline.set_params(**hyperparameters)

    score = sklearn.model_selection.cross_val_score(pipeline, X_train, y_train, cv=3)
    accuracy = score.mean()

    # Store fitted model: refit on the complete training data so the model
    # kept for this trial is usable outside of cross-validation.
    models[trial.number] = pipeline.fit(X_train, y_train)

    return accuracy


# Maximize the value returned by `objective` over 200 trials, sampling with TPE.
tpe_sampler = optuna.samplers.TPESampler()
study = optuna.create_study(direction='maximize', sampler=tpe_sampler)
study.optimize(objective, n_trials=200)

## Visualize the Optimization Run in XAutoML

In [None]:
from xautoml.main import XAutoML
from xautoml.adapter import import_optuna
from xautoml.util.datasets import openml_task

# Load the test features and labels via the `openml_task` helper, using the
# same positional arguments as the training cell.
# NOTE(review): presumably OpenML task id 31, fold 0 — confirm against `openml_task`.
X_test, y_test = openml_task(31, 0, test=True)

# Convert the Optuna study plus the per-trial fitted pipelines into the object
# that XAutoML consumes.
# NOTE(review): `metric='accuracy'` presumably names the value returned by
# `objective` — confirm against `import_optuna`.
rh = import_optuna(study, models, metric='accuracy')
main = XAutoML(rh, X_test, y_test)
# Bare expression as the last statement of the cell so the notebook displays it.
main