## Load the data set

In [None]:
from xautoml.util.datasets import hearts

# Load features and labels from the heart CSV, requesting the training
# split via train=True.
# NOTE(review): X_train is later accessed through `.values` and `.columns`,
# so it is presumably a pandas DataFrame -- confirm against `hearts`.
X_train, y_train = hearts('../heart.csv', train=True)
# Bare trailing expression: the notebook renders X_train as the cell output.
X_train

## Start the Model Building

In [None]:
import pickle

from dswizard.components.classification.random_forest import RandomForest
from dswizard.components.data_preprocessing.knn_imputer import KNNImputerComponent
from dswizard.components.data_preprocessing.standard_scaler import StandardScalerComponent
from dswizard.components.pipeline import ConfigurablePipeline
from dswizard.components.feature_preprocessing.one_hot_encoding import OneHotEncoderComponent
from dswizard.components.sklearn import ColumnTransformerComponent
from dswizard.optimizers.structure_generators.fixed import FixedStructure
from dswizard.optimizers.bandit_learners import PseudoBandit
from dswizard.optimizers.config_generators import Hyperopt
from dswizard.core.master import Master
from dswizard.core.model import Dataset
from dswizard.util import util

# Configure logging with a log file placed next to the other run artifacts.
util.setup_logging('output/fixed/log.txt')

# Wrap the training data for dswizard: raw arrays via `.values`, with the
# column labels handed over separately as feature names.
# NOTE(review): task=-2 is an opaque task code here -- confirm its meaning
# in dswizard before changing it.
ds = Dataset(
    X_train.values,
    y_train.values,
    task=-2,
    metric='accuracy',
    feature_names=X_train.columns,
)

# Positional column indices routed to each preprocessing branch.
# NOTE(review): presumably the categorical vs. numerical columns of
# heart.csv -- verify against the data set.
one_hot_columns = [1, 2, 6, 8, 10]
numerical_columns = [0, 3, 4, 5, 7, 9]

# Branch 1: one-hot encoding.
one_hot_encoder = OneHotEncoderComponent()

# Branch 2: KNN imputation followed by standard scaling.
numerical_pipeline = ConfigurablePipeline(steps=[
    ('knn_imputer', KNNImputerComponent()),
    ('standard_scaler', StandardScalerComponent()),
])

# Combine both branches; columns listed in neither branch are passed through.
column_transformer = ColumnTransformerComponent(
    [
        ('one_hot_encoder', one_hot_encoder, one_hot_columns),
        ('numerical', numerical_pipeline, numerical_columns),
    ],
    remainder='passthrough',
)

# The fixed pipeline structure: preprocessing followed by a random forest.
steps = [
    ('data_preprocessing', column_transformer),
    ('random_forest', RandomForest()),
]

master = Master(
    ds=ds,
    working_directory='output/fixed/',
    n_workers=1,
    model='rf_complete.pkl',

    # NOTE(review): limits are presumably in seconds -- confirm in dswizard.
    wallclock_limit=300,
    cutoff=10,
    pre_sample=False,

    # Hyperopt proposes configurations for the fixed structure given above.
    config_generator_class=Hyperopt,
    config_generator_kwargs={'min_points_in_model': 50},
    structure_generator_class=FixedStructure,
    structure_generator_kwargs={'steps': steps},
    bandit_learner_class=PseudoBandit,
)

# Run the optimization; it yields a pipeline, the run history and an ensemble.
pipeline, run_history, ensemble = master.optimize()

# Persist run history and ensemble together so later cells can reload them
# without repeating the optimization.
results = (run_history, ensemble)
with open('output/fixed/dswizard.pkl', 'wb') as pkl_file:
    pickle.dump(results, pkl_file)


In [None]:
import pickle

# Restore the (run_history, ensemble) pair written by the model-building cell.
# Only unpickle files you produced yourself: pickle can execute arbitrary code.
with open('output/fixed/dswizard.pkl', 'rb') as pkl_file:
    restored = pickle.load(pkl_file)

run_history, ensemble = restored

## Get the Score of the Final Ensemble

In [None]:
from sklearn.metrics import accuracy_score

# Load features and labels from the same CSV, this time requesting the
# test split via test=True.
X_test, y_test = hearts('../heart.csv', test=True)

# The ensemble is fed the raw value array, mirroring how the training data
# was handed to dswizard.
predictions = ensemble.predict(X_test.values)

# Trailing expression: the notebook shows the accuracy as the cell output.
accuracy_score(y_true=y_test, y_pred=predictions)

## View the Models found by dswizard

In [None]:
from xautoml.main import XAutoML
from xautoml.adapter import import_dswizard

# Convert the dswizard run history and ensemble into the object that is
# handed to XAutoML below.
rh = import_dswizard(run_history, ensemble)
# Build the XAutoML object from the converted results and the test data
# loaded in the scoring cell.
main = XAutoML(rh, X_test, y_test)
# Bare trailing expression: the notebook renders the XAutoML object as the
# cell output.
main