## Check the setup and connect to the database

In [None]:
# Execute the setup-check notebook from this kernel.
# NOTE(review): later cells use `myconn`, which is presumably created by that notebook — confirm.
%run "010-check_setup.ipynb"

# Tables from SAP HANA

In [None]:
# Column roles used throughout the notebook: the row key, the target label,
# and the list of feature columns selected from the HANA tables.
column_id = "PassengerId"
column_label = "Survived"
features_subset = [
    "NameFirstLetter", "FarePerPerson", "SibSp", "ParCh", "Gender",
    "PClass", "Embarked", "Title", "Age", "PersonsPerTicket",
]

In [None]:
# Training dataframe: key column, the selected features, and the label column,
# with the label cast to NVARCHAR(1).
hdf_titanic_train = (
    myconn.table("DATA_LABELED_FE")
    .select([column_id, *features_subset, column_label])
    .cast(column_label, 'NVARCHAR(1)')
)

# Model training with different [Hyperparameters](https://en.wikipedia.org/wiki/Hyperparameter_(machine_learning))

In [None]:
import hana_ml.algorithms.pal.unified_classification as hml_uc

In [None]:
# Hyperparameters unpacked into the UnifiedClassification constructor.
rdt_params = {
    "random_state": 2,
    "min_samples_leaf": 10,
    "n_estimators": 5,
    "max_depth": 2,
}

In [None]:
# Instantiate the unified classifier with the RandomDecisionTree function
# and the hyperparameters collected in `rdt_params`.
uc_rdt_v6 = hml_uc.UnifiedClassification(
    func='RandomDecisionTree',
    **rdt_params,
)

In [None]:
# Fit the classifier on the training dataframe.
# The key and label names come from `column_id` / `column_label` so they stay
# in sync with the columns selected into `hdf_titanic_train`.
# The partition is stratified on the label column with a fixed random state;
# the trailing `;` suppresses the cell output.
uc_rdt_v6.fit(
    data=hdf_titanic_train,
    key=column_id,
    label=column_label,
    training_percent=0.8,
    partition_method='stratified',
    stratified_column=column_label,
    partition_random_state=2,
);

In [None]:
# Show, as the cell output, what `get_parameters()` returns for the classifier.
uc_rdt_v6.get_parameters()

## Generate a model report

In [None]:
from hana_ml.visualizers.unified_report import UnifiedReport
# Build the unified report for the trained classifier and render it in the notebook.
(
    UnifiedReport(uc_rdt_v6)
    .build()
    .display()
)

## Debrief the model

In [None]:
from hana_ml.visualizers.model_debriefing import TreeModelDebriefing

In [None]:
# Debrief the tree model held in the first element of `model_`;
# the trailing `;` suppresses the cell's return-value output.
TreeModelDebriefing.tree_debrief_with_dot(
    uc_rdt_v6.model_[0]
);

# Store the model

In [None]:
from hana_ml.model_storage import ModelStorage

In [None]:
# Model storage handle bound to the existing connection `myconn`.
ms = ModelStorage(myconn)

In [None]:
# Name and version assigned to the model before it is saved below.
uc_rdt_v6.name, uc_rdt_v6.version = 'CodeJam-Titanic-Classification', 6

In [None]:
# Persist the model, replacing any existing entry, without saving the report.
ms.save_model(
    model=uc_rdt_v6,
    if_exists='replace',
    save_report=False,
)

In [None]:
# Show, as the cell output, what the model storage returns from `list_models()`.
ms.list_models()

# Call prediction

In [None]:
# Data to score: the key column plus the same feature subset used for training
# (no label column is selected).
hdf_titanic_test = (
    myconn.table('DATA_TO_PREDICT_FE')
    .select([column_id, *features_subset])
)

In [None]:
# Score the test dataframe. The key comes from `column_id` so it matches the
# key column selected into `hdf_titanic_test`.
hdf_res = uc_rdt_v6.predict(hdf_titanic_test, key=column_id)

In [None]:
# Preview the first four predictions: key, SCORE and CONFIDENCE columns only.
# The key column name comes from `column_id`, matching the key used for prediction.
display(
    hdf_res.select(column_id, 'SCORE', 'CONFIDENCE')
    .head(4)
    .collect()
)

## Global Interpretation using [Shapley values](https://en.wikipedia.org/wiki/Shapley_value#In_machine_learning)

Create a Shapley explainer to explain the output of the machine learning model: https://help.sap.com/doc/cd94b08fe2e041c2ba778374572ddba9/latest/en-US/hana_ml.visualizers.html#hana_ml.visualizers.model_debriefing.TreeModelDebriefing.shapley_explainer

In [None]:
from hana_ml.visualizers.model_debriefing import TreeModelDebriefing
import numpy as np

In [None]:
# NOTE(review): the disabled line below presumably works around NumPy 2.x
# scalar-repr changes affecting the plot — confirm whether it is still needed.
#np.set_printoptions(legacy="1.25")

# Build the Shapley explainer from the prediction result and the scored data,
# then draw the summary plot. Key and label names come from the configuration
# variables instead of repeated string literals.
shapley_explainer = TreeModelDebriefing.shapley_explainer(
    hdf_res,
    hdf_titanic_test,
    key=column_id,
    label=column_label,
)
shapley_explainer.summary_plot()