In [None]:
import pandas as pd
import hana_ml
from hana_ml import dataframe
from data_load_utils import Settings
from hana_ml.algorithms.pal.trees import DecisionTreeClassifier, RandomForestClassifier, HybridGradientBoostingClassifier

# Connection details are read from an external ini file, so no credentials
# appear in the notebook itself.
config_file = "../../config/e2edata.ini"
url, port, user, pwd = Settings.load_config(config_file)

# Shared connection used by every cell below.
connection_context = dataframe.ConnectionContext(url, port, user, pwd)

# Model Debriefing for Decision Tree

## Create Table

In [None]:
# Name of the local temporary table that holds the decision-tree sample data.
table_name = '#PAL_VISUALIZE_DT_DATA_TBL'

# DDL template; the '{}' slot receives table_name when the statement is run.
create_table_sql = '''CREATE LOCAL TEMPORARY TABLE {} (
    "OUTLOOK" VARCHAR(20),
    "TEMP" INTEGER,
    "HUMIDITY" DOUBLE,
    "WINDY" VARCHAR(10),
    "CLASS" VARCHAR(20)
);'''

# Sample rows, in the column order of the DDL:
# (OUTLOOK, TEMP, HUMIDITY, WINDY, CLASS)
data = [
    # OUTLOOK = 'Sunny'
    ('Sunny', 75, 70, 'Yes', 'Play'),
    ('Sunny', 80, 90, 'Yes', 'Do not Play'),
    ('Sunny', 85, 85, 'No', 'Do not Play'),
    ('Sunny', 72, 95, 'No', 'Do not Play'),
    ('Sunny', 69, 70, 'No', 'Play'),
    # OUTLOOK = 'Overcast'
    ('Overcast', 72, 90, 'Yes', 'Play'),
    ('Overcast', 83, 78, 'No', 'Play'),
    ('Overcast', 64, 65, 'Yes', 'Play'),
    ('Overcast', 81, 75, 'No', 'Play'),
    # OUTLOOK = 'Rain'
    ('Rain', 71, 80, 'Yes', 'Do not Play'),
    ('Rain', 65, 70, 'Yes', 'Do not Play'),
    ('Rain', 75, 80, 'No', 'Play'),
    ('Rain', 68, 80, 'No', 'Play'),
    ('Rain', 70, 96, 'No', 'Play'),
]

In [None]:
# Raw DB-API cursor on the shared connection, used for DDL and bulk inserts.
cursor = connection_context.connection.cursor()
cursor.execute(create_table_sql.format(table_name))

# One '?' bind marker per column of a data row.
placeholders = ', '.join('?' for _ in data[0])
insert_sql = "INSERT INTO {} VALUES ({})".format(table_name, placeholders)
cursor.executemany(insert_sql, data)

## Create Data Frame

In [None]:
# Wrap the table that was just filled, then preview its first six rows.
df = connection_context.table(table_name)
df_preview = df.head(6)
df_preview.collect()

## Import TreeModelDebriefing Class

In [None]:
from hana_ml.visualizers.model_debriefing import TreeModelDebriefing

## Generate Model

In [None]:
# Hyper-parameters of the decision tree; the model is requested in JSON form
# (model_format='json').
dt_params = dict(
    algorithm='c45',
    min_records_of_parent=2,
    min_records_of_leaf=1,
    thread_ratio=0.4,
    split_threshold=1e-5,
    model_format='json',
    output_rules=True,
)
dtc = DecisionTreeClassifier(**dt_params)

# Every column except CLASS is a feature; CLASS is the label.
dt_features = ['OUTLOOK', 'TEMP', 'HUMIDITY', 'WINDY']
dtc.fit(data=df, features=dt_features, label='CLASS')

## Visualize Tree Model in JSON format

In [None]:
# Visualize the fitted decision tree (trained with model_format='json').
# Kept as the bare final expression so that whatever the call returns is
# shown as the cell output.
TreeModelDebriefing.tree_debrief(dtc.model_)

## Export Tree Model as an HTML File (JSON Format)

In [None]:
# Optional step, left disabled so that running the notebook does not export
# anything. Uncomment to export the decision-tree model; the second argument
# ('Test') is presumably the output file name -- confirm in the hana_ml docs.
# TreeModelDebriefing.tree_export(dtc.model_, 'Test')

## Visualize Tree Model in DOT format

In [None]:
# Parse the decision-tree model ahead of the DOT visualization in the next
# cell. NOTE(review): presumably tree_debrief_with_dot relies on this call
# having run first -- confirm in the hana_ml docs.
TreeModelDebriefing.tree_parse(dtc.model_)

In [None]:
# DOT-based visualization of the decision tree. iframe_height=500 presumably
# sets the height of the embedded frame -- confirm the unit in the hana_ml docs.
TreeModelDebriefing.tree_debrief_with_dot(dtc.model_,iframe_height=500)

## Export Tree Model as an HTML File (DOT Format)

In [None]:
# Optional step, left disabled so that running the notebook does not export
# anything. Uncomment to export the DOT-based view of the decision tree; the
# second argument ('Test') is presumably the output file name -- confirm.
# TreeModelDebriefing.tree_export_with_dot(dtc.model_, 'Test')

# Model Debriefing for Random Decision Tree

## Generate Model

In [None]:
# Small forest of three trees; random_state is fixed so that repeated runs
# use the same seed.
rf_params = dict(
    n_estimators=3,
    max_features=3,
    random_state=2,
    split_threshold=0.00001,
    calculate_oob=True,
    min_samples_leaf=1,
    thread_ratio=1.0,
)
rfc = RandomForestClassifier(**rf_params)

# Same feature/label split as the decision tree above.
rf_features = ['OUTLOOK', 'TEMP', 'HUMIDITY', 'WINDY']
rfc.fit(data=df, features=rf_features, label='CLASS')

In [None]:
# predict() is told to use "ID" as the key column; the training table has no
# such column, so one is added with add_id first.
rf_input = df.add_id("ID")
res = rfc.predict(
    data=rf_input,
    key="ID",
    features=['OUTLOOK', 'TEMP', 'HUMIDITY', 'WINDY'],
    verbose=True,
)

In [None]:
# Preview the first six rows of the prediction result; kept as the bare final
# expression so that the collected rows are shown as the cell output.
res.head(6).collect()

## Visualize Tree Model in XML format

In [None]:
# Same debrief call as for the decision tree, now applied to the fitted
# random-forest model.
TreeModelDebriefing.tree_debrief(rfc.model_)

## Visualize Tree Model in DOT format

In [None]:
# Parse first, then render the DOT-based view of the random-forest model.
# NOTE(review): presumably tree_debrief_with_dot relies on tree_parse having
# run for the same model -- confirm in the hana_ml docs.
TreeModelDebriefing.tree_parse(rfc.model_)
TreeModelDebriefing.tree_debrief_with_dot(rfc.model_)

## Delete Table

In [None]:
# Remove the decision-tree sample table now that both tree models that were
# trained on it have been visualized.
delete_table_sql = 'DROP TABLE {}'
try:
    cursor.execute(delete_table_sql.format(table_name))
finally:
    # This cursor is not used again: the HGBT section below opens its own
    # cursor before executing any SQL, so release this one even if the DROP
    # statement fails.
    cursor.close()

# Model Debriefing for Hybrid Gradient Boosting Tree

## Create Table

In [None]:
# Name of the column table that holds the HGBT sample data.
table_name = 'PAL_VISUALIZE_HGBT_DATA_TBL'

# DDL template; the '{}' slot receives table_name when the statement is run.
# Written as explicit per-line literals so that the exact statement text,
# including the blanks that follow two of the commas, stays visible.
create_table_sql = (
    'CREATE COLUMN TABLE {} (\n'
    '    "ATT1" DOUBLE, \n'
    '    "ATT2" DOUBLE, \n'
    '    "ATT3" DOUBLE,\n'
    '    "ATT4" DOUBLE,\n'
    '    "LABEL" varchar(50)\n'
    ');'
)

# Sample rows, in the column order of the DDL: (ATT1, ATT2, ATT3, ATT4, LABEL)
data = [
    # ATT3 = 100
    (1.0, 10.0, 100, 1.0, 'A'),
    (1.1, 10.1, 100, 1.0, 'A'),
    (1.2, 10.2, 100, 1.0, 'A'),
    (1.3, 10.4, 100, 1.0, 'A'),
    (1.2, 10.3, 100, 1.0, 'A'),
    # ATT3 = 400
    (4.0, 40.0, 400, 4.0, 'B'),
    (4.1, 40.1, 400, 4.0, 'B'),
    (4.2, 40.2, 400, 4.0, 'B'),
    (4.3, 40.4, 400, 4.0, 'B'),
    (4.2, 40.3, 400, 4.0, 'A'),
    # ATT3 = 900
    (9.0, 90.0, 900, 2.0, 'A'),
    (9.1, 90.1, 900, 1.0, 'B'),
    (9.2, 90.2, 900, 2.0, 'B'),
    (9.3, 90.4, 900, 1.0, 'B'),
    (9.2, 90.3, 900, 1.0, 'B'),
]

In [None]:
# Raw DB-API cursor on the shared connection, used for DDL and bulk inserts.
cursor = connection_context.connection.cursor()

# Unlike a local temporary table, this column table outlives the session.
# If an earlier run stopped before the final DROP TABLE cell, the table still
# exists and CREATE TABLE would fail, so remove any leftover copy first.
# The name is created unquoted (and is already upper case), so it can be
# looked up verbatim in the catalog of the current schema.
cursor.execute(
    "SELECT COUNT(*) FROM SYS.TABLES "
    "WHERE SCHEMA_NAME = CURRENT_SCHEMA AND TABLE_NAME = ?",
    (table_name,))
if cursor.fetchone()[0] > 0:
    cursor.execute('DROP TABLE {}'.format(table_name))

cursor.execute(create_table_sql.format(table_name))

# One '?' bind marker per column of a data row.
placeholders = ', '.join(['?'] * len(data[0]))
cursor.executemany("INSERT INTO {} VALUES ({})".format(table_name, placeholders), data)

## Create Data Frame

In [None]:
# Wrap the HGBT table that was just filled, then preview its first six rows.
hgbt_df = connection_context.table(table_name)
hgbt_preview = hgbt_df.head(6)
hgbt_preview.collect()

## Generate Model

In [None]:
# Value triples handed to param_range, one entry per tuned hyper-parameter.
hgbt_param_range = [
    ('learning_rate', [0.1, 0.45, 1.0]),
    ('n_estimators', [4, 3, 10]),
    ('split_threshold', [0.1, 0.45, 1.0]),
]

hgbc = HybridGradientBoostingClassifier(
    n_estimators=4,
    split_threshold=0,
    learning_rate=0.5,
    fold_num=5,
    max_depth=6,
    evaluation_metric='error_rate',
    ref_metric=['auc'],
    param_range=hgbt_param_range,
)

# All four ATT columns are features; LABEL is the label.
hgbt_features = ['ATT1', 'ATT2', 'ATT3', 'ATT4']
hgbc.fit(data=hgbt_df, features=hgbt_features, label='LABEL')

## Visualize Tree Model in JSON format

In [None]:
# Same debrief call as for the other models, now applied to the fitted
# hybrid gradient boosting model.
TreeModelDebriefing.tree_debrief(hgbc.model_)

## Visualize Tree Model in DOT format

In [None]:
# Parse first, then render the DOT-based view of the HGBT model.
# NOTE(review): presumably tree_debrief_with_dot relies on tree_parse having
# run for the same model -- confirm in the hana_ml docs.
TreeModelDebriefing.tree_parse(hgbc.model_)
TreeModelDebriefing.tree_debrief_with_dot(hgbc.model_)

## Delete Table

In [None]:
# Remove the HGBT sample table. It is a regular column table, so it would
# otherwise remain in the schema after the notebook finishes.
delete_table_sql = 'DROP TABLE {}'
try:
    cursor.execute(delete_table_sql.format(table_name))
finally:
    # Nothing after this point uses the cursor; release it even if the DROP
    # statement fails.
    cursor.close()