# Python HANA ML API

<div class="alert alert-block alert-info">
<b>Using Composite Variables in Predictive Model.</b> <br>
</div>

### Declare Training Data

In [1]:
# Open the connection through the HANA secure user store entry 'MLMDA_KEY',
# so that no credentials are written in the notebook.
from hana_ml import dataframe as hd

conn = hd.ConnectionContext(userkey='MLMDA_KEY')

# Declare the training set as a HANA DataFrame over the sample claims table,
# ordered by claim identifier.
sql_cmd = (
    'SELECT * FROM "APL_SAMPLES"."AUTO_CLAIMS_FRAUD" '
    'ORDER BY CLAIM_ID'
)
training_data = hd.DataFrame(connection_context=conn, select_statement=sql_cmd)

# Preview the first 8 rows.
training_data.head(8).collect()

Unnamed: 0,CLAIM_ID,DAYS_TO_REPORT,BODILY_INJURY_AMOUNT,PROPERTY_DAMAGE,PREVIOUS_CLAIMS,PAYMENT_METHOD,IS_REAR_END_COLLISION,PREM_AMOUNT,AGE,GENDER,MARITAL_STATUS,INCOME_ESTIMATE,INCOME_CATEGORY,POLICY_HOLDER,IS_FRAUD
0,CL_0000765,8,0,1957,0,CC,No,Safedriving_discount,52,Male,Single,15906.0,15,Y,No
1,CL_0000832,30,2541,3843,0,CC,No,Safedriving_discount,85,Female,Single,91100.6,75,N,No
2,CL_0002015,4,0,25719,0,CC,No,Standard,45,Male,Married,67085.4,50,N,No
3,CL_0002854,0,0,83,1,Auto,No,Standard,75,Male,Married,49561.0,35,N,No
4,CL_0002869,22,0,1264,0,CC,Yes,Standard,48,Female,Married,39598.7,35,N,No
5,CL_0003400,3,9903,7333,0,Auto,No,Safedriving_discount,41,Male,Single,48271.8,35,N,Yes
6,CL_0005084,14,0,1882,0,CC,No,Safedriving_discount,26,Female,Single,76074.1,75,Y,No
7,CL_0005346,12,15399,8864,0,Auto,No,Standard,73,Male,Married,61438.2,50,N,No


### Extend AutoClassifier by overriding the method that builds the variable-roles table

In [3]:
from hana_ml.algorithms.apl.classification import AutoClassifier
from hana_ml.algorithms.apl.apl_base import APLArtifactTable

class AutoClassifier_ExplicitCompositeRoles(AutoClassifier):
    """AutoClassifier whose variable roles can be declared explicitly.

    When the instance carries a truthy ``composite_roles`` attribute, those
    rows are used as the variable-roles table (with the
    ``VARIABLE_ROLES_WITH_COMPOSITES_OID`` table type) instead of delegating
    to the parent implementation. Without that attribute the parent
    behaviour is kept unchanged.

    NOTE(review): each row of ``composite_roles`` is presumably
    (variable name, role, composition type, component name, OID) -- confirm
    against the APL documentation for the roles-with-composites table type.
    """

    def _create_var_roles_table(self, data, key, label, features):
        """Return the variable-roles artifact table.

        Parameters
        ----------
        data, key, label, features
            Forwarded unchanged to the parent implementation. They are not
            used when ``composite_roles`` is set on the instance.

        Returns
        -------
        An ``APLArtifactTable`` built from ``self.composite_roles`` when that
        attribute is set and truthy; otherwise whatever the parent
        implementation returns.
        """
        composite_roles = getattr(self, 'composite_roles', None)
        if composite_roles:
            return APLArtifactTable(
                name='#VARIABLE_ROLES_{}'.format(self.id),
                type_name=APLArtifactTable.VARIABLE_ROLES_WITH_COMPOSITES_OID,
                apl_version=self._apl_version,
                data=composite_roles)
        # Zero-argument super() starts the lookup right after this subclass.
        # The previous super(AutoClassifier, self) started after AutoClassifier
        # and would therefore skip any override defined on AutoClassifier itself.
        return super()._create_var_roles_table(data, key, label, features)

### Build the model

In [4]:
import pandas as pd

# Create the model
model = AutoClassifier_ExplicitCompositeRoles(conn_context=conn, variable_auto_selection = True)

# Explicit variable roles, one tuple per row of the roles table.
# GENDER_AGE is declared from the two components GENDER and AGE
# (NOTE(review): 'cross1'/'cross2' presumably tag the two sides of the
# crossing -- confirm against the APL documentation).
# CLAIM_ID is skipped and IS_FRAUD is the target.
model.composite_roles = (
    ('GENDER_AGE', 'input', 'cross1', 'GENDER', None),
    ('GENDER_AGE', 'input', 'cross2', 'AGE', None),
    ('CLAIM_ID', 'skip',  None, None, None),
    ('IS_FRAUD', 'target', None, None, None)
)

# Train the model
model.set_params(cutting_strategy = 'random with no test')
model.fit(training_data, label='IS_FRAUD', key='CLAIM_ID')

# Debrief the trained model: keep only the AUC row of the performance metrics
print('\r\n\x1b[1m'+ 'MODEL PERFORMANCE' + '\x1b[0m')
performance_metrics = model.get_performance_metrics()
metrics_df = pd.DataFrame(list(performance_metrics.items()), columns=["Metric", "Value"])
auc_styler = metrics_df.loc[metrics_df.Metric == 'AUC'].style
# Styler.hide_index() was removed in pandas 2.0; Styler.hide(axis="index") is
# its replacement (pandas >= 1.4). Fall back to hide_index() on older pandas.
auc_styler.hide(axis="index") if hasattr(auc_styler, "hide") else auc_styler.hide_index()


[1mMODEL PERFORMANCE[0m


Metric,Value
AUC,0.8335


In [5]:
print('\r\n\x1b[1m'+ 'VARIABLES IMPORTANCE' + '\x1b[0m')
d = model.get_feature_importances()
df = pd.DataFrame(list(d.items()), columns=["Variable", "Contribution"])
df['Contribution'] = df['Contribution'].astype(float)
# Sort before accumulating so that the running total follows the displayed
# order even if the importances are not returned already sorted.
df = df.sort_values(by=['Contribution'], ascending=False)
df['Cumulative'] = df['Contribution'].cumsum()
# Express both columns as percentages
df['Contribution'] = df['Contribution'].round(4)*100
df['Cumulative'] = df['Cumulative'].round(4)*100
# Drop variables whose contribution rounds to zero
non_zero = df['Contribution'] != 0
dfs = df[non_zero]
importance_styler = dfs.style
# Styler.hide_index() was removed in pandas 2.0; Styler.hide(axis="index") is
# its replacement (pandas >= 1.4). Fall back to hide_index() on older pandas.
importance_styler.hide(axis="index") if hasattr(importance_styler, "hide") else importance_styler.hide_index()


[1mVARIABLES IMPORTANCE[0m


Variable,Contribution,Cumulative
BODILY_INJURY_AMOUNT,44.89,44.89
GENDER_AGE,13.85,58.75
DAYS_TO_REPORT,8.92,67.66
GENDER,8.61,76.27
INCOME_CATEGORY,8.57,84.84
PAYMENT_METHOD,5.83,90.67
AGE,4.76,95.43
INCOME_ESTIMATE,4.57,100.0
