# Python HANA ML API

<div class="alert alert-block alert-info">
<b>Extracting information by partition from a trained model.</b> <br>
</div>

## Train the model

### Create a HANA DataFrame for the training data

In [1]:
# Connect using the HANA secure user store
# ('MLMDA_KEY' is the user-store key passed as `userkey`; no credentials appear in the notebook)
from hana_ml import dataframe as hd
conn = hd.ConnectionContext(userkey='MLMDA_KEY')
# Get Training Data
# The SELECT below is wrapped in a HANA DataFrame; rows are requested ordered by CLAIM_ID
sql_cmd = 'SELECT * FROM "APL_SAMPLES"."AUTO_CLAIMS_FRAUD" ORDER BY CLAIM_ID'
training_data = hd.DataFrame(conn, sql_cmd)

### Put a subset of the data in a pandas DataFrame and display it

In [2]:
# Keep only 10 rows of the HANA DataFrame, then collect() them for display in the notebook
training_data.head(10).collect()

Unnamed: 0,CLAIM_ID,DAYS_TO_REPORT,BODILY_INJURY_AMOUNT,PROPERTY_DAMAGE,PREVIOUS_CLAIMS,PAYMENT_METHOD,IS_REAR_END_COLLISION,PREM_AMOUNT,AGE,GENDER,MARITAL_STATUS,INCOME_ESTIMATE,INCOME_CATEGORY,POLICY_HOLDER,IS_FRAUD
0,CL_0000765,8,0,1957,0,CC,No,Safedriving_discount,52,Male,Single,15906.0,15,Y,No
1,CL_0000832,30,2541,3843,0,CC,No,Safedriving_discount,85,Female,Single,91100.6,75,N,No
2,CL_0002015,4,0,25719,0,CC,No,Standard,45,Male,Married,67085.4,50,N,No
3,CL_0002854,0,0,83,1,Auto,No,Standard,75,Male,Married,49561.0,35,N,No
4,CL_0002869,22,0,1264,0,CC,Yes,Standard,48,Female,Married,39598.7,35,N,No
5,CL_0003400,3,9903,7333,0,Auto,No,Safedriving_discount,41,Male,Single,48271.8,35,N,Yes
6,CL_0005084,14,0,1882,0,CC,No,Safedriving_discount,26,Female,Single,76074.1,75,Y,No
7,CL_0005346,12,15399,8864,0,Auto,No,Standard,73,Male,Married,61438.2,50,N,No
8,CL_0005677,23,2577,8883,0,CC,Yes,Safedriving_discount,62,Male,Married,127.237,14,N,No
9,CL_0005897,15,0,6390,0,CC,Yes,Standard,56,Male,Married,58641.2,50,N,No


### Build a Classification model with APL Ridge Regression

In [3]:
# Create the model
# The classifier is bound to the HANA connection `conn` created in the first cell
from hana_ml.algorithms.apl.classification import AutoClassifier
model = AutoClassifier(conn_context=conn)
# Train the model
# NOTE(review): 'random' presumably assigns rows at random to the
# Estimation / Validation / Test partitions reported at the end of the notebook — confirm in the APL docs
model.set_params(cutting_strategy = 'random')
# IS_FRAUD is the label to predict; CLAIM_ID is declared as the row key
model.fit(training_data, label='IS_FRAUD', key='CLAIM_ID')

## Debrief the trained model

### Define a function to call APL_GET_MODEL_INFO

In [4]:
def create_artifact_table(conn, table_name, table_spec):
    """(Re)create a session-local temporary table on the given connection.

    Any existing table called ``table_name`` is dropped first, then the table
    is created again with the column definition ``table_spec``.

    Parameters
    ----------
    conn
        DB-API style connection; only ``conn.cursor()`` is used.
    table_name : str
        Name of the temporary table, e.g. ``'#SUMMARY'``.
    table_spec : str
        Parenthesised column list, e.g. ``'(KEY NVARCHAR(50), VALUE NVARCHAR(255))'``.

    Notes
    -----
    ``table_name`` and ``table_spec`` are interpolated into the SQL text as-is,
    so they must come from trusted code, never from user input.
    """
    # Use the connection that was passed in, not a notebook-level global.
    cursor = conn.cursor()
    try:
        try:
            cursor.execute(f'drop table {table_name}')
        except Exception:
            # Best effort: the drop is expected to fail when the table does
            # not exist yet. KeyboardInterrupt / SystemExit still propagate.
            pass
        cursor.execute(f'create local temporary table {table_name} {table_spec}')
    finally:
        # Release the cursor whether or not the CREATE succeeded.
        cursor.close()

In [5]:
def get_model_info(model):
    """Call APL_GET_MODEL_INFO for a trained model, writing results to temp tables.

    The function (re)creates the temporary input/output tables listed below on
    the model's own connection, then executes the
    ``"_SYS_AFL".APL_GET_MODEL_INFO`` procedure against the table in which the
    model is saved. Nothing is returned; the results are read afterwards by
    querying the temporary tables (e.g. ``#INDICATORS_DATASET``).

    Parameters
    ----------
    model
        Trained model object exposing ``conn_context.connection`` and
        ``model_table_.name``.
    """
    # Temporary tables in creation order, each mapped to its column definition.
    # #OUTPUT_TABLE_TYPE is created as well, although the call below does not
    # reference it.
    artifact_specs = {
        '#FUNC_HEADER':
            '(KEY NVARCHAR(50), VALUE NVARCHAR(255))',
        '#OPERATION_CONFIG':
            '(KEY NVARCHAR(1000), VALUE NCLOB, CONTEXT NVARCHAR(100))',
        '#SUMMARY':
            '(OID NVARCHAR(50), KEY NVARCHAR(100), VALUE NVARCHAR(100))',
        '#VARIABLE_ROLES_COMP':
            '(NAME NVARCHAR(255), ROLE NVARCHAR(10), COMPOSITION_TYPE VARCHAR(10), COMPONENT_NAME VARCHAR(255))',
        '#VARIABLE_DESC':
            '(RANK INTEGER,NAME VARCHAR(255),STORAGE VARCHAR(10),VALUETYPE VARCHAR(10),KEYLEVEL INTEGER,ORDERLEVEL INTEGER,MISSINGSTRING VARCHAR(255),GROUPNAME VARCHAR(255),DESCRIPTION VARCHAR(255), OID NVARCHAR(50))',
        '#INDICATORS_DATASET': (
            '(OID VARCHAR(50),VARIABLE VARCHAR(255),TARGET VARCHAR(255),KEY VARCHAR(100),'
            'VALUE NCLOB,DETAIL NCLOB,DATASET VARCHAR(255))'
        ),
        '#PROFITCURVES': (
            '(OID VARCHAR(50), TYPE VARCHAR(100), VARIABLE VARCHAR(255), TARGET VARCHAR(255), '
            '"Label" VARCHAR(255), "Frequency" VARCHAR(100), "Random" VARCHAR(100), "Wizard" VARCHAR(100), '
            '"Estimation" VARCHAR(100), "Validation" VARCHAR(100), "Test" VARCHAR(100), "ApplyIn" VARCHAR(100))'
        ),
        '#OUTPUT_TABLE_TYPE': (
            '(OID VARCHAR(50), POSITION INTEGER, NAME VARCHAR(255), KIND VARCHAR(50), '
            'PRECISION INTEGER, SCALE INTEGER, MAXIMUM_LENGTH INTEGER)'
        ),
    }

    connection = model.conn_context.connection
    call_cursor = connection.cursor()

    # Temp table where the trained model is saved
    trained_model_table = model.model_table_.name

    # --- Create temp tables for input / output
    for artifact_name, artifact_columns in artifact_specs.items():
        create_artifact_table(conn=connection,
                              table_name=artifact_name,
                              table_spec=artifact_columns)

    # --- Call APL
    call_cursor.execute(
        f'call "_SYS_AFL".APL_GET_MODEL_INFO(#FUNC_HEADER, {trained_model_table}, #OPERATION_CONFIG, #SUMMARY, #VARIABLE_ROLES_COMP, #VARIABLE_DESC, #INDICATORS_DATASET, #PROFITCURVES) with overview'
    )

### Calling APL_GET_MODEL_INFO

In [6]:
# Returns nothing: the results land in the temporary tables the function creates
# (#SUMMARY, #VARIABLE_DESC, #INDICATORS_DATASET, #PROFITCURVES, ...), which are queried below
get_model_info(model)

### Put indicators data in a HANA DataFrame

In [7]:
# #INDICATORS_DATASET is a local temporary table created by get_model_info() on the model's
# connection; `model` was built with conn_context=conn, so the same `conn` is used to read it
sql_cmd = 'SELECT * FROM #INDICATORS_DATASET'
indicators_data = hd.DataFrame(conn, sql_cmd)

### Show AUC by partition

In [8]:
# Keep the AUC rows reported for variable rr_IS_FRAUD, fetch them, and retain
# only the columns to display. VALUE is converted to float so it can be
# formatted numerically. .assign() builds a new frame instead of writing into
# a column slice, which avoids pandas' SettingWithCopyWarning.
df = (
    indicators_data
    .filter("KEY = 'AUC' and VARIABLE = 'rr_IS_FRAUD'")
    .collect()
    .loc[:, ['DATASET', 'TARGET', 'KEY', 'VALUE']]
    .assign(VALUE=lambda frame: frame['VALUE'].astype(float))
)
df.columns = ['Partition', 'Target', 'Metric', 'Value']

_auc_styler = df.style.format({'Value': '{0:,.3f}'})
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# use Styler.hide(axis='index') when available, else fall back for older pandas.
_auc_styler.hide(axis='index') if hasattr(_auc_styler, 'hide') else _auc_styler.hide_index()

Partition,Target,Metric,Value
Test,IS_FRAUD,AUC,0.811
Validation,IS_FRAUD,AUC,0.783
Estimation,IS_FRAUD,AUC,0.81
