# Python HANA ML API

<div class="alert alert-block alert-info">
<b>Train a model and show the confusion matrix.</b> <br>
</div>

## Train the model

### Create a HANA DataFrame for the training data

In [1]:
import pandas as pd

from hana_ml import dataframe as hd

# Connect using the HANA secure user store (credentials are looked up by key)
conn = hd.ConnectionContext(userkey='MLMDA_KEY')

# Training data: every column of the claims table, ordered by CLAIM_ID
sql_cmd = 'SELECT * FROM "APL_SAMPLES"."AUTO_CLAIMS_FRAUD" ORDER BY CLAIM_ID'
training_data = hd.DataFrame(conn, sql_cmd)

### Put a subset of the data in a pandas DataFrame and display it

In [2]:
# Restrict to the first five rows, then collect them for display
training_preview = training_data.head(5)
training_preview.collect()

Unnamed: 0,CLAIM_ID,DAYS_TO_REPORT,BODILY_INJURY_AMOUNT,PROPERTY_DAMAGE,PREVIOUS_CLAIMS,PAYMENT_METHOD,IS_REAR_END_COLLISION,PREM_AMOUNT,AGE,GENDER,MARITAL_STATUS,INCOME_ESTIMATE,INCOME_CATEGORY,POLICY_HOLDER,IS_FRAUD
0,CL_0000765,8,0,1957,0,CC,No,Safedriving_discount,52,Male,Single,15906.0,15,Y,No
1,CL_0000832,30,2541,3843,0,CC,No,Safedriving_discount,85,Female,Single,91100.6,75,N,No
2,CL_0002015,4,0,25719,0,CC,No,Standard,45,Male,Married,67085.4,50,N,No
3,CL_0002854,0,0,83,1,Auto,No,Standard,75,Male,Married,49561.0,35,N,No
4,CL_0002869,22,0,1264,0,CC,Yes,Standard,48,Female,Married,39598.7,35,N,No


### Build a Classification model with APL Ridge Regression

In [3]:
from hana_ml.algorithms.apl.classification import AutoClassifier

# Instantiate the APL classifier on the open HANA connection
model = AutoClassifier(conn_context=conn)

# Fit it: CLAIM_ID identifies each row, IS_FRAUD is the column to predict
model.fit(training_data, key='CLAIM_ID', label='IS_FRAUD')

## Confusion Matrix

### Define Functions

In [4]:
# Pick the last artifact table whose name starts with '#INDICATORS_';
# the name stays None when the model exposes no such table.
indicators_table_name = next(
    (
        candidate
        for candidate in reversed(list(model._artifact_tables))
        if candidate.startswith('#INDICATORS_')
    ),
    None,
)

In [5]:
def create_artifact_table(conn, table_name, table_spec):
    """(Re)create a local temporary table on the given connection.

    Args:
        conn: Connection to create the table on. Either a DB-API style
            connection exposing ``cursor()``, or a wrapper object that
            carries such a connection in its ``connection`` attribute
            (as ``model.conn_context`` does).
        table_name: Name of the temporary table, e.g. ``'#FUNC_HEADER'``.
        table_spec: Parenthesised column definitions appended verbatim
            after the table name in the ``create`` statement.

    Any existing table with the same name is dropped first. A failing
    ``drop`` is ignored on purpose (the table normally does not exist yet);
    a failing ``create`` propagates to the caller.
    """
    # Use the connection that was passed in rather than a notebook global,
    # unwrapping a connection-context style object when one is given.
    connection = getattr(conn, 'connection', conn)
    cursor = connection.cursor()
    try:
        try:
            cursor.execute(f'drop table {table_name}')
        except Exception:
            # Best effort only; unlike a bare ``except`` this still lets
            # KeyboardInterrupt / SystemExit through.
            pass
        cursor.execute(f'create local temporary table {table_name} {table_spec}')
    finally:
        # Release the cursor whether or not the statements succeeded.
        cursor.close()

In [6]:
def get_confusion_matrix():
    """Call APL's COMPUTE_CONFUSION_MATRIX and leave the result in ``#CONF_MATRIX``.

    Relies on two notebook-level names: ``model`` (its
    ``conn_context.connection`` is used to run the SQL) and
    ``indicators_table_name`` (the indicators table passed as the third
    argument of the procedure).

    Raises:
        ValueError: if ``indicators_table_name`` is empty or ``None``;
            previously the text ``None`` was formatted into the SQL
            statement.
    """
    if not indicators_table_name:
        raise ValueError(
            "indicators_table_name is not set: no '#INDICATORS_' artifact table "
            "was found on the model"
        )

    conn = model.conn_context.connection

    # --- Create temp tables for input / output
    # NOTE(review): #SUMMARY is created here but is not referenced by the
    # procedure call below; it is kept so the set of temp tables is unchanged.
    temp_tables = (
        ('#FUNC_HEADER', '(KEY NVARCHAR(50), VALUE NVARCHAR(255))'),
        ('#OPERATION_CONFIG', '(KEY NVARCHAR(1000), VALUE NCLOB, CONTEXT NVARCHAR(100))'),
        ('#SUMMARY', '(OID NVARCHAR(50), KEY NVARCHAR(100), VALUE NVARCHAR(100))'),
        ('#CONF_MATRIX',
         '(OID VARCHAR(50),VARIABLE VARCHAR(255),TARGET VARCHAR(255),KEY VARCHAR(100),'
         'VALUE NCLOB,DETAIL NCLOB)'),
    )
    for name, spec in temp_tables:
        create_artifact_table(conn=conn, table_name=name, table_spec=spec)

    # Call APL
    sql = 'call "SAP_PA_APL"."sap.pa.apl.base::COMPUTE_CONFUSION_MATRIX"(#FUNC_HEADER, #OPERATION_CONFIG,  {indicators_input}, #CONF_MATRIX) with overview'
    sql = sql.format(indicators_input=indicators_table_name)

    cursor = conn.cursor()
    try:
        cursor.execute(sql)
    finally:
        # Release the cursor even when the procedure call fails.
        cursor.close()

### Calling COMPUTE_CONFUSION_MATRIX

In [7]:
# Run the APL procedure; #CONF_MATRIX is the table passed to receive its output
get_confusion_matrix()

### Put the indicators data in a pandas DataFrame

In [8]:
# Read the procedure output back and keep only the KEY / VALUE columns
sql_cmd = 'SELECT * FROM #CONF_MATRIX'
hf = hd.DataFrame(conn, sql_cmd)
indicators_df = hf.collect().loc[:, ['KEY', 'VALUE']]

### Matrix In Absolute Values

In [9]:
# Each indicator key becomes an "<actual> <predicted>" label pair
count_labels = {
    'True_Positive': 'Yes Yes',
    'True_Negative': 'No No',
    'False_Negative': 'Yes No',
    'False_Positive': 'No Yes',
}

is_count_row = indicators_df['KEY'].isin(list(count_labels))
df = indicators_df[is_count_row].copy()
df['VALUE'] = df['VALUE'].astype(int)
df['KEY'] = df['KEY'].map(count_labels)

# Split the label pair into its two parts, then discard the combined column
df[['Actual', 'Predicted']] = df['KEY'].str.split(expand=True)
df = df.drop(columns='KEY')
df.columns = ['Nb of Cases', 'Actual', 'Predicted']

pd.pivot_table(df, index=['Actual'], columns=['Predicted'], values=['Nb of Cases'])

Unnamed: 0_level_0,Nb of Cases,Nb of Cases
Predicted,No,Yes
Actual,Unnamed: 1_level_2,Unnamed: 2_level_2
No,1240,39
Yes,125,44


### Matrix In Percentage

In [10]:
# Each percentage indicator key becomes an "<actual> <predicted>" label pair
percent_labels = {
    'Percent_True_Positive': 'Yes Yes',
    'Percent_True_Negative': 'No No',
    'Percent_False_Negative': 'Yes No',
    'Percent_False_Positive': 'No Yes',
}

is_percent_row = indicators_df['KEY'].isin(list(percent_labels))
df = indicators_df[is_percent_row].copy()
df['VALUE'] = df['VALUE'].astype(float).round(2)
df['KEY'] = df['KEY'].map(percent_labels)

# Split the label pair into its two parts, then discard the combined column
df[['Actual', 'Predicted']] = df['KEY'].str.split(expand=True)
df = df.drop(columns='KEY')
df.columns = ['% Cases', 'Actual', 'Predicted']

pd.pivot_table(df, index=['Actual'], columns=['Predicted'], values=['% Cases'])

Unnamed: 0_level_0,% Cases,% Cases
Predicted,No,Yes
Actual,Unnamed: 1_level_2,Unnamed: 2_level_2
No,85.64,2.69
Yes,8.63,3.04


### Confusion Matrix Indicators

In [11]:
# Keep the summary indicators and show them as a two-column table
df = indicators_df[indicators_df.KEY.isin(['Accuracy', 'Sensitivity', 'Specificity', 'Precision', 'F1_Score'])].copy()
df['VALUE'] = df['VALUE'].astype(float).round(4)
df.columns = ['Indicator', 'Value']

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis='index') replaces it. Fall back to the old method on
# pandas versions that predate Styler.hide.
styler = df.style
styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()

Indicator,Value
Accuracy,0.8867
Sensitivity,0.2604
Specificity,0.9695
Precision,0.5301
F1_Score,0.3492
