# Python HANA ML APL

<div class="alert alert-block alert-info">
<b>Predicting Car Insurance Fraud.</b> <br>
</div>

### Create a HANA DataFrame containing new Insurance Claims

In [1]:
from hana_ml import dataframe as hd

# Connect with the 'MLMDA_KEY' user key, so no host, user or password is
# written in the notebook itself.
conn = hd.ConnectionContext(userkey='MLMDA_KEY')

# HANA DataFrame over the claims table; the next cell calls collect() on it
# to bring rows to the client.
hdf_new = conn.table(
    'AUTO_CLAIMS_NEW',
    schema='APL_SAMPLES',
)

In [2]:
# Preview: restrict to three rows before collecting them for display.
(
    hdf_new
    .head(3)
    .collect()
)

Unnamed: 0,CLAIM_ID,DAYS_TO_REPORT,BODILY_INJURY_AMOUNT,PROPERTY_DAMAGE,PREVIOUS_CLAIMS,PAYMENT_METHOD,IS_REAR_END_COLLISION,PREM_AMOUNT,AGE,GENDER,MARITAL_STATUS,INCOME_ESTIMATE,INCOME_CATEGORY,POLICY_HOLDER
0,CL_0959524,6,0,1066,0,CH,Yes,Standard,97,Male,Single,62903.3,50,N
1,CL_0959946,0,0,432,0,CH,Yes,Standard,90,Female,Single,42504.3,35,N
2,CL_0960121,5,0,2714,0,CC,No,Standard,23,Male,Married,22767.0,15,Y


### Load Trained Model

In [3]:
from hana_ml.model_storage import ModelStorage

# Saved models are looked up in the 'USER_APL' schema over the open connection.
model_storage = ModelStorage(
    connection_context=conn,
    schema='USER_APL',
)
apl_model = model_storage.load_model(name='My Fraud Model')

# Called on the reloaded model before its summary is queried in the next cell;
# the returned list of HANA DataFrames is what this cell displays.
apl_model.get_model_info()

[<hana_ml.dataframe.DataFrame at 0x2d1ea043f40>,
 <hana_ml.dataframe.DataFrame at 0x2d1ea043df0>,
 <hana_ml.dataframe.DataFrame at 0x2d1ea043d60>,
 <hana_ml.dataframe.DataFrame at 0x2d1ea043f10>,
 <hana_ml.dataframe.DataFrame at 0x2d1ea043f70>]

In [4]:
# Keep only the four summary entries shown in the table and bring them to the client.
df = apl_model.get_summary().filter("KEY in ('ModelVariableCount', 'ModelSelectedVariableCount', 'ModelRecordCount', 'ModelBuildDate')").collect()

# Turn keys such as 'ModelSelectedVariableCount' into 'Selected Variable Count'.
# The replacements are applied in this order; regex=False makes them plain
# substring replacements regardless of the pandas version's default.
label_fixes = [
    ('Model', ''),
    ('Selected', 'Selected '),
    ('Count', ' Count'),
    ('Date', ' Date'),
]
for old_text, new_text in label_fixes:
    df['KEY'] = df['KEY'].str.replace(old_text, new_text, regex=False)

# Two-column view with reader-friendly headers.
df = df[['KEY', 'VALUE']].rename(columns={'KEY': 'Property', 'VALUE': 'Value'})

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# hide(axis='index') is its replacement (requires pandas >= 1.4).
df.style.hide(axis='index')

Property,Value
Variable Count,15
Selected Variable Count,13
Record Count,2000
Build Date,2021-12-28 15:41:38


### Make Predictions on New Claims

In [5]:
# Apply-time options, all passed as strings: advanced apply mode with the
# predicted value, probability and decision switched on, plus the top 3
# reason codes with their strength value / strength indicator switched off.
reason_code_settings = {
    'APL/ApplyExtraMode': 'Advanced Apply Settings',
    'APL/ApplyPredictedValue': 'true',
    'APL/ApplyProbability': 'true',
    'APL/ApplyDecision': 'true',
    'APL/ApplyReasonCode/TopCount': '3',
    'APL/ApplyReasonCode/ShowStrengthValue': 'false',
    'APL/ApplyReasonCode/ShowStrengthIndicator': 'false',
}
apl_model.set_params(extra_applyout_settings=reason_code_settings)

# Score the new claims, then collect the result on the client as `df`.
scored_hdf = apl_model.predict(hdf_new)
df = scored_hdf.collect()

In [6]:
# Friendlier headers for the prediction, score and probability columns.
col_dict = {'PREDICTED': 'Target Predicted',
            'gb_score_IS_FRAUD': 'Score',
            'gb_proba_IS_FRAUD': 'Probability'}

# Rename without inplace=True, then strip the "gb_" prefix from any header
# that still carries it (presumably the reason-code columns).
df = (
    df
    .rename(columns=col_dict)
    .rename(columns=lambda hdr: hdr.replace("gb_", ""))
)

# Probability as a percentage with one decimal, score with two decimals.
format_dict = {'Probability': '{:,.1%}', 'Score': '{0:,.2f}'}

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# hide(axis='index') is its replacement (requires pandas >= 1.4).
df.head(7).style.format(format_dict).hide(axis='index')

CLAIM_ID,Target Predicted,Score,Probability,reason_top_1_name,reason_top_1_value,reason_top_2_name,reason_top_2_value,reason_top_3_name,reason_top_3_value
CL_0959524,No,-1.53,17.8%,BODILY_INJURY_AMOUNT,0.0,DAYS_TO_REPORT,6,GENDER,Male
CL_0959946,No,-4.31,1.3%,BODILY_INJURY_AMOUNT,0.0,PROPERTY_DAMAGE,432,GENDER,Female
CL_0960121,No,-1.04,26.0%,AGE,23.0,INCOME_ESTIMATE,22767,BODILY_INJURY_AMOUNT,0
CL_0960195,No,-1.03,26.4%,INCOME_ESTIMATE,95489.4,GENDER,Male,BODILY_INJURY_AMOUNT,0
CL_0960294,Yes,-0.28,43.1%,BODILY_INJURY_AMOUNT,23385.0,GENDER,Female,DAYS_TO_REPORT,15
CL_0960379,Yes,1.12,75.4%,BODILY_INJURY_AMOUNT,14718.0,INCOME_ESTIMATE,7673.4700000000003,GENDER,Female
CL_0960411,No,-2.71,6.2%,BODILY_INJURY_AMOUNT,0.0,PROPERTY_DAMAGE,46,PAYMENT_METHOD,CH
