# Python HANA ML APL

<div class="alert alert-block alert-info">
<b>Predicting a nominal target with more than two classes.</b> <br>
</div>

## Train

### Create a HANA DataFrame for the training data

In [1]:
from hana_ml import dataframe as hd
# Open the HANA connection; connection details are looked up via the 'MLMDA_KEY' user key.
conn = hd.ConnectionContext(userkey='MLMDA_KEY')

# Training set: census rows whose "marital-status" value occurs at least 1500 times
# in the table (rarer categories are filtered out by the sub-select), ordered by "id".
sql_cmd = """ 
select * from apl_samples.census 
where "marital-status" not in (
  select "marital-status" from apl_samples.census 
  group by "marital-status" having count(*) < 1500 )
order by "id"
"""
hfd_train = hd.DataFrame(conn, sql_cmd)

# Pull only the first five rows to the client for a quick look.
train_preview = hfd_train.head(5)
train_preview.collect()

Unnamed: 0,id,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
0,1,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,0
1,2,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,0
2,3,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,0
3,4,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,0
4,5,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,0


In [2]:
# First entry of the shape is the number of rows in the filtered training set.
train_row_count = hfd_train.shape[0]
train_row_count

48177

In [3]:
col_key = 'id'
col_target = 'marital-status'

# Start from every column of the training frame, then strip the ones that must
# not be used as predictors: the row key, the target itself, and 'education-num'
# (presumably redundant with 'education' -- TODO confirm).
col_predictors = hfd_train.columns
for excluded_column in (col_key, col_target, 'education-num'):
    col_predictors.remove(excluded_column)

# Number of candidate predictors left.
len(col_predictors)

13

### Fit with APL Gradient Boosting

In [4]:
from hana_ml.algorithms.apl.gradient_boosting_classification import GradientBoostingClassifier
# Ask APL to select variables automatically and keep at most six of them.
variable_selection_params = {
    'variable_auto_selection': True,
    'variable_selection_max_nb_of_final_variables': '6',
}

apl_model = GradientBoostingClassifier()
apl_model.set_params(**variable_selection_params)

# Train on the HANA frame: 'marital-status' is the label, 'id' the row key.
apl_model.fit(
    hfd_train,
    label=col_target,
    key=col_key,
    features=col_predictors,
)

##### Model Reports

In [5]:
# Restrict the report to the rows of the 'Estimation' partition.
my_filter = "\"Partition\" = 'Estimation'"
target_statistics = apl_model.get_debrief_report('MultiClassTarget_Statistics')
df = target_statistics.filter(my_filter).collect()

# Drop the two columns that are not wanted in the displayed table.
df = df.drop(columns=['Oid', 'Target Key'])

# Show the share as a percentage with two decimals and the weight as a whole number.
format_dict = {'% Weight':'{:,.2f}%', 'Weight':'{:,.0f}'}
styled_statistics = df.style.format(format_dict)
styled_statistics.hide(axis='index')

Target,Partition,Category,% Weight,Weight
marital-status,Estimation,Divorced,13.71%,4917
marital-status,Estimation,Married-civ-spouse,46.25%,16592
marital-status,Estimation,Never-married,33.70%,12088
marital-status,Estimation,Separated,3.17%,1136
marital-status,Estimation,Widowed,3.17%,1139


In [6]:
from hana_ml.visualizers.unified_report import UnifiedReport
# Build the unified report for the fitted model and render it in the notebook.
model_report = UnifiedReport(apl_model)
model_report.build().display()

In order to review the unified classification model report better, you need to adjust the size of the left area or hide the left area temporarily!


In [7]:
# List the variables that were excluded from the model, with the stated reason.
exclusion_report = apl_model.get_debrief_report('ClassificationRegression_VariablesExclusion')
df = exclusion_report.collect()

# Only the variable name and its exclusion reason are shown.
shown_columns = ['Variable', 'Reason For Exclusion']
df = df[shown_columns]
df.style.hide(axis='index')

Variable,Reason For Exclusion
capital-gain,Low contributory
capital-loss,Low contributory
hours-per-week,Low contributory
native-country,Low contributory
occupation,Low contributory
race,Low contributory
workclass,Low contributory


## Make Predictions

#####  New dataframe to try the model

In [8]:
# Five census rows (ids 550 to 554) used to try out the trained model.
sql_cmd = 'select * from apl_samples.census where "id" between 550 and 554 order by "id"'
hfd_apply = hd.DataFrame(conn, sql_cmd)

# Bring the rows to the client and display them without the pandas index.
apply_rows = hfd_apply.collect()
apply_rows.style.hide(axis='index')

id,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
550,28,Private,184831,Some-college,10,Never-married,Craft-repair,Unmarried,White,Male,0,0,40,United-States,0
551,46,Self-emp-not-inc,245724,Some-college,10,Divorced,Exec-managerial,Not-in-family,White,Male,0,0,50,United-States,0
552,36,Self-emp-not-inc,27053,HS-grad,9,Separated,Other-service,Unmarried,White,Female,0,0,40,United-States,0
553,72,Private,205343,11th,7,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,0
554,35,Private,229328,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Wife,Black,Female,0,0,40,United-States,0


#####  Predict and give the top 3 reasons

In [9]:
# Apply settings for this prediction: decision output on, predicted value and
# probability off, top three reason codes without their strength value/indicator.
reason_code_settings = {
    'APL/ApplyExtraMode': 'Advanced Apply Settings',
    'APL/ApplyPredictedValue': 'false',
    'APL/ApplyProbability': 'false',
    'APL/ApplyDecision': 'true',
    'APL/ApplyReasonCode/TopCount': '3',
    'APL/ApplyReasonCode/ShowStrengthValue': 'false',
    'APL/ApplyReasonCode/ShowStrengthIndicator': 'false',
}
apl_model.set_params(extra_applyout_settings=reason_code_settings)

df = apl_model.predict(hfd_apply).collect()

# Replace the raw output headers with readable ones (positional: nine columns expected).
df.columns = [
    'Id', 'Actual', 'Prediction',
    'Reason 1 Name', 'Reason 1 Value',
    'Reason 2 Name', 'Reason 2 Value',
    'Reason 3 Name', 'Reason 3 Value',
]
df.style.hide(axis='index')

Id,Actual,Prediction,Reason 1 Name,Reason 1 Value,Reason 2 Name,Reason 2 Value,Reason 3 Name,Reason 3 Value
550,Never-married,Never-married,age,28,sex,Male,relationship,Unmarried
551,Divorced,Divorced,relationship,Not-in-family,age,46,education,Some-college
552,Separated,Divorced,relationship,Unmarried,age,36,fnlwgt,27053
553,Widowed,Widowed,age,72,relationship,Unmarried,sex,Female
554,Married-civ-spouse,Married-civ-spouse,relationship,Wife,fnlwgt,229328,class,0


#####  Score for each class

In [10]:
# Apply settings for this prediction: predicted value and decision on, probability off.
class_score_settings = {
    'APL/ApplyExtraMode': 'Advanced Apply Settings',
    'APL/ApplyPredictedValue': 'true',
    'APL/ApplyProbability': 'false',
    'APL/ApplyDecision': 'true',
}
apl_model.set_params(extra_applyout_settings=class_score_settings)

df = apl_model.predict(hfd_apply).collect()

# Friendlier names for the actual/predicted label columns.
df = df.rename(columns={'TRUE_LABEL': 'Actual', 'PREDICTED': 'Prediction'})

# Strip the common prefix so each score column is headed by its class name only.
score_prefix = "gb_score_marital-status_"
df.columns = [column_name.replace(score_prefix, "") for column_name in df.columns]
df.style.hide(axis='index')

id,Actual,Prediction,Divorced,Married-civ-spouse,Never-married,Separated,Widowed
550,Never-married,Never-married,0.805219,-4.295372,1.975135,0.056104,-2.944252
551,Divorced,Divorced,1.736278,-4.245555,0.925411,-0.212301,-2.025436
552,Separated,Divorced,1.566857,-5.082361,-0.021337,0.279672,-0.934751
553,Widowed,Widowed,1.176975,-4.545315,-0.353816,-0.340472,3.055872
554,Married-civ-spouse,Married-civ-spouse,-1.797927,6.86103,-1.204907,-1.531235,-1.976038


##### Export the model equation for scoring in stand-alone JavaScript

In [None]:
# Export the trained model's scoring equation as JSON text.
apl_scoring_equation = apl_model.export_apply_code(code_type='JSON')

# Write it next to the notebook. The context manager guarantees the file handle
# is closed even if the write raises, and the explicit UTF-8 encoding keeps the
# JSON file independent of the platform's default text encoding.
with open("apl_model.json", "w", encoding="utf-8") as text_file:
    text_file.write(apl_scoring_equation)