# Python HANA ML APL

<div class="alert alert-block alert-info">
<b>Predicting a continuous target (regression case).</b> <br>
</div>

## Train

### Create a HANA DataFrame for the training data

In [None]:
from hana_ml import dataframe as hd

# Open the connection using the stored user key 'MLMDA_KEY'.
conn = hd.ConnectionContext(userkey='MLMDA_KEY')

# Training data: every column of the CENSUS sample table, ordered by "id".
# The HANA DataFrame only wraps the SELECT statement; rows are pulled to the
# client later, when collect() is called.
sql_cmd = 'SELECT * FROM "APL_SAMPLES"."CENSUS" order by "id"'
hdf_train = hd.DataFrame(connection_context=conn, select_statement=sql_cmd)

In [None]:
# Preview the training data: fetch the first 6 rows to the client for display.
hdf_train.head(6).collect()

### Fit with APL Gradient Boosting

In [None]:
from hana_ml.algorithms.apl.gradient_boosting_regression import GradientBoostingRegressor

# Regression model evaluated with MAE, with automatic variable selection on.
apl_model = GradientBoostingRegressor(
    eval_metric='MAE',
    variable_auto_selection=True,
)

# Train on the CENSUS data: 'age' is the target to predict, 'id' is the row key.
apl_model.fit(hdf_train, label='age', key='id')

##### Model Reports

In [None]:
# Variable contribution report: fetch it, order it by rank, and drop the
# technical columns that are not meant to be displayed.
df = (
    apl_model.get_debrief_report('ClassificationRegression_VariablesContribution')
    .collect()
    .sort_values(by=['Rank'])
    .drop(columns=['Oid', 'Method', 'Rank'])
)

# Discard variables whose contribution is zero or negative. The mask is the
# negation of "<= 0" so that rows with a missing contribution are kept.
df = df[~(df['Contribution'] <= 0)]

# Show both ratio columns as percentages with two decimals, without the index.
format_dict = {'Contribution': '{:,.2%}', 'Cumulative': '{:,.2%}'}
df.style.format(format_dict).hide(axis='index')

In [None]:
# Restrict the performance report to the MAPE and RMSE indicators measured on
# the validation partition; the filter is applied before the rows are fetched.
my_filter = "\"Partition\" = 'Validation' and \"Indicator\" in ('MAPE','RMSE')"
df = (
    apl_model.get_debrief_report('ClassificationRegression_Performance')
    .filter(my_filter)
    .collect()
    .drop(columns=['Oid'])
)

# Show the indicator values with three decimals, without the index.
format_dict = {'Value': '{:,.3f}'}
df.style.format(format_dict).hide(axis='index')

## Make Predictions

In [None]:
# Score a 100-row sample of the CENSUS table with the trained model.
# ORDER BY "id" makes the sample reproducible: LIMIT without ORDER BY lets the
# database return any 100 rows, so the scored rows could change between runs.
sql_cmd = 'SELECT * FROM "APL_SAMPLES"."CENSUS" ORDER BY "id" LIMIT 100'
hdf_apply = hd.DataFrame(conn, sql_cmd)
df = apl_model.predict(hdf_apply).collect()

# Give the output columns display-friendly names. The assignment is positional
# and assumes predict() returns exactly three columns in the order
# key, actual value, predicted value -- TODO confirm against the hana_ml docs.
df.columns = ['id', 'Actual', 'Prediction']
df.head(8)