# Python HANA ML APL

<div class="alert alert-block alert-info">
<b>Comparing a new dataset to a reference dataset whose statistics have already been computed</b> <br>
</div>

In [1]:
# Open the HANA connection that every later cell reuses through `conn`.
from hana_ml import dataframe as hd
# NOTE(review): `userkey` presumably names an entry in the HANA secure user
# store, which keeps credentials out of the notebook -- confirm against the
# hana_ml ConnectionContext documentation.
conn = hd.ConnectionContext(userkey='MLMDA_KEY')    
# Last expression of the cell: displays whether the connection is open.
conn.connection.isconnected()

True

### Create a HANA DataFrame

In [2]:
## Dataset for comparison: the "new" data that is checked for drift below.
# The statement keeps five columns of APL_SAMPLES.CENSUS and only the rows
# where "sex" = 'Female'; ORDER BY "id" fixes the row order of the statement.
# NOTE(review): the reference dataset is not visible in this notebook;
# presumably it is the unfiltered census, so this filter is what produces the
# drift reported further down -- confirm against the notebook that saved the model.
sql_cmd =  """ 
select "age", "occupation", "workclass", "education", "relationship" 
from APL_SAMPLES.CENSUS 
where "sex" = 'Female' 
order by "id" 
"""
# Build a HANA DataFrame from the SQL statement on the open connection.
hdf_new= hd.DataFrame(conn, sql_cmd)
# Preview: bring the first three rows back for display.
hdf_new.head(3).collect()

Unnamed: 0,age,occupation,workclass,education,relationship
0,28,Prof-specialty,Private,Bachelors,Wife
1,37,Exec-managerial,Private,Masters,Wife
2,49,Other-service,Private,9th,Not-in-family


### Load Drift Model

In [3]:
from hana_ml.model_storage import ModelStorage

# Model storage that lives in the USER_APL schema, reached through the
# connection opened at the top of the notebook.
model_storage = ModelStorage(
    connection_context=conn,
    schema='USER_APL',
)

# Fetch the model saved under the name 'Drift Model'; no version is passed.
# NOTE(review): which version is returned when none is given is not visible
# here -- confirm against the ModelStorage.load_model documentation.
apl_model = model_storage.load_model(
    name='Drift Model',
)

### Detect Drift with APL

In [4]:
# Compare the new dataset with the loaded drift model.
# NOTE(review): 0.5 is presumably the cut-off applied to the deviation
# indicator -- confirm against the detect() documentation.
results = apl_model.detect(
    hdf_new,
    build_report=True,  # requested here; the following cells read the report
    threshold=0.5,
)
# Plain-text dump of the per-variable result table.
print(results.collect())

       Variable  Deviation Indicator
0    occupation                1.000
1  relationship                1.000
2     workclass                1.000
3           age                0.986
4     education                0.764


In [5]:
# Fetch the 'Deviation_CategoryFrequencies' debrief report, drop its 'Oid'
# column and bring the rows back for display.
df = (
    apl_model
    .get_debrief_report('Deviation_CategoryFrequencies')
    .deselect('Oid')
    .collect()
)

# Display formats: weight columns as thousands-separated integers,
# percentage columns with one decimal place.
format_dict = {
    **dict.fromkeys(['Ref Weight', 'New Weight', 'Change'], '{:,.0f}'),
    **dict.fromkeys(
        ['Ref % Weight', 'New % Weight', '% Change', 'Abs % Change'],
        '{:.1f}',
    ),
}

# Last expression of the cell: the styled table, rendered without its index.
styled_report = df.style.format(format_dict)
styled_report.hide(axis='index')

Variable,Category Order,Category,Ref Weight,New Weight,Change,Ref % Weight,New % Weight,% Change,Abs % Change
relationship,0,Husband,19715,1,-19714,60.4,0.0,-60.4,60.4
relationship,4,Unmarried,1197,3928,2731,3.7,24.3,20.6,20.6
occupation,1,Adm-clerical,1842,3769,1927,5.6,23.3,17.6,17.6
occupation,3,Craft-repair,5789,323,-5466,17.7,2.0,-15.7,15.7
relationship,1,Not-in-family,6713,5870,-843,20.6,36.3,15.7,15.7
relationship,5,Wife,3,2328,2325,0.0,14.4,14.4,14.4
occupation,8,Other-service,2225,2698,473,6.8,16.7,9.8,9.8
relationship,3,Own-child,4205,3376,-829,12.9,20.9,8.0,8.0
occupation,14,Transport-moving,2228,127,-2101,6.8,0.8,-6.0,6.0
workclass,6,Self-emp-not-inc,3233,629,-2604,9.9,3.9,-6.0,6.0


In [6]:
# Export the model's report as HTML, passing 'drift_report' as the target name.
# NOTE(review): presumably this relies on the earlier detect(..., build_report=True)
# call having produced the report, and the exact output path/extension is decided
# by hana_ml -- confirm both against the library documentation.
apl_model.generate_html_report('drift_report')