# Python HANA ML APL

<div class="alert alert-block alert-info">
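<b>Computing statistics of a reference dataset.</b> <br>
This notebook fits an APL DriftDetector on a reference dataset taken from APL_SAMPLES.CENSUS, reviews the category frequencies it computed, and saves the fitted model with ModelStorage.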
</div>

In [1]:
# The dataframe module supplies both ConnectionContext and the HANA DataFrame
# class used in the cells below.
from hana_ml import dataframe as hd
# Connect through the stored user key 'MLMDA_KEY', so no host name or
# credentials appear in the notebook.
conn = hd.ConnectionContext(userkey='MLMDA_KEY')   
# Last expression of the cell: displays whether the connection is open.
conn.connection.isconnected()

True

### Create the HANA DataFrame

In [2]:
## Reference Dataset
# Five columns of APL_SAMPLES.CENSUS, restricted to the rows where "sex" = 'Male'.
# This query defines the reference data that the drift model is fitted on.
sql_cmd =  """ 
select "age", "occupation", "workclass", "education", "relationship" 
from APL_SAMPLES.CENSUS 
where "sex" = 'Male' 
order by "id" 
"""
# Build a HANA DataFrame from the query on the open connection.
hdf_ref= hd.DataFrame(conn, sql_cmd)
# Preview: collect only the first 3 rows and display them as the cell output.
hdf_ref.head(3).collect()

Unnamed: 0,age,occupation,workclass,education,relationship
0,39,Adm-clerical,State-gov,Bachelors,Not-in-family
1,50,Exec-managerial,Self-emp-not-inc,Bachelors,Husband
2,38,Handlers-cleaners,Private,HS-grad,Not-in-family


### Fit with APL DriftDetector

In [3]:
from hana_ml.algorithms.apl.drift_detector import DriftDetector
# Create a DriftDetector with default settings.
apl_model = DriftDetector()
# Fit it on the reference dataset. The return value of fit() is kept in
# `results`; the following cells query and save `apl_model` itself.
results = apl_model.fit(hdf_ref)

In [4]:
# Fetch the 'Statistics_CategoryFrequencies' debrief report of the fitted model,
# drop its 'Oid' column, and collect the rest for local display.
category_report = apl_model.get_debrief_report('Statistics_CategoryFrequencies')
df = category_report.deselect('Oid').collect()

# Display formats: 'Weight' with thousands separators and no decimals,
# '% Weight' with one decimal place.
format_dict = {
    'Weight': '{:,.0f}',
    '% Weight': '{:.1f}',
}

# Last expression of the cell: the styled table, shown without the row index.
df.style.format(format_dict).hide(axis='index')

Variable,Partition,Category,% Weight,Weight,Category Order
age,Estimation,[17 ; 20[,3.9,1274,0
age,Estimation,[20 ; 23[,5.6,1829,1
age,Estimation,[23 ; 25],6.9,2239,2
age,Estimation,[26 ; 27],4.7,1534,3
age,Estimation,]27 ; 30[,5.0,1637,4
age,Estimation,[30 ; 31],5.4,1771,5
age,Estimation,[32 ; 33],5.6,1836,6
age,Estimation,[34 ; 35],5.7,1872,7
age,Estimation,[36 ; 38],8.4,2753,8
age,Estimation,[39 ; 40],5.0,1647,9


### Save the Drift Model

In [5]:
from hana_ml.model_storage import ModelStorage
# Model storage bound to the open connection, using schema 'USER_APL'.
model_storage = ModelStorage(connection_context=conn, schema='USER_APL')
# Give the fitted model a name before saving it.
apl_model.name = 'Drift Model'
# NOTE(review): if_exists='replace' presumably overwrites a previously saved
# model with the same name; confirm against the ModelStorage documentation.
model_storage.save_model(model=apl_model, if_exists='replace')

In [6]:
# Optional check, left disabled: uncomment to call model_storage.list_models().
#model_storage.list_models()