# Python HANA ML APL

<div class="alert alert-block alert-info">
<b>Reports on the ozone rate time series model.</b> <br>
</div>

### Create a HANA DataFrame for the actual series

In [1]:
# Open a HANA connection and wrap the ozone-rate table in a HANA DataFrame.
from hana_ml import dataframe as hd
# NOTE(review): 'MLMDA_KEY' is presumably an entry in the HANA secure user
# store, so no credentials appear in the notebook -- confirm on your client.
conn = hd.ConnectionContext(userkey='MLMDA_KEY')
# Rows are ordered by "Date" so the series is read chronologically.
sql_cmd = 'SELECT * FROM "APL_SAMPLES"."OZONE_RATE_LA" ORDER BY "Date"'
# series_in is defined by the SQL statement above; later cells call
# .collect() on it to obtain a pandas result.
series_in = hd.DataFrame(conn, sql_cmd)

In [2]:
# Preview the first five rows of the input series (Date, OzoneRateLA).
series_in.head(5).collect()

Unnamed: 0,Date,OzoneRateLA
0,1955-01-28,2.63
1,1955-02-28,1.94
2,1955-03-28,3.38
3,1955-04-28,4.92
4,1955-05-28,6.29


### Fit with APL

#### Modeling

In [3]:
from hana_ml.algorithms.apl.time_series import AutoTimeSeries

# Time series model on the monthly ozone rate, forecasting 12 steps ahead.
apl_model = AutoTimeSeries(
    time_column_name='Date',
    target='OzoneRateLA',
    horizon=12,
)

# Extra APL settings applied before training.
# NOTE(review): 'APL/DecomposeInfluencers' is presumably what makes the
# decomposition report break out an "Influencers" row -- confirm in APL docs.
apl_model.set_params(
    other_params={'force_positive_forecast': True},
    other_train_apl_aliases={'APL/DecomposeInfluencers': 'true'},
)

apl_model.fit(data=series_in)

#### Reports

In [4]:
# Model overview report: target, date range of the series, horizon and granularity.
df = apl_model.get_debrief_report('TimeSeries_ModelOverview').collect()
df = df[['Target Variable', 'First Date', 'Last Date', 'Horizon', 'Granularity']]
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is the supported way to drop the row index.
df.style.hide(axis="index")

Target Variable,First Date,Last Date,Horizon,Granularity
OzoneRateLA,1955-01-28,1971-12-28,12,monthAmplitude


In [5]:
# Performance report restricted to the validation partition.
# The filter is applied to the HANA DataFrame before collect() is called.
my_filter = "\"Partition\"='Validation'"
df = apl_model.get_debrief_report('TimeSeries_Performance').filter(my_filter).collect()
df = df[['Partition', 'MAE', 'MAPE', 'RMSE']]
# Display formats: three decimals for MAE/RMSE, one decimal plus '%' for MAPE.
format_dict = {'MAE': '{:,.3f}', 'MAPE': '{:,.1f}%', 'RMSE': '{:,.3f}'}
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is the supported way to drop the row index.
df.style.format(format_dict).hide(axis="index")

Partition,MAE,MAPE,RMSE
Validation,0.431,15.6%,0.536


In [6]:
# Components report: one row per component type with its value.
df = apl_model.get_debrief_report('TimeSeries_Components').collect()
df = df[['Component Type', 'Component Value']]
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is the supported way to drop the row index.
df.style.hide(axis="index")

Component Type,Component Value
Trend,Linear( Date )
Cycles,
Fluctuations,AR(37)


In [7]:
# Decomposition report: relative impact of each item, listed in 'Row' order.
df = apl_model.get_debrief_report('TimeSeries_Decomposition').collect()
df = df[['Row', 'Type', 'Item', 'Relative Impact']]
# Sort explicitly on the client: the collected rows are not guaranteed to
# arrive in 'Row' order.
df = df.sort_values(by=['Row'])
format_dict = {'Relative Impact': '{:,.3f}'}
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is the supported way to drop the row index.
df.style.format(format_dict).hide(axis="index")

Row,Type,Item,Relative Impact
1,Trend,Linear,0.864
2,Cycles,,0.0
3,Influencers,,0.0
4,Fluctuations,,0.084
5,Residuals,,0.052


In [8]:
# Outliers report: partition, date, observed signal and model forecast.
df = apl_model.get_debrief_report('TimeSeries_Outliers').collect()
df = df[['Partition', 'Date', 'Signal', 'Forecast']]
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is the supported way to drop the row index.
df.style.hide(axis="index")

Partition,Date,Signal,Forecast
Estimation,1959-06-28,7.54,4.965
Estimation,1965-10-28,5.88,3.535


### Predict with APL

In [9]:
# Apply the fitted model to the input series; the result is then collected
# into df_out for local display.
series_out = apl_model.predict(data = series_in)
# The last rows of df_out hold the forecasts beyond the end of the actual
# series (their ACTUAL value is empty in the displayed tail).
df_out = series_out.collect()

In [10]:
# Map the raw prediction column names to reader-friendly labels.
# The mapping is deliberately not named `dict`: that would shadow the builtin
# for every later cell in the same kernel session.
column_labels = {
    'ACTUAL': 'Actual',
    'PREDICTED': 'Forecast',
    'LOWER_INT_95PCT': 'Lower Limit',
    'UPPER_INT_95PCT': 'Upper Limit',
}
# Rebind df_out instead of renaming in place; columns that are already renamed
# are simply left alone, so the cell can be re-run safely.
df_out = df_out.rename(columns=column_labels)
# Show the last 12 rows. Styler.hide_index() was deprecated in pandas 1.4 and
# removed in 2.0; Styler.hide(axis="index") is the supported replacement.
df_out.tail(12).style.hide(axis="index")

Date,Actual,Forecast,Lower Limit,Upper Limit
1972-01-28,,1.185043,0.164166,2.205919
1972-02-28,,1.835063,0.80753,2.862597
1972-03-28,,2.321015,1.282088,3.359942
1972-04-28,,2.539699,1.495163,3.584235
1972-05-28,,3.055969,1.997319,4.114618
1972-06-28,,3.277982,2.218938,4.337026
1972-07-28,,3.381808,2.323345,4.44027
1972-08-28,,3.441817,2.381831,4.501803
1972-09-28,,3.046134,1.982417,4.109851
1972-10-28,,2.32465,1.263911,3.385389
