In [1]:
# Connection
from hana_ml import dataframe
from hana_ml.dataframe import ConnectionContext, create_dataframe_from_pandas
from data_load_utils import DataSets, Settings
url, port, user, pwd = Settings.load_config("../../config/e2edata.ini")
conn = dataframe.ConnectionContext(url, port, user, pwd)

In [2]:
# Dataset function
import numpy as np
import pandas as pd
def geometric_brownian_motion(T = 1, N = 2, mu = -0.01, sigma = 0.001, S0 = 2):        
    dt = float(T)/N
    t = np.linspace(0, T, N)
    W = np.random.standard_normal(size = N) 
    W = np.cumsum(W)*np.sqrt(dt) ### standard brownian motion ###
    X = (mu-0.5*sigma**2)*t + sigma*W 
    S = S0*np.exp(X) ### geometric brownian motion ###
    return S

In [3]:
# df
dates = pd.date_range('2018-03-01', '2018-03-10',freq='H')
T = (dates.max() - dates.min()).days / 365
N = dates.size
start_price = 1000
y = pd.DataFrame()
y['INDEX'] = dates
y['Y'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
df = create_dataframe_from_pandas(conn, y, '#ARIMA_TIMESTAMP', force=True).sort("INDEX")
print(df.collect())

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s]


                  INDEX            Y
0   2018-03-01 00:00:00  1000.584817
1   2018-03-01 01:00:00   999.519375
2   2018-03-01 02:00:00   998.666737
3   2018-03-01 03:00:00  1000.857958
4   2018-03-01 04:00:00  1002.147020
..                  ...          ...
212 2018-03-09 20:00:00  1008.612219
213 2018-03-09 21:00:00  1009.683055
214 2018-03-09 22:00:00  1009.948683
215 2018-03-09 23:00:00  1010.036911
216 2018-03-10 00:00:00  1009.491548

[217 rows x 2 columns]


In [4]:
# y_arimax
dates = pd.date_range('2018-03-01', '2018-03-10', freq='H')
T = (dates.max()-dates.min()).days / 365
N = dates.size
start_price = 1000
y_arimax = pd.DataFrame()
y_arimax['INDEX'] = dates
y_arimax['Y'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
y_arimax['EX'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
df_arimax = create_dataframe_from_pandas(conn, y_arimax, '#ARIMAX_TIMESTAMP', force=True).sort("INDEX")
print(df_arimax.collect())

# y_ariamx predict
dates = pd.date_range('2018-03-10', '2018-03-11', freq='H')
T = (dates.max()-dates.min()).days / 365
N = dates.size
start_price = 1000
predict_arimax = pd.DataFrame()
predict_arimax['INDEX'] = dates
predict_arimax['EX'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
predict_arimax_df = create_dataframe_from_pandas(conn, predict_arimax, '#ARIMAX_PREDICT_TIMESTAMP', force=True).sort("INDEX")
print(predict_arimax_df.collect())

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s]


                  INDEX            Y           EX
0   2018-03-01 00:00:00   999.927148   999.720970
1   2018-03-01 01:00:00   999.270908  1000.359998
2   2018-03-01 02:00:00   998.359882  1000.812694
3   2018-03-01 03:00:00   998.413384  1002.046111
4   2018-03-01 04:00:00   997.424863  1003.696107
..                  ...          ...          ...
212 2018-03-09 20:00:00  1014.367222   986.591260
213 2018-03-09 21:00:00  1013.839979   984.729785
214 2018-03-09 22:00:00  1013.391534   985.194611
215 2018-03-09 23:00:00  1013.177074   986.294107
216 2018-03-10 00:00:00  1013.311451   985.940788

[217 rows x 3 columns]


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s]


                 INDEX           EX
0  2018-03-10 00:00:00   999.208698
1  2018-03-10 01:00:00  1000.096393
2  2018-03-10 02:00:00   999.582595
3  2018-03-10 03:00:00   999.495129
4  2018-03-10 04:00:00  1000.649157
5  2018-03-10 05:00:00  1000.074740
6  2018-03-10 06:00:00  1002.317076
7  2018-03-10 07:00:00  1001.306258
8  2018-03-10 08:00:00  1002.616478
9  2018-03-10 09:00:00  1001.260352
10 2018-03-10 10:00:00  1001.587378
11 2018-03-10 11:00:00  1003.273713
12 2018-03-10 12:00:00  1004.264993
13 2018-03-10 13:00:00  1004.766629
14 2018-03-10 14:00:00  1005.752967
15 2018-03-10 15:00:00  1005.523311
16 2018-03-10 16:00:00  1004.212798
17 2018-03-10 17:00:00  1004.104629
18 2018-03-10 18:00:00  1004.006069
19 2018-03-10 19:00:00  1005.161779
20 2018-03-10 20:00:00  1005.998193
21 2018-03-10 21:00:00  1008.371414
22 2018-03-10 22:00:00  1008.425156
23 2018-03-10 23:00:00  1009.258622
24 2018-03-11 00:00:00  1011.173455


In [5]:
# ARIMA Timestamp support test
from hana_ml.algorithms.pal.tsa.arima import ARIMA

arima = ARIMA(order=(1, 0, 0), seasonal_order=(1, 0, 0, 2), method='mle', thread_ratio=1.0)
arima.fit(df)
print(arima.fitted_.collect())
result = arima.predict(forecast_method='innovations_algorithm',forecast_length=5, allow_new_index=True)
print(result.collect())
print(result.get_table_structure())
result = arima.predict(forecast_method='innovations_algorithm',forecast_length=5, allow_new_index=False)
print(result.collect())
print(result.get_table_structure())

     INDEX(INT)       FITTED  RESIDUALS
0             1  1005.148296  -4.563478
1             2  1000.627228  -1.107854
2             3   999.729508  -1.062771
3             4   998.032769   2.825189
4             5  1000.224070   1.922950
..          ...          ...        ...
212         213  1008.172922   0.439297
213         214  1008.065041   1.618013
214         215  1008.314656   1.634026
215         216  1010.632401  -0.595490
216         217  1010.156055  -0.664508

[217 rows x 3 columns]
            TIMESTAMP     FORECAST        SE         LO80         HI80  \
0 2018-03-10 01:00:00  1009.516975  1.363804  1007.769190  1011.264760   
1 2018-03-10 02:00:00  1009.074657  1.878050  1006.667838  1011.481477   
2 2018-03-10 03:00:00  1009.058655  2.925398  1005.309605  1012.807705   
3 2018-03-10 04:00:00  1008.693399  3.614861  1004.060767  1013.326031   
4 2018-03-10 05:00:00  1008.648988  4.556635  1002.809424  1014.488553   

          LO95         HI95  
0  1006.843969  1012.

In [6]:
# ARIMA Timestamp support test
from hana_ml.algorithms.pal.tsa.arima import ARIMA

arimax = ARIMA(order=(1, 0, 0), seasonal_order=(1, 0, 0, 2), method='mle', thread_ratio=1.0)
arimax.fit(df_arimax, key='INDEX', endog= 'Y', exog='EX')
print(arima.fitted_.collect())
result = arimax.predict(predict_arimax_df, forecast_method='innovations_algorithm',forecast_length=5, allow_new_index=True)
print(result.collect())
result = arima.predict(predict_arimax_df, forecast_method='innovations_algorithm',forecast_length=5, allow_new_index=False)
print(result.collect())


     INDEX(INT)       FITTED  RESIDUALS
0             1  1005.148296  -4.563478
1             2  1000.627228  -1.107854
2             3   999.729508  -1.062771
3             4   998.032769   2.825189
4             5  1000.224070   1.922950
..          ...          ...        ...
212         213  1008.172922   0.439297
213         214  1008.065041   1.618013
214         215  1008.314656   1.634026
215         216  1010.632401  -0.595490
216         217  1010.156055  -0.664508

[217 rows x 3 columns]
                 INDEX     FORECAST        SE         LO80         HI80  \
0  2018-03-10 01:00:00  1013.204511  1.062121  1011.843348  1014.565674   
1  2018-03-10 02:00:00  1013.205186  1.502064  1011.280212  1015.130159   
2  2018-03-10 03:00:00  1013.209064  1.868262  1010.814789  1015.603340   
3  2018-03-10 04:00:00  1013.210744  2.173617  1010.425142  1015.996347   
4  2018-03-10 05:00:00  1013.203637  2.442090  1010.073971  1016.333302   
5  2018-03-10 06:00:00  1013.208388  2.683840 

In [7]:
# Auto ARIMA Timestamp support test
from hana_ml.algorithms.pal.tsa.auto_arima import AutoARIMA

autoarima = AutoARIMA(search_strategy=1, allow_linear=1, thread_ratio=1.0)
autoarima.fit(df)
result= autoarima.predict(forecast_method='innovations_algorithm', forecast_length=5, allow_new_index=True)
print(result.collect())
result= autoarima.predict(forecast_method='innovations_algorithm', forecast_length=5, allow_new_index=False)
print(result.collect())

            TIMESTAMP  FORECAST        SE         LO80         HI80  \
0 2018-03-10 01:00:00   1009.49  1.103046  1008.076389  1010.903611   
1 2018-03-10 02:00:00   1009.49  1.559942  1007.490853  1011.489147   
2 2018-03-10 03:00:00   1009.49  1.910531  1007.041555  1011.938445   
3 2018-03-10 04:00:00   1009.49  2.206092  1006.662779  1012.317221   
4 2018-03-10 05:00:00   1009.49  2.466485  1006.329071  1012.650929   

          LO95         HI95  
0  1007.328070  1011.651930  
1  1006.432569  1012.547431  
2  1005.745427  1013.234573  
3  1005.166140  1013.813860  
4  1004.655778  1014.324222  
   TIMESTAMP  FORECAST        SE         LO80         HI80         LO95  \
0          0   1009.49  1.103046  1008.076389  1010.903611  1007.328070   
1          1   1009.49  1.559942  1007.490853  1011.489147  1006.432569   
2          2   1009.49  1.910531  1007.041555  1011.938445  1005.745427   
3          3   1009.49  2.206092  1006.662779  1012.317221  1005.166140   
4          4   100

In [8]:
# Online ARIMA Test
from hana_ml.algorithms.pal.tsa import online_algorithms

oarima = online_algorithms.OnlineARIMA(order=(4,0,8), output_fitted=True, learning_rate=0.00001)
oarima.partial_fit(df)
result = oarima.predict(forecast_length=5, allow_new_index=False)
print(result.collect())
result = oarima.predict(forecast_length=5, allow_new_index=True)
print(result.collect())

   ID     FORECAST
0   0  1342.116602
1   1  1358.147168
2   2  1375.551742
3   3  1395.214327
4   4  1416.677054
                   ID     FORECAST
0 2018-03-10 01:00:00  1342.116602
1 2018-03-10 02:00:00  1358.147168
2 2018-03-10 03:00:00  1375.551742
3 2018-03-10 04:00:00  1395.214327
4 2018-03-10 05:00:00  1416.677054


In [9]:
# VectorARIMA test case
from hana_ml.algorithms.pal.tsa.vector_arima import VectorARIMA
dates = pd.date_range('2018-03-01', '2018-03-10',freq='H')
T = (dates.max()-dates.min()).days / 365
N = dates.size
start_price = 1000
y_varima = pd.DataFrame()
y_varima['INDEX'] = dates
y_varima['Y1'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
y_varima['Y2'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
df_varima = create_dataframe_from_pandas(conn, y_varima, '#VARIMA_TIMESTAMP', force=True).sort("INDEX")
print(df_varima.collect())


# vectorARIMA test for timestamp
varima = VectorARIMA(model_type='VAR', output_fitted=True)
varima.fit(df_varima, key='INDEX')

result_dict, result = varima.predict(forecast_length=5, allow_new_index=False)
print(varima.is_index_int)
print(varima.forecast_start)
print(varima.timedelta)
print(result.collect())
print(result_dict['Y1'].collect())

result_dict, result = varima.predict(forecast_length=5, allow_new_index=True)
print(result.collect())
print(result_dict['Y1'].collect())


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s]


                  INDEX           Y1           Y2
0   2018-03-01 00:00:00   999.250259   998.950531
1   2018-03-01 01:00:00   999.379673   998.593447
2   2018-03-01 02:00:00   999.625414   997.875242
3   2018-03-01 03:00:00   999.783385   999.853495
4   2018-03-01 04:00:00   999.937931  1000.040072
..                  ...          ...          ...
212 2018-03-09 20:00:00  1003.593286  1006.726557
213 2018-03-09 21:00:00  1003.381375  1008.112802
214 2018-03-09 22:00:00  1004.463229  1008.377012
215 2018-03-09 23:00:00  1005.477144  1008.508672
216 2018-03-10 00:00:00  1005.667370  1006.165946

[217 rows x 3 columns]
False
2018-03-10 01:00:00
3600.0
  COLNAME  IDX     FORECAST        SE         LO95         HI95
0      Y1    0  1005.143036  0.982225  1003.217875  1007.068196
1      Y1    1   918.072435  1.453688   915.223206   920.921663
2      Y1    2   836.820494  1.761110   833.368718   840.272270
3      Y1    3   763.129758  1.978059   759.252762   767.006754
4      Y1    4   696.51

In [10]:
# VectorARIMA test case
from hana_ml.algorithms.pal.tsa.vector_arima import VectorARIMA
dates = pd.date_range('2018-03-01', '2018-03-10',freq='H')
T = (dates.max()-dates.min()).days / 365
N = dates.size
start_price = 1000
y_varimax = pd.DataFrame()
y_varimax['INDEX'] = dates
y_varimax['Y1'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
y_varimax['Y2'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
y_varimax['EX'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
df_varimax = create_dataframe_from_pandas(conn, y_varimax, '#VARIMAX_TIMESTAMP', force=True).sort("INDEX")
#print(df_varimax.collect())

# vector predict
dates = pd.date_range('2018-03-10', '2018-03-11', freq='H')
T = (dates.max()-dates.min()).days / 365
N = dates.size
start_price = 1000
predict_varimax = pd.DataFrame()
predict_varimax['INDEX'] = dates
predict_varimax['EX'] = geometric_brownian_motion(T, N, sigma=0.1, S0=start_price)
predict_varimax_df = create_dataframe_from_pandas(conn, predict_varimax, '#VARIMAX_PREDICT_TIMESTAMP', force=True).sort("INDEX")
#print(predict_varimax_df.collect())

# vector ARIMA test for timestamp support
varimax = VectorARIMA(model_type='VAR', output_fitted=True)
varimax.fit(df_varimax, key='INDEX', endog = ['Y1', 'Y2'], exog=['EX'])

result_dict, result = varimax.predict(predict_varimax_df, forecast_length=5, allow_new_index=False)
#print(varimax.is_index_int)
#print(varimax.forecast_start)
#print(varimax.timedelta)
#print(result.collect())
print(result_dict['Y1'].collect())
print(result.count())
print(result_dict['Y1'].count())
print(result.get_table_structure())
print(result_dict['Y1'].get_table_structure())
result_dict, result = varimax.predict(predict_varimax_df, forecast_length=5, allow_new_index=True)
#print(result.collect())
#print(varimax.is_index_int)
#print(varimax.forecast_start)
#print(varimax.timedelta)
print(result_dict['Y1'].collect())
print(result.get_table_structure())
print(result_dict['Y1'].get_table_structure())


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.68it/s]


    IDX     FORECAST        SE         LO95         HI95
0     1  1009.936552  1.084459  1007.811013  1012.062091
1     2   962.303343  1.515318   959.333320   965.273366
2     3   918.117271  1.835082   914.520512   921.714031
3     4   877.136522  2.096634   873.027120   881.245924
4     5   839.043049  2.320772   834.494335   843.591763
5     6   803.591701  2.518280   798.655872   808.527531
6     7   770.581423  2.695606   765.298036   775.864811
7     8   739.868443  2.856963   734.268795   745.468090
8     9   711.291307  3.005277   705.400965   717.181650
9    10   684.651911  3.142670   678.492277   690.811545
10   11   659.742557  3.270738   653.331910   666.153203
11   12   636.431881  3.390711   629.786088   643.077674
12   13   614.658075  3.503559   607.791099   621.525051
13   14   594.260592  3.610066   587.184861   601.336322
14   15   575.148361  3.710872   567.875051   582.421670
15   16   557.218481  3.806508   549.757725   564.679237
16   17   540.371775  3.897422 

In [11]:
# Test for timestamp support for exponetial smoothing
from hana_ml.algorithms.pal.tsa.exponential_smoothing import AutoExponentialSmoothing, BrownExponentialSmoothing, Croston
from hana_ml.algorithms.pal.tsa.exponential_smoothing import SingleExponentialSmoothing, DoubleExponentialSmoothing, TripleExponentialSmoothing

# Single ES
sesm = SingleExponentialSmoothing(expost_flag=True, forecast_num=4)
forecast = sesm.fit_predict(df, key='INDEX')
print(forecast.collect())
print(forecast.count())

sesm = SingleExponentialSmoothing(expost_flag=False, forecast_num=4)
forecast = sesm.fit_predict(df, key='INDEX')
print(forecast.collect())
print(forecast.count())
# Double ES
desm = DoubleExponentialSmoothing(expost_flag=True, forecast_num=4)
forecast = desm.fit_predict(df, key='INDEX')
print(forecast.collect())

desm = DoubleExponentialSmoothing(expost_flag=False, forecast_num=4)
forecast = desm.fit_predict(df, key='INDEX')
print(forecast.collect())

# Triple ES
tesm = TripleExponentialSmoothing(forecast_num=5, expost_flag=True)
forecast = tesm.fit_predict(df, key='INDEX')
print(forecast.collect())

tesm = TripleExponentialSmoothing(forecast_num=5, expost_flag=False)
forecast = tesm.fit_predict(df, key='INDEX')
print(forecast.collect())

# auto ES
auto_exp_smooth = AutoExponentialSmoothing(model_selection = True, forecast_num=3)
forecast = auto_exp_smooth.fit_predict(df, key='INDEX')
print(forecast.collect())

# Brown
brown_exp_smooth = BrownExponentialSmoothing(forecast_num=6, expost_flag=True)
forecast = brown_exp_smooth.fit_predict(df, key='INDEX')
print(forecast.collect())
brown_exp_smooth = BrownExponentialSmoothing(forecast_num=6, expost_flag=False)
forecast = brown_exp_smooth.fit_predict(df, key='INDEX')
print(forecast.collect())

# Croston
croston = Croston(forecast_num=4, expost_flag=True)
forcast = croston.fit_predict(df, key='INDEX')
print(forecast.collect())

croston = Croston(forecast_num=4, expost_flag=False)
forcast = croston.fit_predict(df, key='INDEX')
print(forecast.collect())


              TIMESTAMP        VALUE    PI1_LOWER    PI1_UPPER    PI2_LOWER  \
0   2018-03-01 01:00:00  1000.584817          NaN          NaN          NaN   
1   2018-03-01 02:00:00  1000.478273          NaN          NaN          NaN   
2   2018-03-01 03:00:00  1000.297120          NaN          NaN          NaN   
3   2018-03-01 04:00:00  1000.353203          NaN          NaN          NaN   
4   2018-03-01 05:00:00  1000.532585          NaN          NaN          NaN   
..                  ...          ...          ...          ...          ...   
215 2018-03-10 00:00:00  1011.631479          NaN          NaN          NaN   
216 2018-03-10 01:00:00  1011.417486  1008.757309  1014.077662  1007.349097   
217 2018-03-10 02:00:00  1011.417486  1008.744042  1014.090930  1007.328806   
218 2018-03-10 03:00:00  1011.417486  1008.730839  1014.104132  1007.308615   
219 2018-03-10 04:00:00  1011.417486  1008.717702  1014.117270  1007.288522   

       PI2_UPPER  
0            NaN  
1            

In [12]:
#LR_seasonal_adjust test
from hana_ml.algorithms.pal.tsa.lr_seasonal_adjust import LR_seasonal_adjust
lr = LR_seasonal_adjust(forecast_length=3, expost_flag=False)
forecast = lr.fit_predict(df, key='INDEX', endog='Y')
print(forecast.collect())

lr = LR_seasonal_adjust(forecast_length=3, expost_flag=True)
forecast = lr.fit_predict(df, key='INDEX', endog='Y')
print(forecast.collect())

            TIMESTAMP        VALUE
0 2018-03-10 01:00:00  1012.870630
1 2018-03-10 02:00:00  1012.941479
2 2018-03-10 03:00:00  1013.012328
              TIMESTAMP        VALUE
0   2018-03-01 00:00:00   997.496486
1   2018-03-01 01:00:00   997.567334
2   2018-03-01 02:00:00   997.638183
3   2018-03-01 03:00:00   997.709031
4   2018-03-01 04:00:00   997.779880
..                  ...          ...
215 2018-03-09 23:00:00  1012.728933
216 2018-03-10 00:00:00  1012.799782
217 2018-03-10 01:00:00  1012.870630
218 2018-03-10 02:00:00  1012.941479
219 2018-03-10 03:00:00  1013.012328

[220 rows x 2 columns]
