In [None]:
from hana_ml import dataframe
from hana_ml.algorithms.pal.utility import DataSets, Settings
# Read the four connection values from the 'api' entry of the shared config file.
url, port, user, pwd = Settings.load_config("../../config/e2edata.ini", 'api')

# Connection object handed to the DataSets loaders and PAL calls in later cells.
connection_context = dataframe.ConnectionContext(address=url,
                                                 port=port,
                                                 user=user,
                                                 password=pwd)

In [None]:
from hana_ml.algorithms.pal.pipeline import Pipeline
from hana_ml.algorithms.pal.decomposition import PCA
from hana_ml.algorithms.pal.preprocessing import Imputer
from hana_ml.algorithms.pal.trees import HybridGradientBoostingClassifier
from hana_ml.algorithms.pal.utility import DataSets, Settings
from hana_ml.algorithms.pal.unified_classification import UnifiedClassification
# Step 1: PCA with scaling enabled and scores requested.
pca_step = ('PCA', PCA(scaling=True, scores=True))

# Step 2: hybrid gradient boosting classifier with fixed hyper-parameters.
hgbt_step = ('HGBT_Classifier',
             HybridGradientBoostingClassifier(n_estimators=4,
                                              split_threshold=0,
                                              learning_rate=0.5,
                                              fold_num=5,
                                              max_depth=6))

# The pipeline runs the steps in the order listed.
my_pipeline = Pipeline([pca_step, hgbt_step])


In [None]:
# load_diabetes_data returns four values; the fourth is not used in this notebook.
diabetes_full, diabetes_train, diabetes_test, _ = DataSets.load_diabetes_data(connection_context)

# Store the test rows without the CLASS column as DIABETES_TEST_TBL
# (force=True is passed to save).
diabetes_test_unlabeled = diabetes_test.drop("CLASS")
diabetes_test_m = diabetes_test_unlabeled.save("DIABETES_TEST_TBL", force=True)

In [None]:
my_pipeline.enable_hana_execution()

# Dropping a table that does not exist raises, which breaks the first run on a
# fresh schema. Only remove a MY_MODEL table left over from an earlier run.
if connection_context.has_table("MY_MODEL"):
    connection_context.drop_table("MY_MODEL")

# Train on the diabetes training set; the fitted model is stored in MY_MODEL.
my_pipeline.fit(diabetes_train, key="ID", label="CLASS", model_table_name='MY_MODEL')


In [None]:
# Predict on the saved test data without the CLASS column, using ID as the key.
# As the last expression of the cell, the returned object is displayed.
my_pipeline.predict(diabetes_test_m, key="ID")

In [None]:
# Create an apply function named 'my_apply_func' from the pipeline, passing the
# test data without CLASS. force=True presumably replaces an existing function
# of the same name — TODO confirm against the hana_ml docs.
my_pipeline.create_apply_func('my_apply_func', diabetes_test.drop("CLASS"), force=True)

In [None]:
# Call the apply function on the saved test table and fetch the result to the client.
apply_query = 'SELECT * FROM "my_apply_func"({})'.format("DIABETES_TEST_TBL")
connection_context.sql(apply_query).collect()

In [None]:
# Set up AMDP generation for the pipeline: the class is named "my_amdp", and
# "train_cds" / "apply_cds" are given as the training and apply datasets.
my_pipeline.create_amdp_class(amdp_name="my_amdp", training_dataset="train_cds", apply_dataset="apply_cds")

In [None]:
# Build the AMDP class; presumably this uses the settings given to
# create_amdp_class in the previous cell — TODO confirm.
my_pipeline.build_amdp_class()

In [None]:
# Print the AMDP template text stored on the pipeline for inspection.
print(my_pipeline.amdp_template)

In [None]:
# Write the AMDP to a file. No path is passed, so the output location is
# whatever write_amdp_file uses by default — TODO confirm where it lands.
my_pipeline.write_amdp_file()

In [None]:
# Last expression of the cell: displays whatever get_fit_parameters() returns
# for the pipeline.
my_pipeline.get_fit_parameters()

In [None]:
# Candidate values per hyper-parameter, passed to the classifier as param_values.
cv_values = {
    'learning_rate': [0.1, 0.4, 0.7, 1.0],
    'n_estimators': [4, 6, 8, 10],
    'split_threshold': [0.1, 0.4, 0.7, 1.0],
}

# Grid search with 5-fold cross-validation; error_rate is the evaluation metric
# and auc is given as a reference metric.
hgc = UnifiedClassification(
    func='HybridGradientBoostingTree',
    param_search_strategy='grid',
    resampling_method='cv',
    evaluation_metric='error_rate',
    ref_metric=['auc'],
    fold_num=5,
    random_state=1,
    param_values=cv_values,
)

# Stratified partitioning on CLASS with a fixed partition seed.
hgc.fit(
    diabetes_train,
    key='ID',
    label='CLASS',
    partition_method='stratified',
    partition_random_state=1,
    stratified_column='CLASS',
)

In [None]:
from hana_ml.visualizers.unified_report import UnifiedReport

In [None]:
# Wrap the fitted UnifiedClassification model in a report object; the following
# cells build and display it.
ur = UnifiedReport(hgc)

In [None]:
# Build the report for hgc and render it in the notebook.
ur.build().display()

In [None]:
# Switch the model report to the 'v1' style, then rebuild and render it again
# so the result can be compared with the previous cell's output.
ur.set_model_report_style('v1')
ur.build().display()

In [None]:
# Covid data: the "Date" column is converted using the MM/DD/YYYY format.
covid_raw = DataSets.load_covid_data(connection_context)
ts_data = covid_raw.to_datetime(cols={"Date": "MM/DD/YYYY"})

# Shampoo data: used as loaded (keyed by ID in the cells below).
ts_data2 = DataSets.load_shampoo_data(connection_context)

In [None]:
import math

TRAIN_FRACTION = 0.7  # share of rows, taken from the head, used for training


def _head_tail_split(data, train_fraction=TRAIN_FRACTION):
    """Split ``data`` into a leading training part and a trailing test part.

    The row count is fetched once. The test part receives every row not
    assigned to training, so no row between the two parts is silently dropped
    (``floor(0.7 * n) + floor(0.3 * n)`` can be smaller than ``n``).

    Parameters
    ----------
    data : object exposing ``count()``, ``head(n)`` and ``tail(n)``
        The dataset to split.
    train_fraction : float, optional
        Fraction of rows assigned to the training part. Defaults to 0.7.

    Returns
    -------
    tuple
        ``(train, test)``, each with ``dropna()`` applied after the split.
    """
    # NOTE(review): head/tail are called without an explicit sort, as in the
    # original cell — confirm the row order is chronological.
    total_rows = data.count()
    train_rows = math.floor(total_rows * train_fraction)
    test_rows = total_rows - train_rows
    return data.head(train_rows).dropna(), data.tail(test_rows).dropna()


ts_train, ts_test = _head_tail_split(ts_data)
ts_train2, ts_test2 = _head_tail_split(ts_data2)

#### Auto-ML Time Series Report

- Data Analyzer
- Forecast Analysis
- Best Pipeline Report

In [None]:
from hana_ml.algorithms.pal.auto_ml import AutomaticTimeSeries
# Default AutomaticTimeSeries; the workload-class check is switched off before fitting.
auto_ts = AutomaticTimeSeries()
auto_ts.disable_workload_class_check()

# Fit on the covid training part: "Date" is the key, "Increase rate" the target.
auto_ts.fit(data=ts_train,
            key="Date",
            endog="Increase rate")

In [None]:
# The target column is removed from the test part before it is passed to predict.
ts_test_without_target = ts_test.deselect("Increase rate")
auto_ts.predict(ts_test_without_target, key="Date")

In [None]:
# Fetch auto_ts.forecast_result to the client for display; presumably it is
# populated by the predict call in the previous cell — TODO confirm.
auto_ts.forecast_result.collect()

In [None]:


# Build and render the unified report for the AutomaticTimeSeries model.
UnifiedReport(auto_ts).build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.additive_model_forecast import AdditiveModelForecast

# Additive model forecast on the covid data. No key is passed to fit or predict,
# and the test part keeps its target column.
amf = AdditiveModelForecast()
amf.fit(endog="Increase rate", data=ts_train)
amf.predict(show_explainer=True, data=ts_test)

# Report for the fitted model, rendered as the cell output.
amf_report = UnifiedReport(amf)
amf_report.build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.arima import ARIMA

# ARIMA on the covid data with three exogenous columns.
arima_exog = ['Confirmed', 'Recovered', 'Deaths']
arima = ARIMA(background_size=10)
arima.fit(data=ts_train,
          key="Date",
          endog="Increase rate",
          exog=arima_exog)

# Predict on the test part with the target column removed.
arima_test_input = ts_test.deselect("Increase rate")
result = arima.predict(data=arima_test_input,
                       key="Date",
                       show_explainer=True,
                       allow_new_index=True)

In [None]:
# Build and render the unified report for whichever model `arima` currently
# refers to (the name is rebound in a later cell, so run order matters).
UnifiedReport(arima).build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.arima import ARIMA

# NOTE: this rebinds `arima`. The covid-data ARIMA model from the earlier cell
# is no longer reachable under this name, so the report cells depend on run order.
arima = ARIMA(background_size=10)
# Fit on the shampoo training part: "ID" is the key, "SALES" the target; no exog.
arima.fit(data=ts_train2, key="ID", endog="SALES")


In [None]:
# Fetch arima.fitted_ to the client for display.
arima.fitted_.collect()

In [None]:
# Predict without input data: forecast_length=10 is requested and
# allow_new_index=True is passed, as in the earlier ARIMA cell.
arima.predict(forecast_length=10, allow_new_index=True)

In [None]:
# Build and render the unified report for the current `arima` model
# (the one fitted on ts_train2 when cells run top to bottom).
UnifiedReport(arima).build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.auto_arima import AutoARIMA

# AutoARIMA on the covid data, with the same inputs as the ARIMA cell.
auto_arima_exog = ['Confirmed', 'Recovered', 'Deaths']
auto_arima = AutoARIMA(background_size=10)
auto_arima.fit(data=ts_train,
               key="Date",
               endog="Increase rate",
               exog=auto_arima_exog)

# Predict on the test part with the target column removed.
auto_arima_test_input = ts_test.deselect("Increase rate")
result = auto_arima.predict(data=auto_arima_test_input,
                            key="Date",
                            show_explainer=True,
                            allow_new_index=True)

In [None]:
# Build and render the unified report for the AutoARIMA model.
UnifiedReport(auto_arima).build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.exponential_smoothing import SingleExponentialSmoothing, CrostonTSB
# NOTE(review): the variable is named `ctsb` and CrostonTSB is imported above,
# yet SingleExponentialSmoothing is instantiated, which makes this cell identical
# to the `ses` cell further down. Possibly CrostonTSB(forecast_num=4) was
# intended — confirm before changing.
ctsb = SingleExponentialSmoothing(forecast_num=4)
# ts_train already had dropna() applied when it was created; it is applied again here.
ctsb.fit_predict(ts_train.dropna(), key="Date", endog="Increase rate")

In [None]:
# Build and render the unified report for the `ctsb` model.
UnifiedReport(ctsb).build().display()

In [None]:
# Single exponential smoothing on the covid training part, with forecast_num=4.
ses = SingleExponentialSmoothing(forecast_num=4)
ses_input = ts_train.dropna()
ses.fit_predict(ses_input, key="Date", endog="Increase rate")

In [None]:
# Build and render the unified report for the single exponential smoothing model.
UnifiedReport(ses).build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.bsts import BSTS

# BSTS with default settings on the covid data, using three exogenous columns.
bsts_exog = ['Confirmed', 'Recovered', 'Deaths']
bsts = BSTS()
bsts.fit(ts_train.dropna(),
         key="Date",
         endog="Increase rate",
         exog=bsts_exog)

# Predict on the test part with the target column removed.
bsts_test_input = ts_test.deselect("Increase rate")
bsts.predict(data=bsts_test_input, key="Date")

In [None]:
# Build and render the unified report for the BSTS model.
UnifiedReport(bsts).build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.rnn import GRUAttention

# GRUAttention with default settings, fitted on the shampoo training part
# ("ID" is the key, "SALES" the target).
attention = GRUAttention()
attention_input = ts_train2.dropna()
attention.fit(attention_input, key="ID", endog="SALES")


In [None]:
# Build and render the unified report for the GRUAttention model.
UnifiedReport(attention).build().display()

In [None]:
from hana_ml.algorithms.pal.tsa.lstm import LSTM

# LSTM with default settings, fitted on the shampoo training part
# ("ID" is the key, "SALES" the target).
lstm = LSTM()
lstm_input = ts_train2.dropna()
lstm.fit(lstm_input, key="ID", endog="SALES")

In [None]:
# Build and render the unified report for the LSTM model.
UnifiedReport(lstm).build().display()

In [None]:
import os
from getpass import getpass

# Connection details for the HANA Cloud instance come from the environment so
# that no host, user name or password is stored in the notebook.
#   HANA_CLOUD_URL       host name of the instance (required)
#   HANA_CLOUD_PORT      port, defaults to 443
#   HANA_CLOUD_USER      database user (required)
#   HANA_CLOUD_PASSWORD  password; prompted for interactively when unset
# SECURITY: a password literal previously committed in this cell should be
# treated as compromised and rotated.
url = os.environ["HANA_CLOUD_URL"]
port = int(os.environ.get("HANA_CLOUD_PORT", "443"))
user = os.environ["HANA_CLOUD_USER"]
pwd = os.environ.get("HANA_CLOUD_PASSWORD") or getpass("HANA Cloud password: ")
conn = dataframe.ConnectionContext(url, port, user, pwd)

# target table
tbl_name = "Test"
# Dropping a missing table raises, so only drop when a previous run left one behind.
if conn.has_table(tbl_name):
    conn.drop_table(tbl_name)
conn.create_table(table=tbl_name, table_structure={'ID': 'INTEGER', 'NAME': 'VARCHAR(10)'})
print(conn.table(tbl_name).collect())

In [None]:
# import_csv_from was used without ever being imported, which raises NameError
# on a fresh kernel. It is imported here, ahead of its first use.
from hana_ml.dataframe import import_csv_from

# import test.csv into Test table
# Named csv_directory rather than `dir`, to avoid shadowing the builtin dir().
csv_directory = 'hdlfs://f5a2357e-8a46-49df-9f62-afbc6a5c66d2.files.hdl.canary-eu10.hanacloud.ondemand.com/test.csv'
import_csv_from(connection_context=conn,
                directory=csv_directory,
                schema=user,  # the connecting user's name is used as the schema
                table=tbl_name,
                threads=10,
                record_delimiter='\n',
                field_delimiter=',',
                escape_character='/',
                column_list_first_row=True,
                credential='CXCredential')

In [None]:
# azure
# NOTE(review): the storage account and SAS token are embedded in the URI
# (masked here). Presumably the `credential` argument could carry them instead,
# as in the hdlfs cell — confirm.
azure_csv_uri = 'azure://hanaadlsregression:sv=2019-02-02&ss=bfqt&srt=sco&sp=rwdlacup&se=2021-02-17T15:15:08Z&st=2020-02-17T07:15:08Z&spr=https&sig=5WNoL4YEZubOvbXXXXXXX@my-container/DEMO_TBL1.csv'
import_csv_from(
    connection_context=conn,
    directory=azure_csv_uri,
    schema='IMEX_DEMO',
    table='DEMO_TBL1',
    threads=4,
    field_delimiter=',',
    escape_character=None,
    credential=None,
)

In [None]:
# amazon
# NOTE(review): the access key and secret are embedded in the URI (masked here).
# Presumably the `credential` argument could carry them instead — confirm.
s3_csv_uri = 's3-eu-central-1://AKIAxxxxxxxxxx:xl6WWxxxxxxxxxx@my-demo/My_lineitem.csv'
import_csv_from(
    connection_context=conn,
    directory=s3_csv_uri,
    schema=None,
    table='LINEITEM',
    field_delimiter=',',
    escape_character=None,
    credential=None,
)

In [None]:
# google
# No credential is passed for this gs:// URI.
gcs_csv_uri = 'gs://open-buildings-data/v1/score_thresholds_s2_level_4.csv'
import_csv_from(
    connection_context=conn,
    directory=gcs_csv_uri,
    schema="IMEX",
    table="score_thresholds_s2_level_4",
    field_delimiter=',',
    escape_character=None,
    credential=None,
)