In [None]:
# Connection
import pandas as pd
from hana_ml import dataframe

try:
    import configparser
except ImportError:
    import ConfigParser as configparser

# Load the connection settings from the shared INI file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means nothing was loaded.
# Failing here gives a clear message instead of a NoSectionError in a later cell.
config_path = "../../config/e2edata.ini"
settings = configparser.ConfigParser()
if not settings.read(config_path):
    raise IOError("Could not read configuration file: {}".format(config_path))


#### Isolation Forest

In [None]:
# Pull the connection parameters from the [hana] section of the settings file.
url, port, user, pwd = (
    settings.get("hana", "url"),
    settings.getint("hana", "port"),
    settings.get("hana", "user"),
    settings.get("hana", "passwd"),
)

# Connection used by the Isolation Forest, model-state and dataframe sections.
connection_context = dataframe.ConnectionContext(url, port, user, pwd)

In [None]:

# https://help.sap.com/viewer/DRAFT/319d36de4fd64ac3afbf91b1fb3ce8de/2021_4_QRC/en-US/11345d90db35416e94de653248ac2184.html
    
from hana_ml.algorithms.pal.preprocessing import IsolationForest

# Eight sample points with two value columns, keyed by ID.
if_demo_pdf = pd.DataFrame({
    "ID": [0, 1, 2, 3, 4, 5, 6, 7],
    "V000": [-2, -1, -1, 1, 1, 2, 6, -4],
    "V001": [-1, -1, -2, 1, 2, 1, 3, 7],
})

# Upload the sample to the table #IF_DEMO.
# NOTE(review): force=True presumably replaces an existing table of that name - confirm.
df = dataframe.create_dataframe_from_pandas(
    connection_context,
    pandas_df=if_demo_pdf,
    table_name="#IF_DEMO",
    force=True,
)

# A fixed random_state is passed so repeated runs use the same seed.
clf = IsolationForest(random_state=2, thread_ratio=0)

# Fit and score the same data in one call, using ID as the key column.
# NOTE(review): contamination=0.25 presumably is the expected outlier share - confirm.
res = clf.fit_predict(data=df, key="ID", contamination=0.25)

In [None]:
# Print the collected fit_predict result: -1 for outliers and 1 for inliers.
print(res.collect())
# Bare last expression: the notebook displays the connection object as cell output.
connection_context

#### AutoML

In [None]:
# Pull the connection parameters from the [automl] section of the settings file.
# These rebind url/port/user/pwd, which the progress-monitor cell reuses.
url, port, user, pwd = (
    settings.get("automl", "url"),
    settings.getint("automl", "port"),
    settings.get("automl", "user"),
    settings.get("automl", "passwd"),
)

# Dedicated connection for the AutoML section.
conn_dev3 = dataframe.ConnectionContext(url, port, user, pwd)

In [None]:
# Upload the digits CSV, then shape it for classification in one chain:
# drop the "Unnamed: 0" column, rename ID to LABEL, drop rows with missing
# values, and cast LABEL to VARCHAR(500).
df = (
    dataframe.create_dataframe_from_pandas(
        conn_dev3,
        pd.read_csv("../datasets/digits.csv"),
        table_name="#demo_automl_tbl",
        force=True,
    )
    .deselect("Unnamed: 0")
    .rename_columns({"ID": "LABEL"})
    .dropna()
    .cast({"LABEL": "VARCHAR(500)"})
)

In [None]:
import uuid
from hana_ml.algorithms.pal.auto_ml import AutomaticClassification

# A fresh UUID makes the progress indicator id different on every run.
progress_id = "automl_" + str(uuid.uuid1())

# Small search budget for the demo; all values are passed through unchanged.
automl_settings = {
    "generations": 2,
    "population_size": 5,
    "offspring_size": 5,
    "elite_number": 2,
    "mutation_rate": 0.1,
    "progress_indicator_id": progress_id,
}
auto_c = AutomaticClassification(**automl_settings)

In [None]:
from hana_ml.visualizers.automl_progress import PipelineProgressStatusMonitor

# The monitor gets its own ConnectionContext (built from the same url/port/user/pwd)
# instead of reusing conn_dev3.
# NOTE(review): presumably so progress polling does not share the session running fit - confirm.
monitor_connection = dataframe.ConnectionContext(url, port, user, pwd)
progress_status_monitor = PipelineProgressStatusMonitor(
    connection_context=monitor_connection,
    automatic_obj=auto_c,
)

# Start monitoring before the fit call so progress is tracked from the beginning.
progress_status_monitor.start()

# NOTE(review): the workload class "PAL_AUTOML_WORKLOAD" presumably must exist on the server - confirm.
auto_c.enable_workload_class("PAL_AUTOML_WORKLOAD")
auto_c.fit(df)

In [None]:
from hana_ml.visualizers.automl_report import BestPipelineReport
# Build the best-pipeline report for the fitted auto_c object and produce it as a
# notebook iframe (per the method name); it is the last expression of the cell.
BestPipelineReport(auto_c).generate_notebook_iframe()

In [None]:
# Use the first 10 rows, without the LABEL column, as prediction input.
df_test = df.head(10).deselect("LABEL")
# Predict with the fitted AutoML object and print the collected result.
res = auto_c.predict(df_test)
print(res.collect())

In [None]:
# Collect the best-pipeline table and take the value at row 0, column 1.
best_pipelines = auto_c.best_pipeline_.collect()
pipeline = best_pipelines.iat[0, 1]

# Fit again, this time passing that pipeline explicitly.
auto_c.fit(df, pipeline=pipeline)

# Predict on the same 10 test rows and print the collected result.
res = auto_c.predict(df_test)
print(res.collect())

In [None]:
# Close the AutoML connection; the remaining sections use connection_context.
conn_dev3.close()

#### create model state

In [None]:
from hana_ml.algorithms.pal.utility import DataSets
# Load the diabetes demo data through connection_context.
# Four values are returned; the fourth is discarded here.
full_set, diabetes_train, diabetes_test, _ = DataSets.load_diabetes_data(connection_context)

In [None]:
from hana_ml.algorithms.pal.unified_classification import UnifiedClassification
# Candidate values for each parameter explored by the grid search.
cv_values = {
    'learning_rate': [0.1, 0.4, 0.7, 1.0],
    'n_estimators': [4, 6, 8, 10],
    'split_threshold': [0.1, 0.4, 0.7, 1.0],
}

# Classifier settings: grid search with 5-fold cross-validation, evaluated on
# error_rate with auc as an additional reference metric.
hgc_settings = dict(
    func='HybridGradientBoostingTree',
    param_search_strategy='grid',
    resampling_method='cv',
    evaluation_metric='error_rate',
    ref_metric=['auc'],
    fold_num=5,
    random_state=1,
    param_values=cv_values,
)
hgc = UnifiedClassification(**hgc_settings)

# Fit on the training split, using a stratified partition on the CLASS column.
hgc.fit(
    diabetes_train,
    key='ID',
    label='CLASS',
    partition_method='stratified',
    partition_random_state=1,
    stratified_column='CLASS',
)

In [None]:
# Create a model state for the fitted classifier; hgc.state is printed in the next cell.
hgc.create_model_state()

In [None]:
# Collect and print the contents of hgc.state.
print(hgc.state.collect())

In [None]:
# Build the feature list as a new list rather than calling .remove() on the
# object returned by diabetes_train.columns. If that property hands back a
# cached list, removing entries would alter the DataFrame's own column
# metadata, and re-running this cell would fail because 'CLASS' is already gone.
features = [col for col in diabetes_train.columns if col not in ('CLASS', 'ID')]

# Predict on the test split with every column except the key and the label.
pred_res = hgc.predict(diabetes_test, key='ID', features=features)


In [None]:
# Print what get_predict_execute_statement() returns.
# NOTE(review): presumably the statement executed by the predict call above - confirm.
print(hgc.get_predict_execute_statement())

#### dataframe functions

##### to_datetime

In [None]:


# One-row sample whose timestamp is stored as text, so it can be converted later.
datetime_pdf = pd.DataFrame({"datetime": ["12/01/2022 23:11:24"]})

# The column is declared explicitly as VARCHAR(50) via table_structure.
df_datetime = dataframe.create_dataframe_from_pandas(
    connection_context,
    pandas_df=datetime_pdf,
    table_name="#to_datetime_demo",
    table_structure={"datetime": "VARCHAR(50)"},
    force=True,
)

In [None]:
# Displayed as cell output: the table structure before conversion
# (the column was declared as VARCHAR(50) when the table was created).
df_datetime.get_table_structure()

In [None]:
# Convert the text column once and reuse the converted DataFrame for both
# outputs, so the column-to-format mapping is written in a single place.
df_datetime_converted = df_datetime.to_datetime({"datetime": "MM/DD/YYYY HH24:MI:SS"})

# Show the table structure after conversion, then the converted data.
print(df_datetime_converted.get_table_structure())
print(df_datetime_converted.collect())

##### to_tail

In [None]:
# One-row sample with the columns in the order LABEL, FEATURE, ID.
to_tail_pdf = pd.DataFrame({"LABEL": ["1"], "FEATURE": [2], "ID": [3]})

df_to_tail = dataframe.create_dataframe_from_pandas(
    connection_context,
    pandas_df=to_tail_pdf,
    table_name="#to_tail_demo",
    force=True,
)

# Print the uploaded data so the original column order is visible.
print(df_to_tail.collect())

In [None]:
# Apply to_tail on LABEL, then to_head on ID, and print the result.
# NOTE(review): presumably LABEL ends up as the last column and ID as the first - confirm against the output.
print(df_to_tail.to_tail("LABEL").to_head("ID").collect())

##### tail

In [None]:
# Load the iris demo data. This rebinds full_set, which earlier held the
# diabetes data, so the cells below operate on the iris set.
full_set, training_set, validation_set, test_set = DataSets.load_iris_data(connection_context)

In [None]:
# Print the result of tail(10) with ID as the reference column.
# NOTE(review): presumably the last 10 rows when ordered by ID - confirm.
print(full_set.tail(10, ref_col="ID").collect())

##### generate_feature

In [None]:
# Derive LAG features for the two sepal columns, grouped by SPECIES and ordered by ID.
# NOTE(review): trans_param presumably lists the lag parameters per target column
# ([1, 3] for SEPALLENGTHCM, [1, 4] for SEPALWIDTHCM) - confirm.
lag_features = full_set.generate_feature(
    targets=["SEPALLENGTHCM", "SEPALWIDTHCM"],
    group_by="SPECIES",
    trans_func="LAG",
    order_by="ID",
    trans_param=[[1, 3], [1, 4]],
)

# Last expression of the cell: the first 10 rows are shown as cell output.
lag_features.head(10).collect()

##### upsert and append options in create_dataframe_from_pandas

In [None]:
# Initial contents of the demo table: two rows with ID as the primary key.
upsert_seed_pdf = pd.DataFrame({"ID": [1, 2], "FEATURE": [2, 3], "LABEL": ["3", "4"]})

df_upsert = dataframe.create_dataframe_from_pandas(
    connection_context,
    pandas_df=upsert_seed_pdf,
    table_name="#upsert_demo",
    primary_key="ID",
    force=True,
)

In [None]:
# Print the table contents after the initial load.
print(df_upsert.collect())

In [None]:
# A row whose ID (2) already exists in the table, sent with upsert=True.
upsert_row_pdf = pd.DataFrame({"ID": [2], "FEATURE": [10], "LABEL": [None]})

df_upsert = dataframe.create_dataframe_from_pandas(
    connection_context,
    pandas_df=upsert_row_pdf,
    table_name="#upsert_demo",
    primary_key="ID",
    upsert=True,
)

In [None]:
# Print the table contents after the upsert=True call.
print(df_upsert.collect())

In [None]:
# A row with a new ID (3), sent with append=True.
append_row_pdf = pd.DataFrame({"ID": [3], "FEATURE": [10], "LABEL": [None]})

df_upsert = dataframe.create_dataframe_from_pandas(
    connection_context,
    pandas_df=append_row_pdf,
    table_name="#upsert_demo",
    primary_key="ID",
    append=True,
)

In [None]:
# Print the table contents after the append=True call.
print(df_upsert.collect())