In [0]:
!pip install hana_ml
!pip install mlflow

In [0]:
# Restart the notebook's Python process; presumably needed so that the packages
# installed in the previous cell are picked up by subsequent imports.
dbutils.library.restartPython()

In [0]:
# Standard library
import os

# SAP HANA ML client and MLflow
import hana_ml
import mlflow
from hana_ml import dataframe

# Report which library versions this notebook session is running against.
for _label, _library in (("hana_ml version:", hana_ml), ("mlflow version:", mlflow)):
    print(_label, _library.__version__)



In [0]:

# Databricks secret scope that holds the HANA connection settings.
scope = "<scope>" #as defined in create_secrets

# Read the connection settings from the secret scope and expose them as
# environment variables (kept as in the original cell, in case other code reads them).
# NOTE(review): this places the password in the process environment - confirm that is acceptable.
os.environ['HANA_ADDRESS'] = dbutils.secrets.get(scope=scope, key="hana_url")
os.environ['HANA_PORT']    = dbutils.secrets.get(scope=scope, key="hana_port")
os.environ['HANA_UNAME']   = dbutils.secrets.get(scope=scope, key="hana_user")
os.environ['HANA_PASS']    = dbutils.secrets.get(scope=scope, key="hana_password")
import hana_ml.dataframe as dataframe

# Environment variables are always strings, so convert the port to an integer
# before handing it to the connection.
cc = dataframe.ConnectionContext(
    address=os.environ['HANA_ADDRESS'],
    port=int(os.environ['HANA_PORT']),
    user=os.environ['HANA_UNAME'],
    password=os.environ['HANA_PASS']
)
if cc.connection.isconnected():
    print(f'User {os.environ["HANA_UNAME"]} connected to HANA successfully')
    print(f"HANA Version: {cc.hana_version()}")
else:
    # Fail loudly here instead of letting a later cell fail with a less obvious error.
    raise ConnectionError(
        f'User {os.environ["HANA_UNAME"]} could not connect to HANA at '
        f'{os.environ["HANA_ADDRESS"]}:{os.environ["HANA_PORT"]}'
    )

#### Get a train and test dataset
This dataset will be used for Automatic Regression

In [0]:
from hana_ml.algorithms.pal.utility import DataSets, Settings

# Load the bike sample dataset through the HANA connection `cc`.
bike_dataset = DataSets.load_bike_data(cc)

# Structural overview: (rows, columns), column names, and column types.
dataset_shape = bike_dataset.shape
print("Shape of datset: {}".format(dataset_shape))

column_names = bike_dataset.columns
print(column_names)

column_types = bike_dataset.dtypes()
print(column_types)

# Preview: collect only the first 3 rows and print them.
preview_rows = bike_dataset.head(3).collect()
print(preview_rows)

#### Split the dataset into train and test

In [0]:
# Add an ID column (used as the key for AutomaticRegression), derived from 'days_since_2011'.
# Guarded so that re-running this cell does not try to add 'ID' a second time.
if 'ID' not in bike_dataset.columns:
    bike_dataset = bike_dataset.add_id('ID', ref_col='days_since_2011')

# Reorder the columns so that the label 'cnt' is the last one.
# A new list is built rather than calling .remove() on the list returned by
# `.columns`, so the DataFrame's own column list is never modified in place.
cols = [col for col in bike_dataset.columns if col != 'cnt']
bike_data = bike_dataset[cols + ['cnt']]

# Split by ID: rows with ID <= 600 are used for training, the rest for testing.
bike_train = bike_data.filter('ID <= 600')
bike_test = bike_data.filter('ID > 600')
print(bike_train.head(3).collect())
print(bike_test.head(3).collect())

### Set up MLflow

In [0]:
# Use the Databricks tracking server for all MLflow calls in this notebook.
mlflow.set_tracking_uri("databricks")

# Select the experiment that runs will be logged under.
experiment_name = '<experiment_name>' 
mlflow.set_experiment(experiment_name)

# Echo the tracking URI that is now in effect.
tracking_uri = mlflow.get_tracking_uri()
print(f"Tracking Server URI: '{tracking_uri}'")

#### Train the model with automl from hana-ml for predicting the bike sales based on different factors and log the experiments easily via mlflow

In [0]:
# AutomaticRegression
from hana_ml.algorithms.pal.auto_ml import AutomaticClassification, AutomaticRegression

# Small search budget (2 generations, population of 15, 5 offspring).
auto_r = AutomaticRegression(generations=2,
                             population_size=15,
                             offspring_size=5)

# enable_workload_class
#auto_r.enable_workload_class(workload_class_name="PAL_AUTOML_WORKLOAD")
auto_r.disable_workload_class_check()

# Train inside an MLflow run so that the fit is logged via hana-ml's autologging.
# Any exception raised by fit() propagates unchanged; the former
# `try/except ... raise e` wrapper added nothing and has been removed.
with mlflow.start_run(run_name="hana-ml-autoreg-bike") as run:
    # Capture the run id first so that `runid` always refers to this run,
    # even if fit() fails, rather than to a stale value from an earlier execution.
    runid = run.info.run_id
    auto_r.enable_mlflow_autologging(is_exported=True)
    auto_r.fit(bike_train, key="ID")

#### Test the model for prediction on holdout dataset

In [0]:
# Predict on the test rows with the label column 'cnt' removed.
bike_test_features = bike_test.deselect('cnt')
res = auto_r.predict(bike_test_features, key="ID")

# Collect the predictions and print them.
predictions = res.collect()
print(predictions)

### Load the model from mlflow for prediction

In [0]:
from hana_ml.model_storage import ModelStorage

# Load the model that the training run logged to MLflow, addressed by its run id.
bikemodel = ModelStorage.load_mlflow_model(connection_context=cc, model_uri='runs:/{}/model'.format(runid))

# Predict on the test rows with the label column 'cnt' removed.
res = bikemodel.predict(bike_test.deselect('cnt') , key="ID")
print(res.collect())

# Saving this here for using later via the Serving Endpoint.
# force=True replaces an existing table, so that re-running the notebook does
# not fail because the table was already created by a previous run.
bike_test.deselect('cnt').save("INFERENCE_BIKE_DATA_TBL", force=True)