In [1]:
import hana_ml
from hana_ml import dataframe
from data_load_utils import Settings
from hana_ml.algorithms.pal.unified_regression import UnifiedRegression

# Read the connection settings from the shared ini file (unpacked as url, port,
# user, pwd) and open the hana_ml connection context used by the cells below.
url, port, user, pwd = Settings.load_config("../../config/e2edata.ini")
connection_context = dataframe.ConnectionContext(url, port, user, pwd)

ModuleNotFoundError: No module named 'shapely'. Required by graph/spatial module.


## Load Data

In [2]:
def createTableAndLoadData(table_name, create_table_sql, data):
    """Create one table and bulk-insert ``data`` into it.

    Opens a cursor on the notebook-level ``connection_context``, executes
    ``create_table_sql`` as given, then inserts all rows with a single
    parameterized ``executemany`` call. The cursor is always closed, even when
    one of the statements fails.

    Parameters
    ----------
    table_name : str
        Table the rows are inserted into.
    create_table_sql : str
        Complete CREATE TABLE statement, executed unchanged.
    data : sequence of tuple
        Rows to insert. The length of the first row determines the number of
        ``?`` placeholders. An empty sequence creates the table and inserts
        nothing.

    Raises
    ------
    Exception
        Any error raised by the database driver while creating the table or
        inserting the rows is propagated to the caller.
    """
    cursor = connection_context.connection.cursor()
    try:
        cursor.execute(create_table_sql)
        # Nothing to insert: avoid indexing data[0] on an empty sequence.
        if len(data) == 0:
            return
        placeholders = ', '.join(['?'] * len(data[0]))
        insert_sql = "INSERT INTO {} VALUES ({})".format(table_name, placeholders)
        cursor.executemany(insert_sql, data)
    finally:
        # Release the cursor whether or not the statements succeeded.
        cursor.close()
    
def createTableAndLoadDataAsBatch(table_name_list, create_table_sql_list, data_list):
    """Create and fill several tables, continuing past individual failures.

    The three lists are matched by position: entry ``i`` of each list
    describes one table (name, CREATE TABLE statement, rows).

    Parameters
    ----------
    table_name_list : list of str
        Table names, one per table.
    create_table_sql_list : list of str
        CREATE TABLE statements, in the same order as ``table_name_list``.
    data_list : list of sequence of tuple
        Rows for each table, in the same order as ``table_name_list``.

    Notes
    -----
    A failure for one table (for example because it already exists) does not
    stop the batch. The actual exception is printed so that other problems,
    such as a failed insert or lists of different length, are not mistaken
    for an existing table.
    """
    for i, table_name in enumerate(table_name_list):
        try:
            createTableAndLoadData(table_name, create_table_sql_list[i], data_list[i])
        except Exception as e:
            # Best-effort load: report the real cause and move on to the next table.
            print('{} was skipped: {}: {}'.format(table_name, type(e).__name__, e))

In [3]:
# Short codes of the eleven regression functions covered in this notebook,
# in the same order as the tables declared in ``tableDef``.
func_list = 'dt exp geo glm hgbt log mlp mlr pol rdt svm'.split()

# CREATE TABLE statement for every demo table, keyed by table name.
# The insertion order of the keys matters: the tables are later paired with
# their rows purely by position.
tableDef = {
    # Decision tree
    'DATA_UNIR_DT_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_DT_TBL('
        '"OUTLOOK" VARCHAR(20),'
        '"TEMP" INTEGER,'
        '"HUMIDITY" DOUBLE,'
        '"WINDY" VARCHAR(10),'
        '"CLASS" DOUBLE);'
    ),
    # Exponential regression
    'DATA_UNIR_EXP_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_EXP_TBL('
        '"ID" INTEGER, '
        '"X1" DOUBLE, '
        '"X2" DOUBLE, '
        '"Y" DOUBLE);'
    ),
    # Geometric regression
    'DATA_UNIR_GEO_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_GEO_TBL('
        '"ID" INTEGER, '
        '"X1" DOUBLE, '
        '"Y" DOUBLE);'
    ),
    # Generalized linear model
    'DATA_UNIR_GLM_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_GLM_TBL('
        '"ID" INTEGER, '
        '"X" INTEGER, '
        '"Y" INTEGER);'
    ),
    # Hybrid gradient boosting tree
    'DATA_UNIR_HGBT_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_HGBT_TBL('
        '"ATT1" VARCHAR(10), '
        '"ATT2" DOUBLE, '
        '"ATT3" DOUBLE, '
        '"ATT4" DOUBLE, '
        '"TARGET" DOUBLE);'
    ),
    # Logarithmic regression
    'DATA_UNIR_LOG_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_LOG_TBL('
        '"ID" INTEGER, '
        '"X1" DOUBLE, '
        '"Y" DOUBLE);'
    ),
    # Multi-layer perceptron
    'DATA_UNIR_MLP_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_MLP_TBL('
        '"V000" INTEGER, '
        '"V001" DOUBLE, '
        '"V002" VARCHAR(10), '
        '"V003" INTEGER, '
        '"T001" DOUBLE, '
        '"T002" DOUBLE, '
        '"T003" DOUBLE);'
    ),
    # Linear regression
    'DATA_UNIR_MLR_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_MLR_TBL('
        '"ID" INTEGER, '
        '"X1" DOUBLE, '
        '"X2" VARCHAR (100), '
        '"X3" INTEGER, '
        '"Y" DOUBLE);'
    ),
    # Polynomial regression
    'DATA_UNIR_POL_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_POL_TBL('
        '"ID" INTEGER, '
        '"X1" DOUBLE, '
        '"Y" DOUBLE);'
    ),
    # Random forest
    'DATA_UNIR_RDT_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_RDT_TBL('
        '"OUTLOOK" VARCHAR(20), '
        '"TEMP" INTEGER, '
        '"HUMIDITY" DOUBLE, '
        '"WINDY" VARCHAR(10), '
        '"CLASS" INTEGER);'
    ),
    # Support vector regression
    'DATA_UNIR_SVM_TBL': (
        'CREATE COLUMN TABLE DATA_UNIR_SVM_TBL ('
        '"ID" INTEGER, '
        '"ATTRIBUTE1" DOUBLE, '
        '"ATTRIBUTE2" DOUBLE, '
        '"ATTRIBUTE3" DOUBLE, '
        '"ATTRIBUTE4" VARCHAR(10),'
        '"LABEL" INTEGER);'
    ),
}

# Rows for every demo table. The i-th inner list holds the rows of the i-th
# table in ``tableDef``; the pairing is purely positional, so the order of the
# inner lists must stay in step with the order of the ``tableDef`` keys.
#
# DATA_UNIR_DT_TBL rows: (OUTLOOK, TEMP, HUMIDITY, WINDY, CLASS)
data_list = [[('Sunny', 75, 70, 'Yes', 1),
              ('Sunny', 80, 90, 'Yes', 0),
              ('Sunny', 85, 85, 'No', 0),
              ('Sunny', 72, 95, 'No', 0),
              ('Sunny', 69, 70, 'No', 1),
              ('Overcast', 72, 90, 'Yes', 1),
              ('Overcast', 83, 78, 'No', 0),
              ('Overcast', 64, 65, 'Yes', 1),
              ('Overcast', 81, 75, 'No', 1),
              ('Rain', 71, 80, 'Yes', 0),
              ('Rain', 65, 70, 'Yes', 0),
              ('Rain', 75, 80, 'No', 1),
              ('Rain', 68, 80, 'No', 1),
              ('Rain', 70, 96, 'No', 0)],
             # DATA_UNIR_EXP_TBL rows: (ID, X1, X2, Y)
             [(0, 0.13, 0.33, 0.5),
              (1, 0.14, 0.34, 0.15),
              (2, 0.15, 0.36, 0.25),
              (3, 0.16, 0.35, 0.35),
              (4, 0.17, 0.37, 0.45),
              (5, 0.18, 0.38, 0.55),
              (6, 0.19, 0.39, 0.65),
              (7, 0.19, 0.31, 0.75),
              (8, 0.11, 0.32, 0.85),
              (9, 0.12, 0.33, 0.95)],
             # DATA_UNIR_GEO_TBL rows: (ID, X1, Y)
             [(0, 1, 5),
              (1, 2, 20),
              (2, 3, 43),
              (3, 4, 89),
              (4, 5, 166),
              (5, 6, 247),
              (6, 7, 403)],
             # DATA_UNIR_GLM_TBL rows: (ID, X, Y)
             [(1, -1, 0),
              (2, -1, 0),
              (3, 0, 1),
              (4, 0, 1),
              (5, 0, 1),
              (6, 0, 1),
              (7, 1, 2),
              (8, 1, 2),
              (9, 1, 2)],
             # DATA_UNIR_HGBT_TBL rows: (ATT1, ATT2, ATT3, ATT4, TARGET)
             # ATT1 is passed as a string because its column is VARCHAR(10).
             [('19.76', 6235,  100.0, 100.0, 25.10),
              ('17.85', 46230, 43.67, 84.53, 19.23),
              ('19.96', 7360,  65.51, 81.57, 21.42),
              ('16.80', 28715, 45.16, 93.33, 18.11),
              ('18.20', 21934, 49.20, 83.07, 19.24),
              ('16.71', 1337,  74.84, 94.99, 19.31),
              ('18.81', 17881, 70.66, 92.34, 20.07),
              ('20.74', 2319,  63.93, 95.08, 24.35),
              ('16.56', 18040, 14.45, 61.24, 17.60),
              ('18.55', 1147,  68.58, 97.90, 20.13),
              ('17.40', 2176,  53.33, 97.50, 18.40),
              ('17.62', 13267, 25.16, 56.86, 18.96),
              ('21.24', 3581,  35.76, 63.58, 25.75),
              ('18.23', 15104, 47.72, 95.29, 19.40),
              ('16.86', 47009, 17.21, 100.0, 18.64),
              ('17.45', 10139, 43.15, 89.40, 19.10),
              ('17.66', 6147,  67.73, 92.54, 20.00),
              ('18.30', 23089, 33.27, 67.53, 19.31),
              ('16.58', 20550, 26.61, 98.32, 20.49),
              ('17.51', 9450,  61.35, 86.72, 17.07),
              ('21.17', 1028,  100.0, 100.0, 20.61),
              ('16.92', 3848,  5.350, 65.58, 15.73),
              ('16.96', 15656, 20.53, 93.72, 18.70),
              ('18.24', 7725,  50.59, 96.63, 18.99)],
             # DATA_UNIR_LOG_TBL rows: (ID, X1, Y) -- same values as the GEO rows
             [(0, 1, 5),
              (1, 2, 20),
              (2, 3, 43),
              (3, 4, 89),
              (4, 5, 166),
              (5, 6, 247),
              (6, 7, 403)],
             # DATA_UNIR_MLP_TBL rows: (V000, V001, V002, V003, T001, T002, T003)
             [(1, 1.71, 'AC', 0, 12.7, 2.8, 3.06),
              (10, 1.78, 'CA', 5, 12.1, 8.0, 2.65),
              (17, 2.36, 'AA', 6, 10.1, 2.8, 3.24),
              (12, 3.15, 'AA', 2, 28.1, 5.6, 2.24),
              (7, 1.05, 'CA', 3, 19.8, 7.1, 1.98),
              (6, 1.50, 'CA', 2, 23.2, 4.9, 2.12),
              (9, 1.97, 'CA', 6, 24.5, 4.2, 1.05),
              (5, 1.26, 'AA', 1, 13.6, 5.1, 2.78),
              (12, 2.13, 'AC', 4, 13.2, 1.9, 1.34),
              (18, 1.87, 'AC', 6, 25.5, 3.6, 2.14)],
             # DATA_UNIR_MLR_TBL rows: (ID, X1, X2, X3, Y)
             [(0, 0.0, 'A', 1, -6.879),
              (1, 0.50, 'A', 1, -3.449),
              (2, 0.54, 'B', 1, 6.635),
              (3, 1.04, 'B', 1, 11.844),
              (4, 1.50, 'A', 1, 2.786),
              (5, 0.04, 'B', 2, 2.389),
              (6, 2.00, 'A', 2, -0.011),
              (7, 2.04, 'B', 2, 8.839),
              (8, 1.54, 'B', 1, 4.689),
              (9, 1.00, 'A', 2, -5.507)],
             # DATA_UNIR_POL_TBL rows: (ID, X1, Y) -- same values as the GEO rows
             [(0, 1, 5),
              (1, 2, 20),
              (2, 3, 43),
              (3, 4, 89),
              (4, 5, 166),
              (5, 6, 247),
              (6, 7, 403)],
             # DATA_UNIR_RDT_TBL rows: (OUTLOOK, TEMP, HUMIDITY, WINDY, CLASS)
             [('Sunny', 75, 70.1, 'Yes', 1),
              ('Sunny', 80, 90.1, 'Yes', 0),
              ('Sunny', 85, 85.1, 'No', 0),
              ('Sunny', 72, 95.1, 'No', 0),
              ('Sunny', 69, 70.1, 'No', 1),
              ('Overcast', 72, 90.1, 'Yes', 1),
              ('Overcast', 83, 78.1, 'No', 0),
              ('Overcast', 64, 65.1, 'Yes', 1),
              ('Overcast', 81, 75.1, 'No', 1),
              ('Rain', 71, 80.1, 'Yes', 0),
              ('Rain', 65, 70.1, 'Yes', 0),
              ('Rain', 75, 80.1, 'No', 1),
              ('Rain', 68, 80.1, 'No', 1),
              ('Rain', 70, 96.1, 'No', 0)],
             # DATA_UNIR_SVM_TBL rows: (ID, ATTRIBUTE1, ATTRIBUTE2, ATTRIBUTE3, ATTRIBUTE4, LABEL)
             [(0,1,10,100,'A',1),
              (1,1.1,10.1,100,'A',1),
              (2,1.2,10.2,100,'A',1),
              (3,1.3,10.4,100,'A',1),
              (4,1.2,10.3,100,'AB',1),
              (5,4,40,400,'AB',1),
              (6,4.1,40.1,400,'AB',1),
              (7,4.2,40.2,400,'AB',1),
              (8,4.3,40.4,400,'AB',2),
              (9,4.2,40.3,400,'AB',2),
              (10,9,90,900,'B',2),
              (11,9.1,90.1,900,'A',2),
              (12,9.2,90.2,900,'B',2),
              (13,9.3,90.4,900,'A',2),
              (14,9.2,90.3,900,'A',2)]]

In [9]:
# Split the table definitions into parallel lists (names and DDL, both in the
# dict's insertion order) and create/fill every table in one batch call.
table_name_list = [name for name in tableDef]
create_table_sql_list = [tableDef[name] for name in table_name_list]
createTableAndLoadDataAsBatch(table_name_list, create_table_sql_list, data_list)
# Position in table_name_list of the table currently in use; -1 = none yet.
function_index = -1

DATA_UNIR_DT_TBL table is exists.
DATA_UNIR_EXP_TBL table is exists.
DATA_UNIR_GEO_TBL table is exists.
DATA_UNIR_GLM_TBL table is exists.
DATA_UNIR_HGBT_TBL table is exists.
DATA_UNIR_LOG_TBL table is exists.
DATA_UNIR_MLP_TBL table is exists.
DATA_UNIR_MLR_TBL table is exists.
DATA_UNIR_POL_TBL table is exists.
DATA_UNIR_RDT_TBL table is exists.
DATA_UNIR_SVM_TBL table is exists.


## Decision Tree Regression

In [10]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_DT_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_DT_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,OUTLOOK,TEMP,HUMIDITY,WINDY,CLASS
0,Sunny,75,70.0,Yes,1.0
1,Sunny,80,90.0,Yes,0.0
2,Sunny,85,85.0,No,0.0
3,Sunny,72,95.0,No,0.0
4,Sunny,69,70.0,No,1.0
5,Overcast,72,90.0,Yes,1.0
6,Overcast,83,78.0,No,0.0
7,Overcast,64,65.0,Yes,1.0
8,Overcast,81,75.0,No,1.0
9,Rain,71,80.0,Yes,0.0


In [11]:
# Decision-tree settings, passed to UnifiedRegression as keyword arguments.
dt_params = {
    'model_format': 'json',  # pmml
    'allow_missing_dependent': True,
    'percentage': 1,
    'use_surrogate': True,
    'split_threshold': 1e-5,
    'min_records_of_parent': 2,
    'min_records_of_leaf': 1,
    'thread_ratio': 0.5,
}
udtr = UnifiedRegression(func='DecisionTree', **dt_params)
# Neither key nor label is passed here; fit() is left to its own defaults.
udtr.fit(
    data=tbl,
    partition_method='random',
    partition_random_state=2,
    output_partition_result=True,
)

In [12]:
# Show the model table stored on the fitted decision-tree regressor.
udtr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,0,,"{""CurrentVersion"":""VERSION1.3"",""DataDictionary..."


In [8]:
# udtr.generate_html_report('DecisionTree')

In [13]:
# Embed the model report of the decision-tree regressor in the notebook output.
udtr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Exponential Regression

In [14]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_EXP_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_EXP_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ID,X1,X2,Y
0,0,0.13,0.33,0.5
1,1,0.14,0.34,0.15
2,2,0.15,0.36,0.25
3,3,0.16,0.35,0.35
4,4,0.17,0.37,0.45
5,5,0.18,0.38,0.55
6,6,0.19,0.39,0.65
7,7,0.19,0.31,0.75
8,8,0.11,0.32,0.85
9,9,0.12,0.33,0.95


In [15]:
# Exponential-regression settings, passed to UnifiedRegression as keywords.
exp_params = {
    'adjusted_r2': True,
    'decomposition': 'lu',
}
uexpr = UnifiedRegression(func='ExponentialRegression', **exp_params)
# ID identifies the rows, Y is the target; the partition is random with a fixed seed.
uexpr.fit(
    data=tbl,
    key='ID',
    label='Y',
    partition_method='random',
    partition_random_state=2,
)

In [16]:
# Show the model table stored on the fitted exponential regressor.
uexpr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,1,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."


In [17]:
# uexpr.generate_html_report('ExponentialRegression')

In [18]:
# Embed the model report of the exponential regressor in the notebook output.
uexpr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Geometric Regression

In [19]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_GEO_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_GEO_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ID,X1,Y
0,0,1.0,5.0
1,1,2.0,20.0
2,2,3.0,43.0
3,3,4.0,89.0
4,4,5.0,166.0
5,5,6.0,247.0
6,6,7.0,403.0


In [24]:
# Geometric-regression settings, passed to UnifiedRegression as keywords.
geo_params = {
    'decomposition': 'lu',
    'adjusted_r2': False,
}
ugeor = UnifiedRegression(func='GeometricRegression', **geo_params)
# Partition options for fit(): random split with a fixed seed, half of the
# rows used for training, partition result requested as output.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.5,
    'output_partition_result': True,
    'partition_random_state': 2,
}
ugeor.fit(data=tbl, key='ID', label='Y', **par_params)

In [27]:
# Show the model table stored on the fitted geometric regressor.
ugeor.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,1,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."


In [22]:
# ugeor.generate_html_report('GeometricRegression')

In [28]:
# Embed the model report of the geometric regressor in the notebook output.
ugeor.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Generalized Linear Models Regression

In [29]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_GLM_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_GLM_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ID,X,Y
0,1,-1,0
1,2,-1,0
2,3,0,1
3,4,0,1
4,5,0,1
5,6,0,1
6,7,1,2
7,8,1,2
8,9,1,2


In [30]:
# GLM settings (Poisson family with log link, IRLS solver), passed to
# UnifiedRegression as keywords.
glm_params = {
    'solver': 'irls',
    'family': 'poisson',
    'link': 'LOG',
    'tol': 1e-8,
    'handle_missing_fit': 'skip',
    'max_iter': 100,
    'significance_level': 0.05,
    'quasilikelihood': False,
    'thread_ratio': 0.5,
}
# Partition options for fit(): random split with a fixed seed, half of the
# rows used for training, partition result requested as output.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.5,
    'output_partition_result': True,
    'partition_random_state': 2,
}
uglmr = UnifiedRegression(func='GLM', **glm_params)
uglmr.fit(data=tbl, key='ID', label='Y', **par_params)

In [31]:
# Show the model table stored on the fitted GLM regressor.
uglmr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,0,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
1,1,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
2,2,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."


In [32]:
# uglmr.generate_html_report('GLM')

In [33]:
# Embed the model report of the GLM regressor in the notebook output.
uglmr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Hybrid Gradient Boosting Tree (HGBT) Regression

In [34]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_HGBT_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_HGBT_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ATT1,ATT2,ATT3,ATT4,TARGET
0,19.76,6235.0,100.0,100.0,25.1
1,17.85,46230.0,43.67,84.53,19.23
2,19.96,7360.0,65.51,81.57,21.42
3,16.8,28715.0,45.16,93.33,18.11
4,18.2,21934.0,49.2,83.07,19.24
5,16.71,1337.0,74.84,94.99,19.31
6,18.81,17881.0,70.66,92.34,20.07
7,20.74,2319.0,63.93,95.08,24.35
8,16.56,18040.0,14.45,61.24,17.6
9,18.55,1147.0,68.58,97.9,20.13


In [35]:
# HGBT settings: fixed tree options plus a 5-fold cross-validated grid search
# over the values given in ``param_range``.
hgbt_params = {
    'max_depth': 6,
    'alpha': 0,
    'base_score': 0,
    'col_subsample_split': 1,
    'col_subsample_tree': 1,
    'lamb': 1,
    'min_sample_weight_leaf': 1,
    'min_samples_leaf': 1,
    'max_w_in_split': 0,
    'subsample': 1,
    'split_method': 'Exact',
    'resampling_method': 'cv',
    'fold_num': 5,
    'evaluation_metric': 'rmse',
    'ref_metric': 'mae',
    'search_strategy': 'grid',
    'random_state': 1,
    # NOTE(review): each triple is presumably [start, step, end] -- confirm
    # against the hana_ml documentation.
    'param_range': {
        'n_estimators': [10, 2, 20],
        'learning_rate': [0.1, 0.2, 1.0],
        'split_threshold': [0, 0.2, 1.0],
    },
}
# Partition options for fit(): random split with a fixed seed, half of the
# rows used for training, partition result requested as output.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.5,
    'partition_random_state': 2,
    'output_partition_result': True,
}
uhgbtr = UnifiedRegression(func='HybridGradientBoostingTree', **hgbt_params)
uhgbtr.fit(data=tbl, label='TARGET', **par_params)

In [36]:
# Show the model table stored on the fitted HGBT regressor.
uhgbtr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,1,-1,"{""model_version"":""2.0"",""nclass"":1,""nrow"":12,""p..."
1,2,0,"{""height"":0,""nnode"":1,""nodes"":[{""ch"":[],""gn"":4..."
2,3,1,"{""height"":0,""nnode"":1,""nodes"":[{""ch"":[],""gn"":2..."
3,4,2,"{""height"":0,""nnode"":1,""nodes"":[{""ch"":[],""gn"":1..."
4,5,3,"{""height"":1,""nnode"":3,""nodes"":[{""ch"":[1,2],""gn..."
5,6,4,"{""height"":2,""nnode"":5,""nodes"":[{""ch"":[1,2],""gn..."
6,7,5,"{""height"":3,""nnode"":7,""nodes"":[{""ch"":[1,2],""gn..."
7,8,6,"{""height"":3,""nnode"":7,""nodes"":[{""ch"":[1,2],""gn..."
8,9,7,"{""height"":2,""nnode"":5,""nodes"":[{""ch"":[1,2],""gn..."
9,10,8,"{""height"":2,""nnode"":5,""nodes"":[{""ch"":[1,2],""gn..."


In [37]:
# uhgbtr.generate_html_report('HybridGradientBoostingTree')

In [39]:
# Embed the model report of the HGBT regressor in the notebook output.
uhgbtr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Logarithmic Regression

In [40]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_LOG_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_LOG_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ID,X1,Y
0,0,1.0,5.0
1,1,2.0,20.0
2,2,3.0,43.0
3,3,4.0,89.0
4,4,5.0,166.0
5,5,6.0,247.0
6,6,7.0,403.0


In [41]:
# Logarithmic-regression settings, passed to UnifiedRegression as keywords.
log_params = {
    'decomposition': 'lu',
    'thread_ratio': 0.5,
}
# Partition options for fit(): random split with a fixed seed, half of the
# rows used for training, partition result requested as output.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.5,
    'partition_random_state': 2,
    'output_partition_result': True,
}
ulogr = UnifiedRegression(func='LogarithmicRegression', **log_params)
ulogr.fit(data=tbl, key='ID', label='Y', **par_params)

In [42]:
# Show the model table stored on the fitted logarithmic regressor.
ulogr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,1,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."


In [43]:
# ulogr.generate_html_report('LogarithmicRegression')

In [44]:
# Embed the model report of the logarithmic regressor in the notebook output.
ulogr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Multi-layer Perceptron (MLP) Regression

In [45]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_MLP_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_MLP_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,V000,V001,V002,V003,T001,T002,T003
0,1,1.71,AC,0,12.7,2.8,3.06
1,10,1.78,CA,5,12.1,8.0,2.65
2,17,2.36,AA,6,10.1,2.8,3.24
3,12,3.15,AA,2,28.1,5.6,2.24
4,7,1.05,CA,3,19.8,7.1,1.98
5,6,1.5,CA,2,23.2,4.9,2.12
6,9,1.97,CA,6,24.5,4.2,1.05
7,5,1.26,AA,1,13.6,5.1,2.78
8,12,2.13,AC,4,13.2,1.9,1.34
9,18,1.87,AC,6,25.5,3.6,2.14


In [46]:
# MLP settings: two hidden layers (10 and 5 units), batch training style with
# z-transform normalization; passed to UnifiedRegression as keywords.
mlp_params = dict(thread_ratio=0.5,
                  hidden_layer_size = [10,5],
                  activation='sin_asymmetric',
                  output_activation='sin_asymmetric',
                  learning_rate=0.01, batch_size=1,
                  momentum=0.00001, training_style='batch',
                  max_iter=10000, normalization='z-transform',
                  weight_init='normal')
# Seed the random partition like every other section of this notebook does, so
# the train/validation split is the same on each run.
# NOTE(review): the network weights may still differ between runs unless the
# algorithm itself is seeded -- confirm against the hana_ml documentation.
par_params = dict(partition_method='random', training_percent=0.5,
                  partition_random_state=2,
                  output_partition_result=True)
umlpr = UnifiedRegression(func='MLP', **mlp_params)
# Only T001 is the label; no key is passed, so the remaining columns
# (including T002 and T003) are left to fit()'s default handling.
umlpr.fit(data = tbl, label = "T001",
          **par_params)

In [47]:
# Show the model table stored on the fitted MLP regressor.
umlpr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,1,,"{""CurrentVersion"":""1.0"",""DataDictionary"":[{""da..."
1,2,,"m"":5,""weight"":-0.1626542576649863},{""from"":6,""..."


In [48]:
# umlpr.generate_html_report('MLP')

In [49]:
# Embed the model report of the MLP regressor in the notebook output.
umlpr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Linear Regression

In [50]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_MLR_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_MLR_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ID,X1,X2,X3,Y
0,0,0.0,A,1,-6.879
1,1,0.5,A,1,-3.449
2,2,0.54,B,1,6.635
3,3,1.04,B,1,11.844
4,4,1.5,A,1,2.786
5,5,0.04,B,2,2.389
6,6,2.0,A,2,-0.011
7,7,2.04,B,2,8.839
8,8,1.54,B,1,4.689
9,9,1.0,A,2,-5.507


In [51]:
# Linear-regression settings (QR solver), passed to UnifiedRegression as keywords.
mlr_params = {
    'solver': 'qr',
    'adjusted_r2': False,
    'thread_ratio': 0.5,
}
# Partition options for fit(): random split with a fixed seed, 70% of the
# rows used for training, partition result requested as output.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.7,
    'partition_random_state': 2,
    'output_partition_result': True,
}
umlr = UnifiedRegression(func='LinearRegression', **mlr_params)
umlr.fit(data=tbl, key='ID', label='Y', **par_params)

In [52]:
# Show the model table stored on the fitted linear regressor.
umlr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,1,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."


In [53]:
# umlr.generate_html_report('LinearRegression')

In [55]:
# Embed the model report of the linear regressor in the notebook output.
umlr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Polynomial Regression

In [56]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_POL_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_POL_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ID,X1,Y
0,0,1.0,5.0
1,1,2.0,20.0
2,2,3.0,43.0
3,3,4.0,89.0
4,4,5.0,166.0
5,5,6.0,247.0
6,6,7.0,403.0


In [57]:
# Polynomial-regression settings (degree 3), passed to UnifiedRegression as keywords.
polr_params = {
    'decomposition': 'lu',
    'thread_ratio': 0.5,
    'degree': 3,
}
# Partition options for fit(): random split with a fixed seed, half of the
# rows used for training, partition result requested as output.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.5,
    'partition_random_state': 2,
    'output_partition_result': True,
}
upolr = UnifiedRegression(func='PolynomialRegression', **polr_params)
upolr.fit(data=tbl, key='ID', label='Y', **par_params)

In [58]:
# Show the model table stored on the fitted polynomial regressor.
upolr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,1,,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."


In [59]:
# upolr.generate_html_report('PolynomialRegression')

In [60]:
# Embed the model report of the polynomial regressor in the notebook output.
upolr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Random Forest Regression

In [61]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_RDT_TBL. function_index is kept in sync for the cells that follow.
function_index = table_name_list.index('DATA_UNIR_RDT_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,OUTLOOK,TEMP,HUMIDITY,WINDY,CLASS
0,Sunny,75,70.1,Yes,1
1,Sunny,80,90.1,Yes,0
2,Sunny,85,85.1,No,0
3,Sunny,72,95.1,No,0
4,Sunny,69,70.1,No,1
5,Overcast,72,90.1,Yes,1
6,Overcast,83,78.1,No,0
7,Overcast,64,65.1,Yes,1
8,Overcast,81,75.1,No,1
9,Rain,71,80.1,Yes,0


In [62]:
# Random-forest settings (300 estimators, fixed random_state), passed to
# UnifiedRegression as keywords.
rdtr_params = {
    'thread_ratio': 0.5,
    'n_estimators': 300,
    'allow_missing_dependent': True,
    'sample_fraction': 1,
    'max_features': 3,
    'random_state': 2,
    'split_threshold': 1e-5,
    'min_samples_leaf': 1,
}
# Partition options for fit(): random split with a fixed seed, half of the
# rows used for training.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.5,
    'partition_random_state': 2,
}
urdtr = UnifiedRegression(func='RandomForest', **rdtr_params)
urdtr.fit(data=tbl, label='CLASS', **par_params)

In [63]:
# Show the model table stored on the fitted random-forest regressor.
urdtr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,0,-1,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
1,1,0,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
2,2,1,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
3,3,2,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
4,4,3,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
...,...,...,...
296,296,295,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
297,297,296,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
298,298,297,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."
299,299,298,"<PMML version=""4.0"" xmlns=""http://www.dmg.org/..."


In [64]:
# urdtr.generate_html_report('RandomForest')

In [66]:
# Embed the model report of the random-forest regressor in the notebook output.
urdtr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!


## Support Vector Regression

In [67]:
# Select the table by name instead of incrementing the shared counter, so that
# re-running this cell (or running cells out of order) still picks
# DATA_UNIR_SVM_TBL. function_index is kept in sync for any cells that follow.
function_index = table_name_list.index('DATA_UNIR_SVM_TBL')
tbl = connection_context.table(table_name_list[function_index])
tbl.collect()

Unnamed: 0,ID,ATTRIBUTE1,ATTRIBUTE2,ATTRIBUTE3,ATTRIBUTE4,LABEL
0,0,1.0,10.0,100.0,A,1
1,1,1.1,10.1,100.0,A,1
2,2,1.2,10.2,100.0,A,1
3,3,1.3,10.4,100.0,A,1
4,4,1.2,10.3,100.0,AB,1
5,5,4.0,40.0,400.0,AB,1
6,6,4.1,40.1,400.0,AB,1
7,7,4.2,40.2,400.0,AB,1
8,8,4.3,40.4,400.0,AB,2
9,9,4.2,40.3,400.0,AB,2


In [68]:
# Support-vector-regression settings (RBF kernel, standardized inputs, scaled
# label), passed to UnifiedRegression as keywords.
svr_params = {
    'thread_ratio': 0.5,
    'c': 100,
    'handle_missing': True,
    'kernel': 'rbf',
    'regression_eps': 0.1,
    'scale_label': True,
    'scale_info': 'standardization',
    'shrink': True,
    'compression': True,  # compression defaults to True?
}
# Partition options for fit(): random split with a fixed seed, 80% of the
# rows used for training.
par_params = {
    'partition_method': 'random',
    'training_percent': 0.8,
    'partition_random_state': 2,
}
usvr = UnifiedRegression(func='SVM', **svr_params)
usvr.fit(data=tbl, key='ID', label='LABEL', **par_params)

In [69]:
# Show the model table stored on the fitted support-vector regressor.
usvr.model_.collect()

Unnamed: 0,ROW_INDEX,PART_INDEX,MODEL_CONTENT
0,0,,"{""C"":100.0,""CONTENT"":""{\""impute_model\"":{\""col..."


In [70]:
# usvr.generate_html_report('SVM')

In [71]:
# Embed the model report of the support-vector regressor in the notebook output.
usvr.generate_notebook_iframe_report()

[31mIn order to review the unified regression model report better, you need to adjust the size of the left area or hide the left area temporarily!
