In [None]:
import os
import sys
import pickle
from copy import deepcopy
from collections import defaultdict

import numpy as np
import pandas as pd

import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.grid.grid_search import H2OGridSearch
import helper_functions as helper

In [2]:
# start a h2o instance to fit GLM models
# pick settings so things run fast but don't use all system resources
h2o.init(nthreads = 3, max_mem_size = "12G")

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
; OpenJDK 64-Bit Server VM (build 25.152-b12, mixed mode)56-b12)
  Starting server from S:\ProgramData\Anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\AARONB~1\AppData\Local\Temp\tmpj0okuy9f
  JVM stdout: C:\Users\AARONB~1\AppData\Local\Temp\tmpj0okuy9f\h2o_Aaron_Blackwell_started_from_python.out
  JVM stderr: C:\Users\AARONB~1\AppData\Local\Temp\tmpj0okuy9f\h2o_Aaron_Blackwell_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O cluster uptime:,02 secs
H2O cluster timezone:,Australia/Sydney
H2O data parsing timezone:,UTC
H2O cluster version:,3.26.0.10
H2O cluster version age:,16 days
H2O cluster name:,H2O_from_python_Aaron_Blackwell_qcaavi
H2O cluster total nodes:,1
H2O cluster free memory:,10.67 Gb
H2O cluster total cores:,4
H2O cluster allowed cores:,3


In [48]:
def import_data(data):    
    return pd.read_csv("train_test_data/" + data + ".csv", index_col="uid").drop("Unnamed: 0", axis="columns")        
X_train = import_data("X_train")
X_test = import_data("X_test")
y_train = import_data("y_train")
y_test = import_data("y_test")

y_train = y_train[[e for e in y_train.columns if 'post' in e]]
y_test  =  y_test[[e for e in y_test.columns  if 'post' in e]]
train_cols = X_train.columns.tolist()

In [49]:
# remove rows where targets all all NA
train = pd.concat([y_train, X_train], axis = 1)
train = train.dropna(how = 'all', subset = ['panas_pos_imp_post', 'panas_neg_imp_post', 'panas_pos_imp_post'])
test  = pd.concat([y_test, X_test], axis = 1)
test = test.dropna(how = 'all', subset = ['panas_pos_imp_post', 'panas_neg_imp_post', 'panas_pos_imp_post'])

In [50]:
# h2o likes to convert mostly na values into categories. so
# we copy the pandas type mapping across
col_types = dict(train.dtypes)
replacements = {'float64': 'real',
                'int64': 'int'}
for e in col_types:
    col_types[e] = replacements[str(col_types[e])]
    
train_h2o = h2o.H2OFrame(train, column_types = col_types)
test_h2o  = h2o.H2OFrame(test, column_types = col_types)

Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


In [None]:
all_models = dict()
best_models = dict()
# restrict search to only imputated cases where imputation actually occurs
targets = ['flourishing_scale_raw_class_post', 
           'flourishing_scale_raw_post', 
           'panas_neg_raw_class_post', 
           'panas_neg_raw_post',
           'panas_pos_raw_class_post',
           'panas_pos_raw_post',
           'panas_pos_imp_class_post',
           'panas_pos_imp_post',
          ]
for target in targets:
    print(f"searching for best model for target {target}")
    if 'class' in target: 
        families = ['binomial']
        metric_name = 'logloss'
    else:
        #TODO: tweedie_variance_power and tweedie_link_power (for tweedie) to work
        families = ["gaussian", "tweedie", "gamma", "poisson", "negativebinomial"]
        metric_name = 'mse'
    output_models = defaultdict(pd.DataFrame)
    best_metic_value = np.Inf
    best_family = None
    best_model = None
    for features in ['all', 'wk_10', 'wk_9-10']:
        if features == 'all':
            x_cols = train_cols
        elif features == 'wk_10':
            x_cols = [e for e in train_cols if 'wk_10' in e]
        elif features == 'wk_9-10':
            x_cols = [e for e in train_cols if 'wk_9' in e or 'wk_10' in e]
        else:
            raise ValueError('feature set not encoded')
        for family in families:
            print(f"searching for best model in {family} family")
            hyper_parameters = {'alpha': list(np.arange(0,1.1,0.1))}

            # h2o grid search doesn't support searching tweedie distribution over the 
            # space of canonical link functions so we define a custom search to support
            # this
            if family == "tweedie":
                # define a simple space (noting that both Guassian, Poisson and Gamma)
                # are already covered in other cases
                tweedie_variance_powers = [1.1, 1.3, 1.5, 1.7, 1.9]
            else:
                tweedie_variance_powers = [0]
            if family == "negativebinomial":
                hyper_parameters['theta'] = [1e-10, 1e-8, 1e-4, 1e-2, 0.1, 0.5, 1]

            for vp in tweedie_variance_powers:
                h2o_glm = H2OGeneralizedLinearEstimator(family = family, nfolds = 5, seed = 20191106, 
                                                        # tweedie parameters are ignored if not tweedie distn.
                                                        tweedie_variance_power = vp,
                                                        tweedie_link_power = 1.0 - vp)
                gs = H2OGridSearch(h2o_glm, hyper_parameters)

                gs.train(y = target, x = x_cols, training_frame = train_h2o)
                glm_grid_models = gs.get_grid(sort_by = 'mse')

                num_models = len(list(glm_grid_models.get_grid()))
                
                model_results = {
                    'response': target,
                    'family': family,
                    'alpha': [glm_grid_models.get_hyperparams(e)[0] for e in range(num_models)],
                    'best_lambda': [e.actual_params['lambda'][0] for e in glm_grid_models],
                    'metric_name': metric_name,
                    'features': features
                }
                if 'class' in target:
                    model_results['metric_value'] = list(glm_grid_models.get_grid(sort_by="mse").logloss(xval=True).values())
                else:
                    model_results['metric_value'] = list(glm_grid_models.get_grid(sort_by="mse").mse(xval=True).values())
                
                if family == "tweedie":
                    model_results['tweedie_power'] = vp
                elif family == "negativebinomial":
                    model_results['theta'] = [glm_grid_models.get_hyperparams(e)[1] for e in range(num_models)]
                # keep track of all models
                output_models[family] = output_models[family].append(pd.DataFrame(model_results), ignore_index = True)

                family_best_model = glm_grid_models.models[0]

                if 'class' in target:
                    if family_best_model.logloss(xval=True) < best_metic_value:
                        print(f"!! Classification new best model is {family} with {features} features !!")
                        print(f"old value {best_metic_value}, new value {family_best_model.logloss(xval=True)}")
                        best_model = family_best_model
                        best_metic_value = family_best_model.logloss(xval=True)
                        best_family = family
                else:
                    if family_best_model.mse(xval=True) < best_metic_value:
                        print(f"!! Regression new best model is {family} with {features} features !!")
                        print(f"old value {best_metic_value}, new value {family_best_model.mse(xval=True)}")
                        best_model = family_best_model
                        best_metic_value = family_best_model.mse(xval=True)
                        best_family = family
    all_models[target] = deepcopy(output_models)
    h2o.save_model(model=best_model, path=f"./fitted_models/h2o_glm/{target}", force=True)
    best_models[target] = {'best_model': best_model,
                           'metric_value': metric_name,
                           'best_metic_value': best_metic_value,
                           'best_family': best_family,
                           'features': 'features'}

searching for best model for target flourishing_scale_raw_class_post
searching for best model in binomial family
glm Grid Build progress: |████████████████████████████████████████████████| 100%
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
!! Classification new best model is binomial with all features !!
old value inf, new value 2.5403543680483622
searching for best model in binomial family
glm Grid Build progress: |████████████████████████████████████████████████| 100%
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
Hyperparameters: [alpha]
!! Classification new

Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperpar

Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperpar

Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperpar

Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperparameters: [alpha, theta]
Hyperpar

In [96]:
with open('fitted_models/h2o_glm/best_models.pkl', 'wb') as out_file:
    pickle.dump(best_models, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [99]:

all_models_pd = dict()
for e in all_models:
    all_models_pd[e] = pd.concat(all_models[e], ignore_index = True)
all_models_pd = pd.concat(all_models_pd, ignore_index = True, sort=False)
all_models_pd.sort_values(by=['response', 'metric_value'], inplace = True)
all_models_pd.to_csv("./fitted_models/h2o_glm/glm_cv_results.csv")
all_models_pd                      

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """


Unnamed: 0,response,family,alpha,best_lambda,metric_name,metric_value,features,theta,tweedie_power
0,flourishing_scale_raw_class_post,binomial,1.0,0.021929,logloss,3.176140e-01,all,,
1,flourishing_scale_raw_class_post,binomial,0.9,0.024366,logloss,3.303776e-01,all,,
2,flourishing_scale_raw_class_post,binomial,0.8,0.027412,logloss,3.483137e-01,all,,
11,flourishing_scale_raw_class_post,binomial,0.1,0.199049,logloss,3.534764e-01,wk_10,,
3,flourishing_scale_raw_class_post,binomial,0.7,0.031328,logloss,3.639210e-01,all,,
4,flourishing_scale_raw_class_post,binomial,0.3,0.073098,logloss,3.696136e-01,all,,
5,flourishing_scale_raw_class_post,binomial,0.2,0.109647,logloss,3.721480e-01,all,,
6,flourishing_scale_raw_class_post,binomial,0.6,0.036549,logloss,3.732310e-01,all,,
7,flourishing_scale_raw_class_post,binomial,0.1,0.219293,logloss,3.752880e-01,all,,
8,flourishing_scale_raw_class_post,binomial,0.4,0.054823,logloss,3.766773e-01,all,,


In [100]:
for model in best_models:
    print(f'--------------- {model} ---------------')
    print(best_models[model]['best_model'])

--------------- flourishing_scale_raw_class_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_7667_model_2


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,binomial,logit,"Elastic Net (alpha = 0.1, lambda = 0.199 )",47,24,5,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.129071859811097
RMSE: 0.3592657231230068
LogLoss: 0.42639762952682
Null degrees of freedom: 27
Residual degrees of freedom: 3
Null deviance: 38.81624211135692
Residual deviance: 23.878267253501917
AIC: 73.87826725350192
AUC: 0.9464285714285714
pr_auc: 0.8866301830096587
Gini: 0.8928571428571428

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3928962529952592: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,13.0,1.0,0.0714,(1.0/14.0)
1,1,2.0,12.0,0.1429,(2.0/14.0)
2,Total,15.0,13.0,0.1071,(3.0/28.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.392896,0.888889,12.0
1,max f2,0.34937,0.902778,15.0
2,max f0point5,0.568045,0.948276,10.0
3,max accuracy,0.568045,0.892857,10.0
4,max precision,0.946181,1.0,0.0
5,max recall,0.325405,1.0,17.0
6,max specificity,0.946181,1.0,0.0
7,max absolute_mcc,0.568045,0.8044,10.0
8,max min_per_class_accuracy,0.392896,0.857143,12.0
9,max mean_per_class_accuracy,0.568045,0.892857,10.0



Gains/Lift Table: Avg response rate: 50.00 %, avg score: 50.00 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.035714,0.93459,2.0,2.0,1.0,0.946181,1.0,0.946181,0.071429,0.071429,100.0,100.0
1,,2,0.035714,0.922999,0.0,2.0,0.0,0.0,1.0,0.946181,0.0,0.071429,-100.0,100.0
2,,3,0.035714,0.911408,0.0,2.0,0.0,0.0,1.0,0.946181,0.0,0.071429,-100.0,100.0
3,,4,0.071429,0.90331,2.0,2.0,1.0,0.906212,1.0,0.926196,0.071429,0.142857,100.0,100.0
4,,5,0.071429,0.89805,0.0,2.0,0.0,0.0,1.0,0.926196,0.0,0.142857,-100.0,100.0
5,,6,0.107143,0.837824,2.0,2.0,1.0,0.888074,1.0,0.913489,0.071429,0.214286,100.0,100.0
6,,7,0.178571,0.748844,2.0,2.0,1.0,0.791843,1.0,0.86483,0.142857,0.357143,100.0,100.0
7,,8,0.214286,0.722569,2.0,2.0,1.0,0.744014,1.0,0.844694,0.071429,0.428571,100.0,100.0
8,,9,0.321429,0.630209,2.0,2.0,1.0,0.692378,1.0,0.793922,0.214286,0.642857,100.0,100.0
9,,10,0.428571,0.448266,1.333333,1.833333,0.666667,0.57466,0.916667,0.739107,0.142857,0.785714,33.333333,83.333333




ModelMetricsBinomialGLM: glm
** Reported on cross-validation data. **

MSE: 0.3534763697476386
RMSE: 0.5945387874206682
LogLoss: 1.6200123334959866
Null degrees of freedom: 27
Residual degrees of freedom: 4
Null deviance: 46.259095375307126
Residual deviance: 90.72069067264819
AIC: 138.72069067264817
AUC: 0.4336734693877551
pr_auc: 0.48519781147049046
Gini: -0.13265306122448983

Confusion Matrix (Act/Pred) for max f1 @ threshold = 7.260760337663205e-09: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,1.0,13.0,0.9286,(13.0/14.0)
1,1,0.0,14.0,0.0,(0.0/14.0)
2,Total,1.0,27.0,0.4643,(13.0/28.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,7.26076e-09,0.682927,24.0
1,max f2,7.26076e-09,0.843373,24.0
2,max f0point5,7.26076e-09,0.57377,24.0
3,max accuracy,0.8236905,0.571429,1.0
4,max precision,0.9381832,1.0,0.0
5,max recall,7.26076e-09,1.0,24.0
6,max specificity,0.9381832,1.0,0.0
7,max absolute_mcc,0.1394672,0.316228,17.0
8,max min_per_class_accuracy,0.3490735,0.428571,11.0
9,max mean_per_class_accuracy,0.8236905,0.571429,1.0



Gains/Lift Table: Avg response rate: 50.00 %, avg score: 32.90 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.035714,0.9049803,2.0,2.0,1.0,0.938183,1.0,0.938183,0.071429,0.071429,100.0,100.0
1,,2,0.035714,0.8717775,0.0,2.0,0.0,0.0,1.0,0.938183,0.0,0.071429,-100.0,100.0
2,,3,0.035714,0.8385746,0.0,2.0,0.0,0.0,1.0,0.938183,0.0,0.071429,-100.0,100.0
3,,4,0.071429,0.797618,2.0,2.0,1.0,0.823691,1.0,0.880937,0.071429,0.142857,100.0,100.0
4,,5,0.071429,0.7503616,0.0,2.0,0.0,0.0,1.0,0.880937,0.0,0.142857,-100.0,100.0
5,,6,0.107143,0.5893916,0.0,1.333333,0.0,0.660737,0.666667,0.807537,0.0,0.142857,-100.0,33.333333
6,,7,0.178571,0.5405808,1.0,1.2,0.5,0.561214,0.6,0.709008,0.071429,0.214286,0.0,20.0
7,,8,0.214286,0.5035128,2.0,1.333333,1.0,0.539869,0.666667,0.680818,0.071429,0.285714,100.0,33.333333
8,,9,0.392857,0.4653773,0.0,0.727273,0.0,0.47321,0.363636,0.586451,0.0,0.285714,-100.0,-27.272727
9,,10,0.428571,0.4137937,2.0,0.833333,1.0,0.434049,0.416667,0.573751,0.071429,0.357143,100.0,-16.666667




Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,accuracy,0.7051282,0.34113166,1.0,0.6923077,0.6666667,0.16666667,1.0
1,auc,0.57222223,0.4310481,1.0,0.3611111,0.5,0.0,1.0
2,err,0.2948718,0.34113166,0.0,0.30769232,0.33333334,0.8333333,0.0
3,err_count,2.0,2.345208,0.0,4.0,1.0,5.0,0.0
4,f0point5,0.6986521,0.33625764,1.0,0.73770493,0.5555556,0.2,1.0
5,f1,0.75411254,0.29667738,1.0,0.8181818,0.6666667,0.2857143,1.0
6,f2,0.8503401,0.20763628,1.0,0.9183673,0.8333333,0.5,1.0
7,lift_top_group,1.0888889,1.0195618,2.0,1.4444444,0.0,0.0,2.0
8,logloss,1.0125163,1.047211,0.18905483,2.8457172,0.7152446,0.70973504,0.6028299
9,max_per_class_error,0.5,0.5,0.0,1.0,0.5,1.0,0.0



See the whole table with table.as_data_frame()

Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 16:34:47,0.000 sec,0,19.408121,0.646937
1,,2019-11-24 16:34:47,0.001 sec,1,12.824299,0.504703
2,,2019-11-24 16:34:47,0.001 sec,2,12.259682,0.501104
3,,2019-11-24 16:34:47,0.001 sec,3,12.213482,0.501078
4,,2019-11-24 16:34:47,0.002 sec,4,11.943678,0.499112
5,,2019-11-24 16:34:47,0.002 sec,5,11.939134,0.499111



--------------- flourishing_scale_raw_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_8044_model_2


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,tweedie,tweedie,"Elastic Net (alpha = 0.1, lambda = 52.275 )",478,0,1,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsRegressionGLM: glm
** Reported on train data. **

MSE: 85.43241967630117
RMSE: 9.242965956677606
MAE: 6.712733300196111
RMSLE: 0.2623146310602922
R^2: -2.5420236182682743e-07
Mean Residual Deviance: 1.5569863500070145
Null degrees of freedom: 27
Residual degrees of freedom: 27
Null deviance: 43.59560813524874
Residual deviance: 43.59561780019641
AIC: NaN

ModelMetricsRegressionGLM: glm
** Reported on cross-validation data. **

MSE: 102.94873911401153
RMSE: 10.146365808209929
MAE: 7.367509408302879
RMSLE: 0.2903712011171041
R^2: -0.20503159893974066
Mean Residual Deviance: 1.9418331976990546
Null degrees of freedom: 27
Residual degrees of freedom: 25
Null deviance: 57.621200066417515
Residual deviance: 54.37132953557353
AIC: NaN

Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,mae,8.068201,4.1698866,14.75215,6.8015094,5.9540143,3.8333333,9.0
1,mean_residual_deviance,2.274336,2.7050693,7.090931,1.2178779,0.83898574,0.8026165,1.4212692
2,mse,119.704315,122.522484,335.80365,71.41549,50.202976,44.535217,96.56423
3,null_deviance,11.52424,11.096257,18.48358,27.745813,2.7447207,4.6026726,4.044414
4,r2,-0.1909043,0.22366646,-0.5728509,-0.06075036,-0.013064534,-0.11570477,-0.192151
5,residual_deviance,10.874266,11.197364,28.363724,15.832412,2.516957,4.815699,2.8425384
6,rmse,10.072261,4.7767515,18.324947,8.450768,7.085406,6.673471,9.82671
7,rmsle,0.27205467,0.178827,0.59057605,0.21456791,0.17258416,0.18102705,0.20151818



Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 16:35:55,0.000 sec,0,9218.430048,307.281002
1,,2019-11-24 16:35:55,0.349 sec,1,9218.430052,307.281002



--------------- panas_neg_raw_class_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_8258_model_9


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,binomial,logit,"Elastic Net (alpha = 0.8, lambda = 0.02531 )",47,15,4,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.060960523839030056
RMSE: 0.2469018506188847
LogLoss: 0.24710402454971797
Null degrees of freedom: 29
Residual degrees of freedom: 14
Null deviance: 41.053905907818
Residual deviance: 14.826241472983074
AIC: 46.82624147298307
AUC: 0.9705882352941176
pr_auc: 0.915879194815181
Gini: 0.9411764705882353

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.6100459560902506: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,12.0,1.0,0.0769,(1.0/13.0)
1,1,0.0,17.0,0.0,(0.0/17.0)
2,Total,12.0,18.0,0.0333,(1.0/30.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.610046,0.971429,12.0
1,max f2,0.610046,0.988372,12.0
2,max f0point5,0.610046,0.955056,12.0
3,max accuracy,0.610046,0.966667,12.0
4,max precision,0.975035,1.0,0.0
5,max recall,0.610046,1.0,12.0
6,max specificity,0.975035,1.0,0.0
7,max absolute_mcc,0.610046,0.9337,12.0
8,max min_per_class_accuracy,0.640658,0.923077,11.0
9,max mean_per_class_accuracy,0.610046,0.961538,12.0



Gains/Lift Table: Avg response rate: 56.67 %, avg score: 56.67 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.033333,0.962092,1.764706,1.764706,1.0,0.975035,1.0,0.975035,0.058824,0.058824,76.470588,76.470588
1,,2,0.033333,0.94915,0.0,1.764706,0.0,0.0,1.0,0.975035,0.0,0.058824,-100.0,76.470588
2,,3,0.033333,0.936207,0.0,1.764706,0.0,0.0,1.0,0.975035,0.0,0.058824,-100.0,76.470588
3,,4,0.066667,0.929835,1.764706,1.764706,1.0,0.930405,1.0,0.95272,0.058824,0.117647,76.470588,76.470588
4,,5,0.066667,0.928803,0.0,1.764706,0.0,0.0,1.0,0.95272,0.0,0.117647,-100.0,76.470588
5,,6,0.1,0.919921,1.764706,1.764706,1.0,0.926844,1.0,0.944095,0.058824,0.176471,76.470588,76.470588
6,,7,0.166667,0.906279,1.764706,1.764706,1.0,0.917999,1.0,0.933656,0.117647,0.294118,76.470588,76.470588
7,,8,0.2,0.877131,1.764706,1.764706,1.0,0.886655,1.0,0.925823,0.058824,0.352941,76.470588,76.470588
8,,9,0.466667,0.80705,1.544118,1.638655,0.875,0.817276,0.928571,0.863796,0.411765,0.764706,54.411765,63.865546
9,,10,0.5,0.772884,1.764706,1.647059,1.0,0.773369,0.933333,0.857768,0.058824,0.823529,76.470588,64.705882




ModelMetricsBinomialGLM: glm
** Reported on cross-validation data. **

MSE: 0.26322676561089114
RMSE: 0.51305629867578
LogLoss: 0.867350853028571
Null degrees of freedom: 29
Residual degrees of freedom: 14
Null deviance: 42.7860568728306
Residual deviance: 52.04105118171426
AIC: 84.04105118171427
AUC: 0.6425339366515838
pr_auc: 0.6049991820691374
Gini: 0.2850678733031675

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.09642642202756857: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,4.0,9.0,0.6923,(9.0/13.0)
1,1,1.0,16.0,0.0588,(1.0/17.0)
2,Total,5.0,25.0,0.3333,(10.0/30.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.096426,0.761905,22.0
1,max f2,0.006507,0.876289,26.0
2,max f0point5,0.643435,0.705882,14.0
3,max accuracy,0.643435,0.666667,14.0
4,max precision,0.997783,1.0,0.0
5,max recall,0.006507,1.0,26.0
6,max specificity,0.997783,1.0,0.0
7,max absolute_mcc,0.096426,0.330911,22.0
8,max min_per_class_accuracy,0.67084,0.615385,13.0
9,max mean_per_class_accuracy,0.643435,0.660633,14.0



Gains/Lift Table: Avg response rate: 56.67 %, avg score: 55.34 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.033333,0.992448,1.764706,1.764706,1.0,0.997783,1.0,0.997783,0.058824,0.058824,76.470588,76.470588
1,,2,0.033333,0.987112,0.0,1.764706,0.0,0.0,1.0,0.997783,0.0,0.058824,-100.0,76.470588
2,,3,0.033333,0.981777,0.0,1.764706,0.0,0.0,1.0,0.997783,0.0,0.058824,-100.0,76.470588
3,,4,0.066667,0.97031,0.0,0.882353,0.0,0.979385,0.5,0.988584,0.0,0.058824,-100.0,-11.764706
4,,5,0.066667,0.953862,0.0,0.882353,0.0,0.0,0.5,0.988584,0.0,0.058824,-100.0,-11.764706
5,,6,0.1,0.867209,1.764706,1.176471,1.0,0.922667,0.666667,0.966612,0.058824,0.117647,76.470588,17.647059
6,,7,0.166667,0.840977,0.882353,1.058824,0.5,0.86014,0.6,0.924023,0.058824,0.176471,-11.764706,5.882353
7,,8,0.2,0.802049,1.764706,1.176471,1.0,0.807075,0.666667,0.904532,0.058824,0.235294,76.470588,17.647059
8,,9,0.3,0.796879,1.176471,1.176471,0.666667,0.800793,0.666667,0.869952,0.117647,0.352941,17.647059,17.647059
9,,10,0.4,0.762211,1.176471,1.176471,0.666667,0.777808,0.666667,0.846916,0.117647,0.470588,17.647059,17.647059




Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,accuracy,0.8717949,0.17578602,1.0,0.6923077,1.0,0.6666667,1.0
1,auc,0.7988095,0.2886874,1.0,0.61904764,1.0,0.375,1.0
2,err,0.12820514,0.17578602,0.0,0.30769232,0.0,0.33333334,0.0
3,err_count,1.2,1.7888544,0.0,4.0,0.0,2.0,0.0
4,f0point5,0.8744361,0.17308754,1.0,0.65789473,1.0,0.71428573,1.0
5,f1,0.9028571,0.1364267,1.0,0.71428573,1.0,0.8,1.0
6,f2,0.9380682,0.096096665,1.0,0.78125,1.0,0.90909094,1.0
7,lift_top_group,1.5333333,0.5451809,2.0,2.1666667,1.5,1.0,1.0
8,logloss,0.7490576,0.36479613,0.33845457,1.10115,0.47210854,1.1478432,0.6857315
9,max_per_class_error,0.2857143,0.44031528,0.0,0.42857143,0.0,1.0,0.0



See the whole table with table.as_data_frame()

Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 17:05:12,0.000 sec,0,20.526953,0.684232
1,,2019-11-24 17:05:12,0.001 sec,1,9.496157,0.433238
2,,2019-11-24 17:05:12,0.001 sec,2,7.880476,0.414165
3,,2019-11-24 17:05:12,0.001 sec,3,7.458268,0.412096
4,,2019-11-24 17:05:12,0.001 sec,4,7.413121,0.412061



--------------- panas_neg_raw_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_8636_model_7


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,tweedie,tweedie,"Elastic Net (alpha = 0.6, lambda = 2.1457 )",478,15,3,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsRegressionGLM: glm
** Reported on train data. **

MSE: 5.967201626840837
RMSE: 2.442785628507102
MAE: 1.9268696359552655
RMSLE: 0.1108735793187298
R^2: 0.9051503600403252
Mean Residual Deviance: 0.1084345350254622
Null degrees of freedom: 29
Residual degrees of freedom: 14
Null deviance: 33.891101883388046
Residual deviance: 3.253036050763866
AIC: NaN

ModelMetricsRegressionGLM: glm
** Reported on cross-validation data. **

MSE: 71.45499093348911
RMSE: 8.45310540177331
MAE: 6.7400567190289795
RMSLE: 0.5557835368531879
R^2: -0.13578869748220868
Mean Residual Deviance: 3.044175620474905
Null degrees of freedom: 29
Residual degrees of freedom: 15
Null deviance: 35.49514669344816
Residual deviance: 91.32526861424715
AIC: NaN

Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,mae,7.131229,1.5169634,7.6468854,6.63636,7.1044936,5.0493546,9.21905
1,mean_residual_deviance,3.0409684,3.5856242,9.404514,1.7248828,1.495342,0.70711815,1.8729851
2,mse,76.96198,32.70547,74.21146,78.51574,68.46081,36.262535,127.35937
3,null_deviance,7.0990295,6.1043477,6.7942076,17.67618,2.736203,4.6864324,3.6021242
4,r2,-0.38083673,0.41011283,-0.22719905,-0.18538147,-0.57180434,0.07019142,-0.9899901
5,residual_deviance,18.265053,22.753708,56.427086,22.423477,4.486026,4.2427087,3.7459702
6,rmse,8.611364,1.8729609,8.614607,8.86091,8.274105,6.021838,11.28536
7,rmsle,0.48889303,0.2748733,0.9671483,0.43709928,0.38683164,0.26433787,0.389048



Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 17:07:56,0.000 sec,0,1234.116926,41.137231
1,,2019-11-24 17:07:56,0.080 sec,1,1218.862107,40.766711
2,,2019-11-24 17:07:57,0.707 sec,2,1218.715587,40.754499
3,,2019-11-24 17:07:58,1.343 sec,3,1218.797893,40.752882



--------------- panas_pos_raw_class_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_8849_model_2


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,binomial,logit,"Elastic Net (alpha = 0.1, lambda = 0.1593 )",47,31,6,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.168946270095053
RMSE: 0.4110307410584432
LogLoss: 0.5138794169986807
Null degrees of freedom: 28
Residual degrees of freedom: -3
Null deviance: 40.168046876921956
Residual deviance: 29.805006185923475
AIC: 93.80500618592347
AUC: 0.8738095238095237
pr_auc: 0.8364554584749092
Gini: 0.7476190476190474

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.4511172168418607: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,8.0,6.0,0.4286,(6.0/14.0)
1,1,1.0,14.0,0.0667,(1.0/15.0)
2,Total,9.0,20.0,0.2414,(7.0/29.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.451117,0.8,14.0
1,max f2,0.379437,0.892857,18.0
2,max f0point5,0.59001,0.882353,8.0
3,max accuracy,0.59001,0.793103,8.0
4,max precision,0.954702,1.0,0.0
5,max recall,0.379437,1.0,18.0
6,max specificity,0.954702,1.0,0.0
7,max absolute_mcc,0.59001,0.648074,8.0
8,max min_per_class_accuracy,0.584258,0.666667,10.0
9,max mean_per_class_accuracy,0.59001,0.8,8.0



Gains/Lift Table: Avg response rate: 51.72 %, avg score: 51.72 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.034483,0.940583,1.933333,1.933333,1.0,0.954702,1.0,0.954702,0.066667,0.066667,93.333333,93.333333
1,,2,0.034483,0.926465,0.0,1.933333,0.0,0.0,1.0,0.954702,0.0,0.066667,-100.0,93.333333
2,,3,0.034483,0.912347,0.0,1.933333,0.0,0.0,1.0,0.954702,0.0,0.066667,-100.0,93.333333
3,,4,0.068966,0.887857,1.933333,1.933333,1.0,0.906018,1.0,0.93036,0.066667,0.133333,93.333333,93.333333
4,,5,0.068966,0.854939,0.0,1.933333,0.0,0.0,1.0,0.93036,0.0,0.133333,-100.0,93.333333
5,,6,0.103448,0.739024,1.933333,1.933333,1.0,0.792509,1.0,0.88441,0.066667,0.2,93.333333,93.333333
6,,7,0.137931,0.727715,1.933333,1.933333,1.0,0.732311,1.0,0.846385,0.066667,0.266667,93.333333,93.333333
7,,8,0.172414,0.689952,1.933333,1.933333,1.0,0.719181,1.0,0.820944,0.066667,0.333333,93.333333,93.333333
8,,9,0.275862,0.590205,1.933333,1.933333,1.0,0.621701,1.0,0.746228,0.2,0.533333,93.333333,93.333333
9,,10,0.37931,0.548882,1.288889,1.757576,0.666667,0.586798,0.909091,0.702747,0.133333,0.666667,28.888889,75.757576




ModelMetricsBinomialGLM: glm
** Reported on cross-validation data. **

MSE: 0.36667157876990947
RMSE: 0.6055341268416747
LogLoss: 1.9435593723715916
Null degrees of freedom: 28
Residual degrees of freedom: 3
Null deviance: 40.81952642064006
Residual deviance: 112.72644118527121
AIC: 164.7264411852712
AUC: 0.33333333333333337
pr_auc: 0.45094831309677563
Gini: -0.33333333333333326

Confusion Matrix (Act/Pred) for max f1 @ threshold = 1.0613189804896013e-11: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,0.0,14.0,1.0,(14.0/14.0)
1,1,0.0,15.0,0.0,(0.0/15.0)
2,Total,0.0,29.0,0.4828,(14.0/29.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,1.061319e-11,0.681818,26.0
1,max f2,1.061319e-11,0.842697,26.0
2,max f0point5,1.061319e-11,0.572519,26.0
3,max accuracy,0.914866,0.551724,1.0
4,max precision,0.9982207,1.0,0.0
5,max recall,1.061319e-11,1.0,26.0
6,max specificity,0.9982207,1.0,0.0
7,max absolute_mcc,0.3423623,0.498914,17.0
8,max min_per_class_accuracy,0.5638985,0.333333,8.0
9,max mean_per_class_accuracy,0.914866,0.566667,1.0



Gains/Lift Table: Avg response rate: 51.72 %, avg score: 45.40 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.034483,0.9740478,1.933333,1.933333,1.0,0.998221,1.0,0.998221,0.066667,0.066667,93.333333,93.333333
1,,2,0.034483,0.9498749,0.0,1.933333,0.0,0.0,1.0,0.998221,0.0,0.066667,-100.0,93.333333
2,,3,0.034483,0.9257021,0.0,1.933333,0.0,0.0,1.0,0.998221,0.0,0.066667,-100.0,93.333333
3,,4,0.068966,0.8884309,1.933333,1.933333,1.0,0.914866,1.0,0.956543,0.066667,0.133333,93.333333,93.333333
4,,5,0.068966,0.8405172,0.0,1.933333,0.0,0.0,1.0,0.956543,0.0,0.133333,-100.0,93.333333
5,,6,0.103448,0.7030821,0.0,1.288889,0.0,0.749647,0.666667,0.887578,0.0,0.133333,-100.0,28.888889
6,,7,0.172414,0.6374155,0.966667,1.16,0.5,0.668018,0.6,0.799754,0.066667,0.2,-3.333333,16.0
7,,8,0.206897,0.630048,0.0,0.966667,0.0,0.636093,0.5,0.772477,0.0,0.2,-100.0,-3.333333
8,,9,0.275862,0.5688848,0.966667,0.966667,0.5,0.585045,0.5,0.725619,0.066667,0.266667,-3.333333,-3.333333
9,,10,0.37931,0.5546607,0.644444,0.878788,0.333333,0.563898,0.454545,0.681513,0.066667,0.333333,-35.555556,-12.121212




Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,accuracy,0.6276923,0.24264595,0.6,0.53846157,0.6666667,0.33333334,1.0
1,auc,0.4011905,0.3892238,0.0,0.3809524,0.5,0.125,1.0
2,err,0.3723077,0.24264595,0.4,0.46153846,0.33333334,0.6666667,0.0
3,err_count,2.6,2.408319,2.0,6.0,1.0,4.0,0.0
4,f0point5,0.66885906,0.22277342,0.65217394,0.59322035,0.71428573,0.3846154,1.0
5,f1,0.75,0.18027757,0.75,0.7,0.8,0.5,1.0
6,f2,0.8718776,0.103775054,0.88235295,0.85365856,0.90909094,0.71428573,1.0
7,lift_top_group,1.0714285,0.99488485,0.0,1.8571428,1.5,0.0,2.0
8,logloss,2.0445223,2.8246284,7.0891676,0.9251515,0.72537774,0.93711305,0.5458012
9,max_per_class_error,0.8,0.4472136,1.0,1.0,1.0,1.0,0.0



See the whole table with table.as_data_frame()

Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 17:36:33,0.000 sec,0,20.084023,0.669467
1,,2019-11-24 17:36:33,0.001 sec,1,15.37522,0.576001
2,,2019-11-24 17:36:33,0.001 sec,2,15.069824,0.573209
3,,2019-11-24 17:36:33,0.001 sec,3,15.029942,0.573124
4,,2019-11-24 17:36:33,0.001 sec,4,15.029069,0.573124
5,,2019-11-24 17:36:33,0.004 sec,5,14.90386,0.571986
6,,2019-11-24 17:36:33,0.004 sec,6,14.902503,0.571986



--------------- panas_pos_raw_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_9226_model_4


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,tweedie,tweedie,"Elastic Net (alpha = 0.3, lambda = 12.49 )",478,0,0,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsRegressionGLM: glm
** Reported on train data. **

MSE: 44.83472057074911
RMSE: 6.695873398650031
MAE: 5.678953626634958
RMSLE: 0.23558797247425584
R^2: 6.661338147750939e-16
Mean Residual Deviance: 1.1264926252097145
Null degrees of freedom: 28
Residual degrees of freedom: 28
Null deviance: 32.668286131080805
Residual deviance: 32.66828613108172
AIC: NaN

ModelMetricsRegressionGLM: glm
** Reported on cross-validation data. **

MSE: 87.914825525994
RMSE: 9.376290605884291
MAE: 6.956514456521045
RMSLE: 0.6631540627292802
R^2: -0.9608648031443512
Mean Residual Deviance: 9.690815265865975
Null degrees of freedom: 28
Residual degrees of freedom: 22
Null deviance: 36.296670274908905
Residual deviance: 281.03364271011327
AIC: NaN

Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,mae,6.632054,4.1754203,13.872347,6.381348,4.6732616,4.733312,3.5
1,mean_residual_deviance,10.861708,22.46932,51.05035,1.3656051,0.6842805,0.88989455,0.31840515
2,mse,86.66095,119.9731,299.54025,56.30147,30.158672,33.70622,13.598127
3,null_deviance,7.259334,7.776992,8.094769,20.339035,2.864726,4.1599593,0.83818215
4,r2,-1.221153,1.7452997,-4.2367177,-0.016554372,-1.1541909,-0.58825105,-0.1100512
5,residual_deviance,56.20673,111.47395,255.25175,17.752867,2.0528417,5.3393674,0.6368103
6,rmse,7.9591246,5.3982964,17.307232,7.503431,5.491691,5.8057055,3.687564
7,rmsle,0.4537608,0.59775364,1.5190521,0.25172,0.1645836,0.21755701,0.115891255



Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 17:38:00,0.000 sec,0,6737.120629,224.570688



--------------- panas_pos_imp_class_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_9440_model_2


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,binomial,logit,"Elastic Net (alpha = 0.1, lambda = 0.1638 )",47,31,6,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.16567419647551834
RMSE: 0.40703095272413664
LogLoss: 0.5039381936327878
Null degrees of freedom: 29
Residual degrees of freedom: -2
Null deviance: 41.45539855882907
Residual deviance: 30.236291617967268
AIC: 94.23629161796727
AUC: 0.8683035714285714
pr_auc: 0.8370690648897885
Gini: 0.7366071428571428

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.46762228171712034: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,9.0,5.0,0.3571,(5.0/14.0)
1,1,2.0,14.0,0.125,(2.0/16.0)
2,Total,11.0,19.0,0.2333,(7.0/30.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.467622,0.8,13.0
1,max f2,0.371499,0.898876,19.0
2,max f0point5,0.590555,0.859375,11.0
3,max accuracy,0.590555,0.8,11.0
4,max precision,0.959005,1.0,0.0
5,max recall,0.371499,1.0,19.0
6,max specificity,0.959005,1.0,0.0
7,max absolute_mcc,0.590555,0.627376,11.0
8,max min_per_class_accuracy,0.590555,0.6875,11.0
9,max mean_per_class_accuracy,0.590555,0.808036,11.0



Gains/Lift Table: Avg response rate: 53.33 %, avg score: 53.33 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.033333,0.94323,1.875,1.875,1.0,0.959005,1.0,0.959005,0.0625,0.0625,87.5,87.5
1,,2,0.033333,0.927455,0.0,1.875,0.0,0.0,1.0,0.959005,0.0,0.0625,-100.0,87.5
2,,3,0.033333,0.911681,0.0,1.875,0.0,0.0,1.0,0.959005,0.0,0.0625,-100.0,87.5
3,,4,0.066667,0.900888,1.875,1.875,1.0,0.904609,1.0,0.931807,0.0625,0.125,87.5,87.5
4,,5,0.066667,0.894143,0.0,1.875,0.0,0.0,1.0,0.931807,0.0,0.125,-100.0,87.5
5,,6,0.1,0.815985,1.875,1.875,1.0,0.88135,1.0,0.914988,0.0625,0.1875,87.5,87.5
6,,7,0.166667,0.742366,1.875,1.875,1.0,0.775726,1.0,0.859283,0.125,0.3125,87.5,87.5
7,,8,0.2,0.699647,1.875,1.875,1.0,0.741688,1.0,0.839684,0.0625,0.375,87.5,87.5
8,,9,0.3,0.601112,1.25,1.666667,0.666667,0.634952,0.888889,0.77144,0.125,0.5,25.0,66.666667
9,,10,0.4,0.573802,1.875,1.71875,1.0,0.593905,0.916667,0.727056,0.1875,0.6875,87.5,71.875




ModelMetricsBinomialGLM: glm
** Reported on cross-validation data. **

MSE: 0.35810850478688466
RMSE: 0.5984216780723144
LogLoss: 1.8842780193618547
Null degrees of freedom: 29
Residual degrees of freedom: 5
Null deviance: 42.298318517293
Residual deviance: 113.05668301962245
AIC: 163.05668301962245
AUC: 0.3482142857142857
pr_auc: 0.4691774721780892
Gini: -0.3035714285714286

Confusion Matrix (Act/Pred) for max f1 @ threshold = 1.044885428973339e-11: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,0.0,14.0,1.0,(14.0/14.0)
1,1,0.0,16.0,0.0,(0.0/16.0)
2,Total,0.0,30.0,0.4667,(14.0/30.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,1.044885e-11,0.695652,27.0
1,max f2,1.044885e-11,0.851064,27.0
2,max f0point5,1.044885e-11,0.588235,27.0
3,max accuracy,0.9240945,0.533333,1.0
4,max precision,0.9987234,1.0,0.0
5,max recall,1.044885e-11,1.0,27.0
6,max specificity,0.9987234,1.0,0.0
7,max absolute_mcc,0.3299133,0.466569,18.0
8,max min_per_class_accuracy,0.5516002,0.375,11.0
9,max mean_per_class_accuracy,0.9240945,0.5625,1.0



Gains/Lift Table: Avg response rate: 53.33 %, avg score: 46.73 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.033333,0.977081,1.875,1.875,1.0,0.998723,1.0,0.998723,0.0625,0.0625,87.5,87.5
1,,2,0.033333,0.9554386,0.0,1.875,0.0,0.0,1.0,0.998723,0.0,0.0625,-100.0,87.5
2,,3,0.033333,0.9337962,0.0,1.875,0.0,0.0,1.0,0.998723,0.0,0.0625,-100.0,87.5
3,,4,0.066667,0.8968137,1.875,1.875,1.0,0.924094,1.0,0.961409,0.0625,0.125,87.5,87.5
4,,5,0.066667,0.8473672,0.0,1.875,0.0,0.0,1.0,0.961409,0.0,0.125,-100.0,87.5
5,,6,0.1,0.6901606,0.0,1.25,0.0,0.753589,0.666667,0.892136,0.0,0.125,-100.0,25.0
6,,7,0.166667,0.6491523,0.9375,1.125,0.5,0.669116,0.6,0.802928,0.0625,0.1875,-6.25,12.5
7,,8,0.2,0.6340141,0.0,0.9375,0.0,0.638073,0.5,0.775452,0.0,0.1875,-100.0,-6.25
8,,9,0.3,0.6009894,1.25,1.041667,0.666667,0.627964,0.555556,0.726289,0.125,0.3125,25.0,4.166667
9,,10,0.466667,0.5516002,0.375,0.803571,0.2,0.567893,0.428571,0.669719,0.0625,0.375,-62.5,-19.642857




Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,accuracy,0.64102566,0.24257548,0.6666667,0.53846157,0.6666667,0.33333334,1.0
1,auc,0.44642857,0.33881545,0.25,0.35714287,0.5,0.125,1.0
2,err,0.35897437,0.24257548,0.33333334,0.46153846,0.33333334,0.6666667,0.0
3,err_count,2.6,2.408319,2.0,6.0,1.0,4.0,0.0
4,f0point5,0.68128145,0.22334144,0.71428573,0.59322035,0.71428573,0.3846154,1.0
5,f1,0.76,0.18165903,0.8,0.7,0.8,0.5,1.0
6,f2,0.8772252,0.105129875,0.90909094,0.85365856,0.90909094,0.71428573,1.0
7,lift_top_group,1.3714286,0.7976368,1.5,1.8571428,1.5,0.0,2.0
8,logloss,1.8110459,2.3215232,5.953022,0.92400235,0.7319102,0.92746454,0.5188305
9,max_per_class_error,0.8,0.4472136,1.0,1.0,1.0,1.0,0.0



See the whole table with table.as_data_frame()

Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 18:11:37,0.000 sec,0,20.727699,0.690923
1,,2019-11-24 18:11:37,0.001 sec,1,15.679988,0.585328
2,,2019-11-24 18:11:37,0.001 sec,2,15.33375,0.581908
3,,2019-11-24 18:11:37,0.001 sec,3,15.284895,0.581811
4,,2019-11-24 18:11:37,0.001 sec,4,15.283238,0.581811
5,,2019-11-24 18:11:37,0.002 sec,5,15.120515,0.580351
6,,2019-11-24 18:11:37,0.002 sec,6,15.118146,0.58035



--------------- panas_pos_imp_post ---------------
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  Grid_GLM_Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex_model_python_1574509614939_9833_model_35


GLM Model: summary


Unnamed: 0,Unnamed: 1,family,link,regularization,number_of_predictors_total,number_of_active_predictors,number_of_iterations,training_frame
0,,negativebinomial,log,"Elastic Net (alpha = 0.1, lambda = 2.1392 )",47,24,6,Key_Frame__upload_8a2e2651a46e7835ce81ece2ed24da8.hex




ModelMetricsRegressionGLM: glm
** Reported on train data. **

MSE: 19.0274678553048
RMSE: 4.3620485847024675
MAE: 3.628664331513762
RMSLE: 0.15034469332434386
R^2: 0.5843110721969529
Mean Residual Deviance: 0.5161531176559319
Null degrees of freedom: 29
Residual degrees of freedom: 5
Null deviance: 37.24941823174868
Residual deviance: 15.484593529677957
AIC: 229.3286446699923

ModelMetricsRegressionGLM: glm
** Reported on cross-validation data. **

MSE: 95.23984635771176
RMSE: 9.759090447255408
MAE: 7.394507607723188
RMSLE: 0.5595555809679155
R^2: -1.0806840887935873
Mean Residual Deviance: 6.28221304851128
Null degrees of freedom: 29
Residual degrees of freedom: 12
Null deviance: 38.33052952739586
Residual deviance: 188.4663914553384
AIC: 388.31044259565266

Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,mae,7.459499,3.3760016,10.1946535,6.268614,11.371713,6.5337906,2.9287238
1,mean_residual_deviance,6.20264,10.484555,24.89986,1.6465404,2.6335325,1.5237678,0.3094989
2,mse,100.36338,80.41161,195.31998,60.38851,175.81834,58.04743,12.242615
3,null_deviance,7.6661057,7.4500613,10.535039,19.088634,2.056067,5.985532,0.66525793
4,r2,-3.1112971,4.8202276,-2.1730683,-0.09034814,-11.558454,-1.735219,0.0006028674
5,residual_deviance,37.69328,62.8903,149.39917,21.405025,7.9005976,9.142607,0.6189978
6,rmse,9.224835,4.3683205,13.975693,7.7710047,13.259651,7.618886,3.4989448
7,rmsle,0.41666797,0.4097932,1.1382723,0.26291674,0.30384338,0.26370144,0.114606



Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,iterations,negative_log_likelihood,objective
0,,2019-11-24 18:33:13,0.000 sec,0,100.546735,3.351558
1,,2019-11-24 18:33:13,0.002 sec,1,90.155599,3.124782
2,,2019-11-24 18:33:13,0.004 sec,2,90.08545,3.124151
3,,2019-11-24 18:33:13,0.004 sec,3,90.085115,3.124151
4,,2019-11-24 18:33:13,0.006 sec,4,89.67228,3.118774
5,,2019-11-24 18:33:13,0.008 sec,5,89.671895,3.118774
6,,2019-11-24 18:33:13,0.009 sec,6,89.664322,3.118771





In [101]:
# make predictions for all models
all_predictions = dict()
for model in best_models:
    all_predictions[model] = best_models[model]['best_model'].predict(test_h2o)

glm prediction progress: |████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%


In [102]:
# large number of models are just the constant model
# that is using linear regression we can't beat a straight line 
# without further feature engineering
# this makes sense (since non-linear behaviour is expected)
for model in best_models:
    print(f'--------------- {model} ---------------')
    print(all_predictions[model])

--------------- flourishing_scale_raw_class_post ---------------


predict,p0,p1
0,0.674595,0.325405
1,0.46531,0.53469
0,0.802689,0.197311
0,0.699222,0.300778
1,0.334784,0.665216
0,0.674595,0.325405
0,0.665073,0.334927
1,0.285583,0.714417
0,0.670316,0.329684



--------------- flourishing_scale_raw_post ---------------


predict
43.1739
43.1739
43.1739
43.1739
43.1739
43.1739
43.1739
43.1739
43.1739



--------------- panas_neg_raw_class_post ---------------


predict,p0,p1
1,0.19295,0.80705
0,0.418134,0.581866
1,0.133344,0.866656
0,0.488755,0.511245
1,0.122205,0.877795
1,0.19295,0.80705
1,0.0330482,0.966952
0,0.641737,0.358263
1,0.035881,0.964119



--------------- panas_neg_raw_post ---------------


predict
18.5745
16.3427
23.0745
17.6918
20.6853
16.6676
21.5524
20.1503
16.8255



--------------- panas_pos_raw_class_post ---------------


predict,p0,p1
1,0.525982,0.474018
1,0.475403,0.524597
0,0.757357,0.242643
0,0.626017,0.373983
1,0.302664,0.697336
1,0.525982,0.474018
0,0.579823,0.420177
1,0.359626,0.640374
0,0.641207,0.358793



--------------- panas_pos_raw_post ---------------


predict
29.3103
29.3103
29.3103
29.3103
29.3103
29.3103
29.3103
29.3103
29.3103



--------------- panas_pos_imp_class_post ---------------


predict,p0,p1
1,0.532378,0.467622
1,0.44491,0.55509
0,0.761994,0.238006
0,0.608341,0.391659
1,0.323016,0.676984
1,0.532378,0.467622
0,0.598196,0.401804
1,0.35816,0.64184
0,0.650723,0.349277



--------------- panas_pos_imp_post ---------------


predict
28.1871
30.9118
21.8521
25.9013
32.6991
28.1871
27.9981
29.3958
29.075





In [103]:
for target in best_models:
    print(f'--------------- {target} ---------------')
    print(best_models[target]['best_model'].model_performance(test_data=test_h2o))

--------------- flourishing_scale_raw_class_post ---------------

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.2217936809674068
RMSE: 0.4709497648023691
LogLoss: 0.6459930272047623
Null degrees of freedom: 8
Residual degrees of freedom: -16
Null deviance: 12.476649250079015
Residual deviance: 11.62787448968572
AIC: 61.62787448968572
AUC: 0.575
pr_auc: 0.4560019841269841
Gini: 0.1499999999999999

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.6652164872428995: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,5.0,0.0,0.0,(0.0/5.0)
1,1,2.0,2.0,0.5,(2.0/4.0)
2,Total,7.0,2.0,0.2222,(2.0/9.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.665216,0.666667,1.0
1,max f2,0.197311,0.8,7.0
2,max f0point5,0.665216,0.833333,1.0
3,max accuracy,0.665216,0.777778,1.0
4,max precision,0.714417,1.0,0.0
5,max recall,0.197311,1.0,7.0
6,max specificity,0.714417,1.0,0.0
7,max absolute_mcc,0.665216,0.597614,1.0
8,max min_per_class_accuracy,0.665216,0.5,1.0
9,max mean_per_class_accuracy,0.665216,0.75,1.0



Gains/Lift Table: Avg response rate: 44.44 %, avg score: 41.42 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.111111,0.710481,2.25,2.25,1.0,0.714417,1.0,0.714417,0.25,0.25,125.0,125.0
1,,2,0.111111,0.706545,0.0,2.25,0.0,0.0,1.0,0.714417,0.0,0.25,-100.0,125.0
2,,3,0.111111,0.702609,0.0,2.25,0.0,0.0,1.0,0.714417,0.0,0.25,-100.0,125.0
3,,4,0.111111,0.698673,0.0,2.25,0.0,0.0,1.0,0.714417,0.0,0.25,-100.0,125.0
4,,5,0.111111,0.694737,0.0,2.25,0.0,0.0,1.0,0.714417,0.0,0.25,-100.0,125.0
5,,6,0.111111,0.675057,0.0,2.25,0.0,0.0,1.0,0.714417,0.0,0.25,-100.0,125.0
6,,7,0.222222,0.639111,2.25,2.25,1.0,0.665216,1.0,0.689817,0.25,0.5,125.0,125.0
7,,8,0.222222,0.586901,0.0,2.25,0.0,0.0,1.0,0.689817,0.0,0.5,-100.0,125.0
8,,9,0.333333,0.454785,0.0,1.5,0.0,0.53469,0.666667,0.638108,0.0,0.5,-100.0,50.0
9,,10,0.444444,0.333878,0.0,1.125,0.0,0.334927,0.5,0.562313,0.0,0.5,-100.0,12.5




--------------- flourishing_scale_raw_post ---------------

ModelMetricsRegressionGLM: glm
** Reported on test data. **

MSE: 51.01091711130338
RMSE: 7.142192738319471
MAE: 6.130434585083499
RMSLE: 0.17871916681387506
R^2: -0.03972931203210339
Mean Residual Deviance: 0.8691133274931793
Null degrees of freedom: 8
Residual degrees of freedom: 8
Null deviance: 7.823884417539089
Residual deviance: 7.822019947438613
AIC: NaN

--------------- panas_neg_raw_class_post ---------------

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.3060696030373567
RMSE: 0.553235576438606
LogLoss: 0.8558578325976377
Null degrees of freedom: 8
Residual degrees of freedom: -7
Null deviance: 12.369824569664342
Residual deviance: 15.405440986757478
AIC: 47.405440986757476
AUC: 0.575
pr_auc: 0.5267857142857142
Gini: 0.1499999999999999

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.5112448331446564: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,1.0,3.0,0.75,(3.0/4.0)
1,1,0.0,5.0,0.0,(0.0/5.0)
2,Total,1.0,8.0,0.3333,(3.0/9.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.511245,0.769231,6.0
1,max f2,0.511245,0.892857,6.0
2,max f0point5,0.964119,0.769231,1.0
3,max accuracy,0.964119,0.666667,1.0
4,max precision,0.966952,1.0,0.0
5,max recall,0.511245,1.0,6.0
6,max specificity,0.966952,1.0,0.0
7,max absolute_mcc,0.964119,0.478091,1.0
8,max min_per_class_accuracy,0.964119,0.4,1.0
9,max mean_per_class_accuracy,0.964119,0.7,1.0



Gains/Lift Table: Avg response rate: 55.56 %, avg score: 74.90 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.111111,0.966725,1.8,1.8,1.0,0.966952,1.0,0.966952,0.2,0.2,80.0,80.0
1,,2,0.111111,0.966499,0.0,1.8,0.0,0.0,1.0,0.966952,0.0,0.2,-100.0,80.0
2,,3,0.111111,0.966272,0.0,1.8,0.0,0.0,1.0,0.966952,0.0,0.2,-100.0,80.0
3,,4,0.111111,0.966045,0.0,1.8,0.0,0.0,1.0,0.966952,0.0,0.2,-100.0,80.0
4,,5,0.111111,0.965819,0.0,1.8,0.0,0.0,1.0,0.966952,0.0,0.2,-100.0,80.0
5,,6,0.111111,0.964686,0.0,1.8,0.0,0.0,1.0,0.966952,0.0,0.2,-100.0,80.0
6,,7,0.222222,0.946854,1.8,1.8,1.0,0.964119,1.0,0.965535,0.2,0.4,80.0,80.0
7,,8,0.222222,0.912325,0.0,1.8,0.0,0.0,1.0,0.965535,0.0,0.4,-100.0,80.0
8,,9,0.333333,0.87334,0.0,1.2,0.0,0.877795,0.666667,0.936289,0.0,0.4,-100.0,20.0
9,,10,0.444444,0.854735,0.0,0.9,0.0,0.866656,0.5,0.918881,0.0,0.4,-100.0,-10.0




--------------- panas_neg_raw_post ---------------

ModelMetricsRegressionGLM: glm
** Reported on test data. **

MSE: 51.38315829368675
RMSE: 7.16820467716197
MAE: 5.36174149749776
RMSLE: 0.2826889069067607
R^2: -0.13903552867778446
Mean Residual Deviance: 0.8531411710651938
Null degrees of freedom: 8
Residual degrees of freedom: -7
Null deviance: 6.388652186803928
Residual deviance: 7.6782705395867445
AIC: NaN

--------------- panas_pos_raw_class_post ---------------

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.27744878427625175
RMSE: 0.5267340735857627
LogLoss: 0.7556542644676982
Null degrees of freedom: 8
Residual degrees of freedom: -23
Null deviance: 12.694335777760172
Residual deviance: 13.601776760418568
AIC: 77.60177676041857
AUC: 0.3888888888888889
pr_auc: 0.17261904761904762
Gini: -0.2222222222222222

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.6973360492180994: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,6.0,0.0,0.0,(0.0/6.0)
1,1,2.0,1.0,0.6667,(2.0/3.0)
2,Total,8.0,1.0,0.2222,(2.0/9.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.697336,0.5,0.0
1,max f2,0.242643,0.714286,7.0
2,max f0point5,0.697336,0.714286,0.0
3,max accuracy,0.697336,0.777778,0.0
4,max precision,0.697336,1.0,0.0
5,max recall,0.242643,1.0,7.0
6,max specificity,0.697336,1.0,0.0
7,max absolute_mcc,0.697336,0.5,0.0
8,max min_per_class_accuracy,0.697336,0.333333,0.0
9,max mean_per_class_accuracy,0.697336,0.666667,0.0



Gains/Lift Table: Avg response rate: 33.33 %, avg score: 46.73 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.111111,0.692779,3.0,3.0,1.0,0.697336,1.0,0.697336,0.333333,0.333333,200.0,200.0
1,,2,0.111111,0.688222,0.0,3.0,0.0,0.0,1.0,0.697336,0.0,0.333333,-100.0,200.0
2,,3,0.111111,0.683665,0.0,3.0,0.0,0.0,1.0,0.697336,0.0,0.333333,-100.0,200.0
3,,4,0.111111,0.679108,0.0,3.0,0.0,0.0,1.0,0.697336,0.0,0.333333,-100.0,200.0
4,,5,0.111111,0.674551,0.0,3.0,0.0,0.0,1.0,0.697336,0.0,0.333333,-100.0,200.0
5,,6,0.111111,0.651766,0.0,3.0,0.0,0.0,1.0,0.697336,0.0,0.333333,-100.0,200.0
6,,7,0.222222,0.617218,0.0,1.5,0.0,0.640374,0.5,0.668855,0.0,0.333333,-100.0,50.0
7,,8,0.222222,0.570908,0.0,1.5,0.0,0.0,0.5,0.668855,0.0,0.333333,-100.0,50.0
8,,9,0.333333,0.504365,0.0,1.0,0.0,0.524597,0.333333,0.620769,0.0,0.333333,-100.0,0.0
9,,10,0.555556,0.474018,0.0,0.6,0.0,0.474018,0.2,0.562068,0.0,0.333333,-100.0,-40.0




--------------- panas_pos_raw_post ---------------

ModelMetricsRegressionGLM: glm
** Reported on test data. **

MSE: 37.88941736028551
RMSE: 6.155438031552711
MAE: 4.616858237547905
RMSLE: 0.252307092948434
R^2: -0.3701083956174642
Mean Residual Deviance: 1.0772310095981033
Null degrees of freedom: 8
Residual degrees of freedom: 8
Null deviance: 9.695079086382645
Residual deviance: 9.695079086382929
AIC: NaN

--------------- panas_pos_imp_class_post ---------------

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.2773241683555768
RMSE: 0.5266157691862036
LogLoss: 0.7559887953453186
Null degrees of freedom: 8
Residual degrees of freedom: -23
Null deviance: 12.917332581097009
Residual deviance: 13.607798316215735
AIC: 77.60779831621574
AUC: 0.3888888888888889
pr_auc: 0.17261904761904762
Gini: -0.2222222222222222

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.6769839262890389: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,6.0,0.0,0.0,(0.0/6.0)
1,1,2.0,1.0,0.6667,(2.0/3.0)
2,Total,8.0,1.0,0.2222,(2.0/9.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.676984,0.5,0.0
1,max f2,0.238006,0.714286,7.0
2,max f0point5,0.676984,0.714286,0.0
3,max accuracy,0.676984,0.777778,0.0
4,max precision,0.676984,1.0,0.0
5,max recall,0.238006,1.0,7.0
6,max specificity,0.676984,1.0,0.0
7,max absolute_mcc,0.676984,0.5,0.0
8,max min_per_class_accuracy,0.676984,0.333333,0.0
9,max mean_per_class_accuracy,0.676984,0.666667,0.0



Gains/Lift Table: Avg response rate: 33.33 %, avg score: 46.55 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.111111,0.674172,3.0,3.0,1.0,0.676984,1.0,0.676984,0.333333,0.333333,200.0,200.0
1,,2,0.111111,0.671361,0.0,3.0,0.0,0.0,1.0,0.676984,0.0,0.333333,-100.0,200.0
2,,3,0.111111,0.668549,0.0,3.0,0.0,0.0,1.0,0.676984,0.0,0.333333,-100.0,200.0
3,,4,0.111111,0.665738,0.0,3.0,0.0,0.0,1.0,0.676984,0.0,0.333333,-100.0,200.0
4,,5,0.111111,0.662926,0.0,3.0,0.0,0.0,1.0,0.676984,0.0,0.333333,-100.0,200.0
5,,6,0.111111,0.648869,0.0,3.0,0.0,0.0,1.0,0.676984,0.0,0.333333,-100.0,200.0
6,,7,0.222222,0.62449,0.0,1.5,0.0,0.64184,0.5,0.659412,0.0,0.333333,-100.0,50.0
7,,8,0.222222,0.58979,0.0,1.5,0.0,0.0,0.5,0.659412,0.0,0.333333,-100.0,50.0
8,,9,0.333333,0.520103,0.0,1.0,0.0,0.55509,0.333333,0.624638,0.0,0.333333,-100.0,0.0
9,,10,0.555556,0.467622,0.0,0.6,0.0,0.467622,0.2,0.561832,0.0,0.333333,-100.0,-40.0




--------------- panas_pos_imp_post ---------------

ModelMetricsRegressionGLM: glm
** Reported on test data. **

MSE: 54.06843750497589
RMSE: 7.3531243362924235
MAE: 5.966738140079889
RMSLE: 0.28870168806884644
R^2: -0.9551533204924274
Mean Residual Deviance: 1.6843525750972654
Null degrees of freedom: 8
Residual degrees of freedom: -16
Null deviance: 11.135365425332303
Residual deviance: 15.159173175875388
AIC: 112.99508313154182



In [104]:
all_coefs = list()
for target in best_models:
    all_coefs.append(
        pd.DataFrame.from_dict(best_models[target]['best_model'].coef(),
                               orient='index', 
                               columns = [target])
    )
    
all_coefs = pd.concat(all_coefs, axis = 1)
all_coefs.to_csv("./fitted_models/h2o_glm/glm_coefs.csv")
all_coefs

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  if __name__ == '__main__':


Unnamed: 0,flourishing_scale_raw_class_post,flourishing_scale_raw_post,panas_neg_raw_class_post,panas_neg_raw_post,panas_pos_raw_class_post,panas_pos_raw_post,panas_pos_imp_class_post,panas_pos_imp_post
Intercept,-1.602945,0.686243,-2.037220,0.399438,-1.606435,0.713342,-1.586781,3.035710
activity_running_ratio_wk_1,,0.000000,,0.000000,,0.000000,,
activity_running_ratio_wk_10,19.966503,0.000000,0.000000,0.000000,9.456084,0.000000,7.327408,0.136610
activity_running_ratio_wk_2,,0.000000,,0.000000,,0.000000,,
activity_running_ratio_wk_3,,0.000000,,0.000000,,0.000000,,
activity_running_ratio_wk_4,,0.000000,,0.000000,,0.000000,,
activity_running_ratio_wk_5,,0.000000,,0.000000,,0.000000,,
activity_running_ratio_wk_6,,0.000000,,0.000000,,0.000000,,
activity_running_ratio_wk_7,,0.000000,,0.000000,,0.000000,,
activity_running_ratio_wk_8,,0.000000,,0.000000,,0.000000,,


In [None]:
# TO DO create summary metrics and stuff
# only need to compare hyper-parameters for raw vs imputated at a high level

In [None]:
# get top 10 parameters per model
# auc curve
# pvo
# confusion matrix