In [23]:
import h2o
from h2o.automl import H2OAutoML, get_leaderboard

import mlflow
import mlflow.h2o
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

import pandas as pd
import json

from sklearn.metrics import f1_score, accuracy_score

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import warnings
warnings.filterwarnings("ignore")

In [24]:
# Start the H2O cluster (locally)
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,1 day 2 hours 19 mins
H2O_cluster_timezone:,Asia/Kolkata
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.2
H2O_cluster_version_age:,29 days
H2O_cluster_name:,tamilarasan
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,5.298 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


In [28]:
# Initialize MLFlow client
client = MlflowClient()

# Set up MlFlow experiment
experiment_name = 'automl-insurance-classification'

try:
    experiment_id = mlflow.create_experiment(experiment_name)
    experiment = client.get_experiment_by_name(experiment_name)
except:
    experiment = client.get_experiment_by_name(experiment_name)
    
mlflow.set_experiment(experiment_name)

# Print experiment details
print(f"Name: {experiment_name}")
print(f"Experiment_id: {experiment.experiment_id}")
print(f"Artifact Location: {experiment.artifact_location}")
print(f"Tags: {experiment.tags}")
print(f"Lifecycle_stage: {experiment.lifecycle_stage}")
print(f"Tracking uri: {mlflow.get_tracking_uri()}")

Name: automl-insurance-classification
Experiment_id: 1
Artifact Location: /home/tamilarasan/projects/lectures/mlflow/artifacts/1
Tags: {}
Lifecycle_stage: active
Tracking uri: http://localhost:5000


In [29]:
# Import data directly as H2O frame
main_frame = h2o.import_file(path='data/processed/train.csv')

# Save data types of columns in H2O frame (for matching with test set during prediction)
with open('data/processed/train_col_types.json', 'w') as fp:
    json.dump(main_frame.types, fp)


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [30]:
# Set predictor and target columns
target = 'Response'
predictors = [n for n in main_frame.col_names if n != target]

# Factorize target variable so that autoML tackles classification problem (instead of regression)
main_frame[target] = main_frame[target].asfactor()

# Visualize H2O frame structure
main_frame.head()

Product_Info_1,Product_Info_2,Product_Info_3,Product_Info_4,Product_Info_5,Product_Info_6,Product_Info_7,Ins_Age,Ht,Wt,BMI,Employment_Info_1,Employment_Info_2,Employment_Info_3,Employment_Info_4,Employment_Info_5,Employment_Info_6,InsuredInfo_1,InsuredInfo_2,InsuredInfo_3,InsuredInfo_4,InsuredInfo_5,InsuredInfo_6,InsuredInfo_7,Insurance_History_1,Insurance_History_2,Insurance_History_3,Insurance_History_4,Insurance_History_5,Insurance_History_7,Insurance_History_8,Insurance_History_9,Family_Hist_1,Family_Hist_2,Family_Hist_4,Medical_History_1,Medical_History_2,Medical_History_3,Medical_History_4,Medical_History_5,Medical_History_6,Medical_History_7,Medical_History_8,Medical_History_9,Medical_History_11,Medical_History_12,Medical_History_13,Medical_History_14,Medical_History_16,Medical_History_17,Medical_History_18,Medical_History_19,Medical_History_20,Medical_History_21,Medical_History_22,Medical_History_23,Medical_History_25,Medical_History_26,Medical_History_27,Medical_History_28,Medical_History_29,Medical_History_30,Medical_History_31,Medical_History_33,Medical_History_34,Medical_History_35,Medical_History_36,Medical_History_37,Medical_History_38,Medical_History_39,Medical_History_40,Medical_History_41,Medical_Keyword_1,Medical_Keyword_2,Medical_Keyword_3,Medical_Keyword_4,Medical_Keyword_5,Medical_Keyword_6,Medical_Keyword_7,Medical_Keyword_8,Medical_Keyword_9,Medical_Keyword_10,Medical_Keyword_11,Medical_Keyword_12,Medical_Keyword_13,Medical_Keyword_14,Medical_Keyword_15,Medical_Keyword_16,Medical_Keyword_17,Medical_Keyword_18,Medical_Keyword_19,Medical_Keyword_20,Medical_Keyword_21,Medical_Keyword_22,Medical_Keyword_23,Medical_Keyword_24,Medical_Keyword_25,Medical_Keyword_26,Medical_Keyword_27,Medical_Keyword_28,Medical_Keyword_29,Medical_Keyword_30,Medical_Keyword_31,Medical_Keyword_32,Medical_Keyword_33,Medical_Keyword_34,Medical_Keyword_35,Medical_Keyword_36,Medical_Keyword_37,Medical_Keyword_38,Medical_Keyword_39,Medical_Keyword_40,Medical_Keyword_41,Medical_Keyword_42,Medical_Keyword_43,Medical_Keyword_44,Medical_Keyword_45,Medical_Keyword_46,Medical_Keyword_47,Medical_Keyword_48,Response,Product_Info_2_Alpha,Product_Info_2_Num,Med_Keywords_Count
1,14,26,0.487179,2,1,1,0.164179,0.818182,0.435146,0.576961,0.027,9,1,0,2,0.15,1,2,1,2,1,1,1,2,1,3,2,0.000133333,1,3,2,3,0.376812,0.253521,22.0,491,2,2,1,3,2,2,1,3,2,3,3,1,3,1,1,2,1,2,3,1,3,3,1,3,2,3,3,3,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,3,1,0
1,5,26,0.230769,2,3,1,0.268657,0.781818,0.368201,0.517129,0.1,12,1,0,2,0.35,2,2,6,3,1,1,1,2,1,1,3,,3,2,3,2,,0.394366,,162,2,2,1,3,2,2,2,3,2,3,3,1,3,1,1,2,1,2,3,1,3,3,1,3,2,3,3,3,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,6,0
1,0,26,1.0,2,3,1,0.507463,0.654545,0.299163,0.545946,0.15,12,1,0,2,1.0,2,2,3,3,1,1,1,2,1,3,1,0.00233333,1,3,2,3,,,1.0,261,2,2,1,3,2,2,2,3,2,3,3,3,3,1,1,2,1,2,3,2,2,3,1,3,2,3,3,3,1,3,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,1,1
1,16,26,0.230769,2,3,1,0.134328,0.763636,0.215481,0.296359,0.042,9,1,0,2,,2,2,8,3,1,1,1,1,1,3,1,0.000666667,1,1,2,3,,,1.0,407,2,2,1,3,2,2,2,3,2,3,3,1,3,1,1,2,1,2,3,1,3,3,1,3,2,3,3,3,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,3,3,0
1,17,26,0.487179,2,3,1,0.492537,0.618182,0.276151,0.546823,0.12,9,1,0,2,0.12,1,2,3,3,1,1,1,1,1,3,1,0.001,1,1,2,3,,0.450704,0.0,132,2,1,1,3,2,2,2,3,2,3,3,1,3,1,1,2,1,2,3,2,2,3,1,1,2,3,3,3,1,3,2,1,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,3,4,2
1,16,26,1.0,2,3,1,0.58209,0.654545,0.278243,0.506623,0.115,9,1,0,2,1.0,1,2,8,3,1,2,1,1,1,3,2,0.00472,1,1,2,3,,0.661972,8.0,261,2,1,1,3,2,2,1,3,2,3,3,1,3,1,1,2,2,2,3,1,3,3,1,1,2,3,3,3,1,2,2,1,3,3,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,3,3,2
1,16,26,0.487179,2,1,1,0.283582,0.709091,0.225941,0.35742,0.075,9,1,0,2,0.05,1,2,3,3,1,1,1,1,1,3,1,0.000666667,1,1,2,3,0.463768,0.408451,1.0,491,2,1,1,3,2,2,2,3,2,3,3,3,3,1,1,2,1,2,3,1,3,3,1,3,2,3,3,1,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,0
1,14,26,0.230769,2,3,1,0.38806,0.690909,0.313808,0.52907,0.05,14,1,0,2,0.1,1,2,3,3,1,1,1,2,1,1,3,,3,2,3,3,0.608696,0.549296,11.0,565,2,2,1,3,3,1,2,3,2,3,2,1,3,1,1,2,1,2,1,1,3,3,1,3,2,3,3,3,1,2,1,1,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,3,1,2
1,17,26,0.0769231,2,3,1,0.268657,0.781818,0.320084,0.443418,0.057,9,1,0,2,0.1,1,2,3,3,1,1,1,2,1,1,3,,3,2,3,3,0.391304,0.366197,1.0,33,2,2,1,3,2,2,2,3,2,3,3,1,3,1,1,2,1,2,1,1,3,3,1,3,2,3,3,3,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,3,4,1
1,7,26,0.025641,2,3,1,0.552239,0.781818,0.424686,0.60366,0.175,12,1,0,2,0.4,1,2,6,3,1,1,1,2,1,3,1,0.000166667,1,3,2,2,,,1.0,3,2,2,1,3,2,2,2,3,2,3,3,3,3,1,1,2,1,2,1,1,3,3,1,3,2,3,3,3,1,2,2,1,3,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,8,2


In [33]:
mlflow.set_tracking_uri("http://localhost:5000")

# Wrap autoML training with MLflow
with mlflow.start_run(log_system_metrics=True):
    aml = H2OAutoML(
                    max_models=2, # Run AutoML for n base models
                    seed=42, 
                    balance_classes=True, # Our target classes are imbalanced, so we set this to True
                    sort_metric='logloss', # Sort models by logloss (main metric for multi-classification)
                    verbosity='info', # Turn on verbose info
                    exclude_algos = ['GBM','GLM', 'DRF','StackedEnsemble','DeepLearning'], # Specify which algorithms to exclude
                   )
    
    aml.train(x=predictors, y=target, training_frame=main_frame)
    
    # Set metrics to log
    mlflow.log_metric("log_loss", aml.leader.logloss())
    mlflow.log_metric("AUC", aml.leader.auc())
    
    # Log best model (mlflow.h2o module provides API for logging & loading H2O models)
    mlflow.h2o.log_model(aml.leader, 
                         artifact_path="model",
                         registered_model_name="h20-model"
                        )
    
    model_uri = mlflow.get_artifact_uri("model")
    print(model_uri)
    
    # Print and view AutoML Leaderboard
    lb = get_leaderboard(aml, extra_columns='ALL')
    print(lb.head(rows=lb.nrows))
    
    # Get IDs of current experiment run
    exp_id = experiment.experiment_id
    run_id = mlflow.active_run().info.run_id
    
    # Save leaderboard as CSV
    lb_path = f'mlruns/{exp_id}/{run_id}/artifacts/model/leaderboard.csv'
    lb.as_data_frame().to_csv(lb_path, index=False) 
    print(f'Leaderboard saved in {lb_path}')

2024/06/11 16:47:13 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


AutoML progress: |


16:47:13.223: Project: AutoML_23_20240611_164713
16:47:13.223: 5-fold cross-validation will be used.
16:47:13.223: Setting stopping tolerance adaptively based on the training frame: 0.004104535616689248
16:47:13.223: Build control seed: 42
16:47:13.223: training frame: Frame key: AutoML_23_20240611_164713_training_py_9_sid_9d7c    cols: 124    rows: 59357  chunks: 32    size: 6954768  checksum: -1088187092662504757
16:47:13.223: validation frame: NULL
16:47:13.223: leaderboard frame: NULL
16:47:13.223: blending frame: NULL
16:47:13.223: response column: Response
16:47:13.223: fold column: null
16:47:13.223: weights column: null
16:47:13.224: Loading execution steps: [{XGBoost : [def_2 (1g, 10w), def_1 (2g, 10w), def_3 (3g, 10w), grid_1 (4g, 90w), lr_search (7g, 30w)]}, {GLM : [def_1 (1g, 10w)]}, {DRF : [def_1 (2g, 10w), XRT (3g, 10w)]}, {GBM : [def_5 (1g, 10w), def_2 (2g, 10w), def_3 (2g, 10w), def_4 (2g, 10w), def_1 (3g, 10w), grid_1 (4g, 60w), lr_annealing (7g, 10w)]}, {DeepLearning

2024/06/11 16:55:28 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/06/11 16:55:28 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


model_id                               logloss    mean_per_class_error      rmse       mse    training_time_ms    predict_time_per_row_ms  algo
XGBoost_2_AutoML_23_20240611_164713    1.27888                0.624656  0.653082  0.426517               34437                   0.010921  XGBoost
XGBoost_1_AutoML_23_20240611_164713    1.29889                0.626317  0.653736  0.427371               52110                   0.013557  XGBoost
[2 rows x 8 columns]



OSError: Cannot save file into a non-existent directory: 'mlruns/1/e17e0dff88d643979443093f9e3547f6/artifacts/model'

In [32]:
# Get AutoML event log
log = aml.event_log
log

timestamp,level,stage,message,name,value
16:36:03.221,INFO,Workflow,Project: AutoML_22_20240611_163603,,
16:36:03.221,INFO,Validation,5-fold cross-validation will be used.,,
16:36:03.222,INFO,Validation,Setting stopping tolerance adaptively based on the training frame: 0.004104535616689248,,
16:36:03.222,INFO,Validation,Build control seed: 42,,
16:36:03.222,INFO,DataImport,training frame: Frame key: AutoML_22_20240611_163603_training_py_9_sid_9d7c cols: 124 rows: 59357 chunks: 32 size: 6954768 checksum: -1088187092662504757,,
16:36:03.222,INFO,DataImport,validation frame: NULL,,
16:36:03.222,INFO,DataImport,leaderboard frame: NULL,,
16:36:03.222,INFO,DataImport,blending frame: NULL,,
16:36:03.222,INFO,DataImport,response column: Response,,
16:36:03.222,INFO,DataImport,fold column: null,,


In [18]:
# Leader (best) model stored here
aml.leader

Unnamed: 0,number_of_trees
,43.0

1,2,3,4,5,6,7,8,Error,Rate
3550.0,280.0,9.0,39.0,229.0,757.0,365.0,978.0,0.4280651,"2,657 / 6,207"
221.0,3825.0,18.0,39.0,285.0,856.0,374.0,934.0,0.4162088,"2,727 / 6,552"
21.0,32.0,576.0,23.0,70.0,196.0,31.0,64.0,0.4313919,"437 / 1,013"
20.0,13.0,3.0,741.0,0.0,316.0,51.0,284.0,0.4810924,"687 / 1,428"
96.0,140.0,8.0,6.0,3896.0,605.0,247.0,434.0,0.2827688,"1,536 / 5,432"
198.0,151.0,3.0,58.0,193.0,7936.0,828.0,1866.0,0.2935102,"3,297 / 11,233"
103.0,50.0,5.0,30.0,35.0,993.0,4455.0,2356.0,0.4449981,"3,572 / 8,027"
55.0,37.0,1.0,50.0,27.0,620.0,387.0,18312.0,0.060393,"1,177 / 19,489"
4264.0,4528.0,623.0,986.0,4735.0,12279.0,6738.0,25228.0,0.2709621,"16,090 / 59,381"

k,hit_ratio
1,0.7290379
2,0.8915646
3,0.9527458
4,0.9821155
5,0.9935837
6,0.9982991
7,0.9997305
8,0.9999999

1,2,3,4,5,6,7,8,Error,Rate
1645.0,965.0,32.0,53.0,458.0,1263.0,564.0,1227.0,0.7349766,"4,562 / 6,207"
919.0,1739.0,37.0,71.0,700.0,1362.0,548.0,1176.0,0.7345849,"4,813 / 6,552"
84.0,92.0,93.0,66.0,231.0,324.0,35.0,88.0,0.9081935,"920 / 1,013"
48.0,45.0,13.0,236.0,2.0,523.0,74.0,487.0,0.8347339,"1,192 / 1,428"
332.0,558.0,77.0,12.0,2645.0,1056.0,283.0,469.0,0.5130707,"2,787 / 5,432"
678.0,560.0,36.0,141.0,510.0,5488.0,1403.0,2417.0,0.5114395,"5,745 / 11,233"
243.0,205.0,6.0,56.0,56.0,1609.0,3008.0,2844.0,0.6252647,"5,019 / 8,027"
165.0,134.0,2.0,140.0,42.0,1189.0,794.0,17023.0,0.1265329,"2,466 / 19,489"
4114.0,4298.0,296.0,775.0,4644.0,12814.0,6709.0,25731.0,0.4631785,"27,504 / 59,381"

k,hit_ratio
1,0.5368215
2,0.7322376
3,0.8474596
4,0.9180715
5,0.960644
6,0.9842038
7,0.993988
8,1.0000001

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.5368216,0.0045314,0.5338891,0.5394914,0.5389862,0.5303974,0.5413439
aic,,0.0,,,,,
auc,,0.0,,,,,
err,0.4631784,0.0045314,0.466111,0.4605086,0.4610138,0.4696026,0.4586561
err_count,5500.8,53.89063,5536.0,5469.0,5475.0,5577.0,5447.0
loglikelihood,,0.0,,,,,
logloss,1.2818227,0.0063702,1.2876008,1.2821823,1.2743537,1.2884938,1.2764826
max_per_class_error,0.9090527,0.0198942,0.9320388,0.8842975,0.8978494,0.9263158,0.9047619
mean_per_class_accuracy,0.37623,0.0067192,0.3699901,0.3805688,0.3813523,0.3678591,0.3813797
mean_per_class_error,0.62377,0.0067192,0.63001,0.6194312,0.6186477,0.6321409,0.6186203

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_classification_error,training_auc,training_pr_auc
,2024-06-11 15:55:14,3 min 3.308 sec,0.0,0.875,2.0794415,0.6717974,,
,2024-06-11 15:55:19,3 min 7.868 sec,5.0,0.7022096,1.3277541,0.4067463,,
,2024-06-11 15:55:23,3 min 11.839 sec,10.0,0.6426959,1.1518377,0.381132,,
,2024-06-11 15:55:27,3 min 16.190 sec,15.0,0.6158506,1.0705576,0.3607214,,
,2024-06-11 15:55:31,3 min 20.308 sec,20.0,0.5970312,1.0109502,0.3410182,,
,2024-06-11 15:55:36,3 min 24.613 sec,25.0,0.582937,0.9658901,0.325626,,
,2024-06-11 15:55:40,3 min 28.835 sec,30.0,0.5700407,0.9241209,0.308853,,
,2024-06-11 15:55:44,3 min 33.058 sec,35.0,0.5594498,0.8905073,0.2955322,,
,2024-06-11 15:55:48,3 min 37.364 sec,40.0,0.549465,0.8598852,0.282582,,
,2024-06-11 15:55:51,3 min 40.283 sec,43.0,0.5416741,0.8366128,0.2709621,,

variable,relative_importance,scaled_importance,percentage
BMI,31002.0410156,1.0,0.1832077
Product_Info_4,10469.3408203,0.3376984,0.0618689
Medical_History_4,9464.2246094,0.3052775,0.0559292
Wt,9438.6738281,0.3044533,0.0557782
Ins_Age,7556.5546875,0.2437438,0.0446557
Employment_Info_6,7038.3720703,0.2270293,0.0415935
Family_Hist_4,6792.0522461,0.2190840,0.0401379
Employment_Info_1,6284.5332031,0.2027135,0.0371387
Medical_Keyword_15,6215.4028320,0.2004837,0.0367301
Medical_History_1,5978.6401367,0.1928467,0.0353310


In [22]:
# Import test data
test_frame = h2o.import_file(path='data/processed/test.csv')
test_frame.head()


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


Id,Product_Info_1,Product_Info_2,Product_Info_3,Product_Info_4,Product_Info_5,Product_Info_6,Product_Info_7,Ins_Age,Ht,Wt,BMI,Employment_Info_1,Employment_Info_2,Employment_Info_3,Employment_Info_4,Employment_Info_5,Employment_Info_6,InsuredInfo_1,InsuredInfo_2,InsuredInfo_3,InsuredInfo_4,InsuredInfo_5,InsuredInfo_6,InsuredInfo_7,Insurance_History_1,Insurance_History_2,Insurance_History_3,Insurance_History_4,Insurance_History_5,Insurance_History_7,Insurance_History_8,Insurance_History_9,Family_Hist_1,Family_Hist_2,Family_Hist_3,Family_Hist_4,Family_Hist_5,Medical_History_1,Medical_History_2,Medical_History_3,Medical_History_4,Medical_History_5,Medical_History_6,Medical_History_7,Medical_History_8,Medical_History_9,Medical_History_10,Medical_History_11,Medical_History_12,Medical_History_13,Medical_History_14,Medical_History_15,Medical_History_16,Medical_History_17,Medical_History_18,Medical_History_19,Medical_History_20,Medical_History_21,Medical_History_22,Medical_History_23,Medical_History_24,Medical_History_25,Medical_History_26,Medical_History_27,Medical_History_28,Medical_History_29,Medical_History_30,Medical_History_31,Medical_History_32,Medical_History_33,Medical_History_34,Medical_History_35,Medical_History_36,Medical_History_37,Medical_History_38,Medical_History_39,Medical_History_40,Medical_History_41,Medical_Keyword_1,Medical_Keyword_2,Medical_Keyword_3,Medical_Keyword_4,Medical_Keyword_5,Medical_Keyword_6,Medical_Keyword_7,Medical_Keyword_8,Medical_Keyword_9,Medical_Keyword_10,Medical_Keyword_11,Medical_Keyword_12,Medical_Keyword_13,Medical_Keyword_14,Medical_Keyword_15,Medical_Keyword_16,Medical_Keyword_17,Medical_Keyword_18,Medical_Keyword_19,Medical_Keyword_20,Medical_Keyword_21,Medical_Keyword_22,Medical_Keyword_23,Medical_Keyword_24,Medical_Keyword_25,Medical_Keyword_26,Medical_Keyword_27,Medical_Keyword_28,Medical_Keyword_29,Medical_Keyword_30,Medical_Keyword_31,Medical_Keyword_32,Medical_Keyword_33,Medical_Keyword_34,Medical_Keyword_35,Medical_Keyword_36,Medical_Keyword_37,Medical_Keyword_38,Medical_Keyword_39,Medical_Keyword_40,Medical_Keyword_41,Medical_Keyword_42,Medical_Keyword_43,Medical_Keyword_44,Medical_Keyword_45,Medical_Keyword_46,Medical_Keyword_47,Medical_Keyword_48
1,1,D3,26,0.487179,2,3,1,0.61194,0.781818,0.338912,0.472262,0.15,3,1,0.0,2,0.5,2,2,11,3,1,1,1,2,1,1,3,,3,2,3,3,,0.627451,0.760563,,2.0,16,2,2,1,3,1,2,2,,3,2,1,3,,1,2,1,1,2,1,2,1,,2,2,1,1,3,2,3,,3,3,1,3,2,1,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,A2,26,0.0769231,2,3,1,0.626866,0.727273,0.311715,0.484984,0.0,1,3,0.07,2,0.2,1,2,8,3,1,1,1,1,1,3,1,0.00166667,1,1,2,2,,0.529412,0.746479,,5.0,261,3,1,1,3,2,2,1,,3,2,3,3,110.0,3,3,1,1,2,1,2,3,,2,2,3,1,3,2,3,,3,3,1,3,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,D3,26,0.144667,2,3,1,0.58209,0.709091,0.320084,0.519103,0.143,9,1,0.0,2,0.45,1,2,3,3,1,1,1,2,1,1,3,,3,2,3,3,0.666667,,0.661972,,3.0,132,2,1,1,3,2,2,2,,3,2,3,3,240.0,1,3,1,1,2,1,2,3,,2,2,3,1,1,2,3,,1,3,1,3,2,1,3,3,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,1,A1,26,0.151709,2,1,1,0.522388,0.654545,0.267782,0.486962,0.21,9,1,0.0,2,1.0,2,2,3,3,1,1,1,1,1,3,1,0.000666667,2,1,2,2,,0.686275,0.676056,,,162,3,2,1,1,2,3,2,,3,2,3,3,,1,3,1,1,2,2,2,3,,1,3,3,2,3,2,3,,3,1,1,2,2,1,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1
12,1,A1,26,0.0769231,2,3,1,0.298507,0.672727,0.246862,0.428718,0.085,9,1,0.0,2,0.2,1,2,8,3,1,2,1,2,1,1,3,,3,2,3,2,0.449275,,0.380282,,18.0,181,3,1,1,3,2,2,2,,3,2,3,3,188.0,1,3,1,1,2,1,2,1,,1,3,3,1,1,2,3,,3,3,1,2,2,1,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13,1,D3,26,0.230769,2,3,1,0.567164,0.818182,0.299163,0.379754,0.075,9,1,0.0,2,0.4,1,2,8,3,1,1,1,2,1,1,3,,3,2,3,3,,0.647059,,0.553571,4.0,335,2,2,1,3,2,2,2,,3,2,3,3,,1,3,1,1,2,1,2,3,,2,2,3,1,3,2,3,,3,3,1,3,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
21,1,A3,26,1.0,2,3,1,0.298507,0.690909,0.225941,0.373628,0.14,9,1,0.0,2,1.0,2,2,3,3,1,1,1,2,1,3,1,0.00666667,1,3,2,2,0.449275,,0.422535,,21.0,112,2,1,1,3,2,2,1,,3,2,3,3,82.0,1,2,1,1,2,1,2,3,,1,3,3,1,3,2,3,,1,3,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
28,1,D4,26,0.25641,2,3,1,0.0597015,0.654545,0.215481,0.388655,0.025,9,1,0.0,2,0.0,1,2,2,3,1,2,1,2,1,1,3,,3,2,3,2,0.391304,,,0.133929,0.0,491,2,2,1,3,2,2,2,,3,2,3,3,,1,3,1,2,2,1,2,3,,1,3,3,1,3,2,3,,3,3,1,2,2,1,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
30,1,D3,26,0.0769231,2,3,1,0.61194,0.618182,0.225941,0.446361,0.035,9,1,0.0,2,,2,2,8,3,1,1,1,2,1,1,3,,3,2,3,3,,0.656863,,0.589286,2.0,112,2,2,1,3,2,2,2,,3,2,3,3,,1,3,1,1,2,1,2,1,,1,3,3,1,3,2,3,,3,3,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
36,1,A3,26,0.0769231,2,3,1,0.328358,0.781818,0.320084,0.443418,0.06,9,1,0.0,2,1.0,1,2,8,3,1,1,1,2,1,1,3,,3,2,3,3,0.637681,,0.464789,,,162,3,2,1,3,2,2,2,,3,2,3,3,,1,3,1,1,2,1,2,3,,1,3,3,1,3,2,3,,3,1,1,2,2,1,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Drop ID column for test set
X_test_frame = test_frame.drop('Response')
y_test_frame = test_frame[:, 'Response']

In [6]:
# Get dataframe of all runs
all_experiments = [exp.experiment_id for exp in client.list_experiments()]
runs = mlflow.search_runs(experiment_ids=all_experiments, run_view_type=ViewType.ALL)

# Identify best model (experiment id and run id) amongst all runs in the experiment
run_id, exp_id = runs.loc[runs['metrics.log_loss'].idxmin()]['run_id'], runs.loc[runs['metrics.log_loss'].idxmin()]['experiment_id']
run_id, exp_id

NameError: name 'client' is not defined

In [7]:
# Load best model (AutoML leader)
best_model = mlflow.h2o.load_model(f"mlruns/{exp_id}/{run_id}/artifacts/model/")

# Generate predictions with best model (output is H2O frame)
preds_frame = best_model.predict(X_test_frame)

NameError: name 'mlflow' is not defined

In [8]:
# Get y values (ground truth and predicted)
y_pred = preds_frame.as_data_frame()['predict']
y_true = y_test_frame.as_data_frame()['Response']

NameError: name 'preds_frame' is not defined

In [9]:
from sklearn.metrics import f1_score, accuracy_score
f1_score(y_true, y_pred)

NameError: name 'y_true' is not defined

In [10]:
accuracy_score(y_true, y_pred)

NameError: name 'y_true' is not defined