## **Librerías**

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [3]:
import h2o
from h2o.automl import H2OAutoML

## **Datos**

In [4]:
# Load the breast-cancer dataset that ships with scikit-learn. Later cells read
# its .data, .feature_names and .target attributes.
data = load_breast_cancer()

In [5]:
# Feature matrix: one named column per measurement.
X = pd.DataFrame(data=data.data, columns=data.feature_names)
# Labels kept as a one-column DataFrame so they can be concatenated with X.
y = pd.DataFrame(data.target, columns=['Target'])

In [6]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=13,
)

In [7]:
# Put the training features and their 'Target' labels side by side (aligned on
# the shared row index) so a single frame can be handed to H2O later.
train_df = pd.concat([X_train, y_train], axis=1)
# Preview one row. The seed makes the preview reproducible: an unseeded
# .sample() shows a different row on every Restart & Run All.
train_df.sample(n=1, random_state=13)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
424,9.742,19.12,61.93,289.7,0.1075,0.08333,0.008934,0.01967,0.2538,0.07029,...,23.17,71.79,380.9,0.1398,0.1352,0.02085,0.04589,0.3196,0.08009,1


In [8]:
# Put the held-out features and their 'Target' labels side by side (aligned on
# the shared row index), mirroring the layout of the training frame.
test_df = pd.concat([X_test, y_test], axis=1)
# Preview one row. The seed makes the preview reproducible: an unseeded
# .sample() shows a different row on every Restart & Run All.
test_df.sample(n=1, random_state=13)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
464,13.17,18.22,84.28,537.3,0.07466,0.05994,0.04859,0.0287,0.1454,0.05549,...,23.89,95.1,687.6,0.1282,0.1965,0.1876,0.1045,0.2235,0.06925,1


## **Modelo**

In [9]:
# Initialize H2O: connect to an instance at localhost:54321 or, when none is
# running, start a local server and connect to it (see the log printed below).
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; OpenJDK 64-Bit Server VM Zulu11.43+55-CA (build 11.0.9.1+1-LTS, mixed mode)
  Starting server from C:\Users\jmart\Documents\Proyectos\01. Data Science\00. Generalidades\good_practices_DS\venv\Lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\jmart\AppData\Local\Temp\tmp2s35uw8e
  JVM stdout: C:\Users\jmart\AppData\Local\Temp\tmp2s35uw8e\h2o_jmart_started_from_python.out
  JVM stderr: C:\Users\jmart\AppData\Local\Temp\tmp2s35uw8e\h2o_jmart_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,03 secs
H2O_cluster_timezone:,America/Bogota
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.3
H2O_cluster_version_age:,10 days
H2O_cluster_name:,H2O_from_python_jmart_h8honh
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,1.926 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


In [10]:
# Upload both pandas frames to the H2O cluster, training set first.
h2o_train, h2o_test = (
    h2o.H2OFrame(pandas_df) for pandas_df in (train_df, test_df)
)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [11]:
# Convert the response column to a factor (categorical) in both H2O frames.
for frame in (h2o_train, h2o_test):
    frame['Target'] = frame['Target'].asfactor()

In [12]:
# Name of the response column; defined once so the feature list below cannot
# drift from it.
y_columns = 'Target'
# Predictors are every column except the response. Filtering the existing
# column names avoids building a derived H2OFrame just to read its header.
X_columns = [name for name in h2o_train.columns if name != y_columns]

In [13]:
# AutoML configuration: a 60-second time budget and a fixed seed.
# NOTE(review): the H2O AutoML docs indicate that time-budgeted runs are not
# guaranteed to be reproducible even with a seed; consider max_models if exact
# reproducibility matters — confirm for the installed version.
automl_settings = dict(max_runtime_secs=60, seed=13)
model = H2OAutoML(**automl_settings)

In [14]:
# Fit AutoML on the training frame only; the call's return value is rendered
# as the cell output.
model.train(x=X_columns, y=y_columns, training_frame=h2o_train)

AutoML progress: |


17:04:49.926: AutoML: XGBoost is not available; skipping it.

███████████████████████████████████████████████████████████████| (done) 100%


key,value
Stacking strategy,cross_validation
Number of base models (used / total),3/5
# GBM base models (used / total),1/1
# GLM base models (used / total),1/1
# DRF base models (used / total),1/2
# DeepLearning base models (used / total),0/1
Metalearner algorithm,GLM
Metalearner fold assignment scheme,Random
Metalearner nfolds,5
Metalearner fold_column,

Unnamed: 0,0,1,Error,Rate
0,175.0,1.0,0.0057,(1.0/176.0)
1,0.0,279.0,0.0,(0.0/279.0)
Total,175.0,280.0,0.0022,(1.0/455.0)

metric,threshold,value,idx
max f1,0.6718933,0.9982111,265.0
max f2,0.6718933,0.9992837,265.0
max f0point5,0.6718933,0.9971408,265.0
max accuracy,0.6718933,0.9978022,265.0
max precision,0.9999966,1.0,0.0
max recall,0.6718933,1.0,265.0
max specificity,0.9999966,1.0,0.0
max absolute_mcc,0.6718933,0.9953728,265.0
max min_per_class_accuracy,0.7830659,0.9943182,264.0
max mean_per_class_accuracy,0.6718933,0.9971591,265.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.010989,0.9999866,1.6308244,1.6308244,1.0,0.9999923,1.0,0.9999923,0.0179211,0.0179211,63.0824373,63.0824373,0.0179211
2,0.021978,0.9999804,1.6308244,1.6308244,1.0,0.9999832,1.0,0.9999878,0.0179211,0.0358423,63.0824373,63.0824373,0.0358423
3,0.0307692,0.9999784,1.6308244,1.6308244,1.0,0.9999795,1.0,0.9999854,0.0143369,0.0501792,63.0824373,63.0824373,0.0501792
4,0.0417582,0.9999682,1.6308244,1.6308244,1.0,0.9999729,1.0,0.9999821,0.0179211,0.0681004,63.0824373,63.0824373,0.0681004
5,0.0505495,0.999966,1.6308244,1.6308244,1.0,0.9999669,1.0,0.9999794,0.0143369,0.0824373,63.0824373,63.0824373,0.0824373
6,0.1010989,0.9999378,1.6308244,1.6308244,1.0,0.9999553,1.0,0.9999674,0.0824373,0.1648746,63.0824373,63.0824373,0.1648746
7,0.1516484,0.9999031,1.6308244,1.6308244,1.0,0.9999235,1.0,0.9999528,0.0824373,0.2473118,63.0824373,63.0824373,0.2473118
8,0.2,0.9998196,1.6308244,1.6308244,1.0,0.9998612,1.0,0.9999306,0.078853,0.3261649,63.0824373,63.0824373,0.3261649
9,0.3010989,0.9995659,1.6308244,1.6308244,1.0,0.9997247,1.0,0.9998615,0.1648746,0.4910394,63.0824373,63.0824373,0.4910394
10,0.4,0.9988953,1.6308244,1.6308244,1.0,0.9993412,1.0,0.9997328,0.1612903,0.6523297,63.0824373,63.0824373,0.6523297

Unnamed: 0,0,1,Error,Rate
0,168.0,8.0,0.0455,(8.0/176.0)
1,3.0,276.0,0.0108,(3.0/279.0)
Total,171.0,284.0,0.0242,(11.0/455.0)

metric,threshold,value,idx
max f1,0.4318632,0.9804618,263.0
max f2,0.161833,0.9879603,275.0
max f0point5,0.622904,0.9825961,254.0
max accuracy,0.5505359,0.9758242,259.0
max precision,0.9999988,1.0,0.0
max recall,0.161833,1.0,275.0
max specificity,0.9999988,1.0,0.0
max absolute_mcc,0.4318632,0.9490254,263.0
max min_per_class_accuracy,0.6043827,0.9715909,256.0
max mean_per_class_accuracy,0.622904,0.9742994,254.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.010989,0.9999917,1.6308244,1.6308244,1.0,0.9999961,1.0,0.9999961,0.0179211,0.0179211,63.0824373,63.0824373,0.0179211
2,0.021978,0.9999844,1.6308244,1.6308244,1.0,0.9999888,1.0,0.9999924,0.0179211,0.0358423,63.0824373,63.0824373,0.0358423
3,0.0307692,0.9999763,1.6308244,1.6308244,1.0,0.9999781,1.0,0.9999883,0.0143369,0.0501792,63.0824373,63.0824373,0.0501792
4,0.0417582,0.9999605,1.6308244,1.6308244,1.0,0.9999699,1.0,0.9999835,0.0179211,0.0681004,63.0824373,63.0824373,0.0681004
5,0.0505495,0.9999526,1.6308244,1.6308244,1.0,0.9999562,1.0,0.9999787,0.0143369,0.0824373,63.0824373,63.0824373,0.0824373
6,0.1010989,0.9999152,1.6308244,1.6308244,1.0,0.9999356,1.0,0.9999572,0.0824373,0.1648746,63.0824373,63.0824373,0.1648746
7,0.1516484,0.99982,1.6308244,1.6308244,1.0,0.9998786,1.0,0.999931,0.0824373,0.2473118,63.0824373,63.0824373,0.2473118
8,0.2,0.9997352,1.6308244,1.6308244,1.0,0.9997761,1.0,0.9998935,0.078853,0.3261649,63.0824373,63.0824373,0.3261649
9,0.3010989,0.9991381,1.6308244,1.6308244,1.0,0.9995264,1.0,0.9997703,0.1648746,0.4910394,63.0824373,63.0824373,0.4910394
10,0.4,0.9972085,1.6308244,1.6308244,1.0,0.9984791,1.0,0.999451,0.1612903,0.6523297,63.0824373,63.0824373,0.6523297

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.9826539,0.0091698,0.9888889,0.9797980,0.9888889,0.9879518,0.9677419
auc,0.9964086,0.0032511,0.9922839,0.9970339,0.9994856,0.9993873,0.9938524
err,0.0173461,0.0091698,0.0111111,0.0202020,0.0111111,0.0120482,0.0322581
err_count,1.6,0.8944272,1.0,2.0,1.0,1.0,3.0
f0point5,0.979966,0.0071306,0.9854015,0.9735974,0.9854015,0.9845560,0.9708738
f1,0.9861771,0.0067064,0.9908257,0.9833333,0.9908257,0.9902912,0.9756098
f2,0.9924744,0.0068762,0.9963099,0.993266,0.9963099,0.9960938,0.9803922
lift_top_group,1.6326681,0.0633872,1.6666666,1.6779661,1.6666666,1.627451,1.5245901
logloss,0.0756303,0.0191314,0.0808140,0.0801508,0.0519047,0.0632047,0.1020771
max_per_class_error,0.0398611,0.0156643,0.0277778,0.05,0.0277778,0.03125,0.0625


In [15]:
# Ranking of the models AutoML trained, with the metrics shown in the table
# below; kept in a variable and displayed as the cell output.
leaderboard = model.leaderboard
leaderboard

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_BestOfFamily_4_AutoML_1_20231230_170449,0.995601,0.0760852,0.996951,0.0281036,0.143633,0.0206306
StackedEnsemble_AllModels_1_AutoML_1_20231230_170449,0.995479,0.0780943,0.996955,0.0291524,0.1461,0.0213453
GBM_4_AutoML_1_20231230_170449,0.995398,0.0858732,0.997066,0.0291524,0.155693,0.0242404
StackedEnsemble_BestOfFamily_2_AutoML_1_20231230_170449,0.995092,0.0790819,0.996558,0.0231651,0.143694,0.0206481
GBM_grid_1_AutoML_1_20231230_170449_model_6,0.995011,0.0860014,0.996724,0.0324312,0.157083,0.024675
GLM_1_AutoML_1_20231230_170449,0.994888,0.0816401,0.996166,0.0242139,0.145972,0.0213079
GBM_grid_1_AutoML_1_20231230_170449_model_5,0.994848,0.0936099,0.996726,0.0348342,0.163909,0.0268661
StackedEnsemble_AllModels_2_AutoML_1_20231230_170449,0.994481,0.0774334,0.995761,0.0203242,0.139949,0.0195856
StackedEnsemble_BestOfFamily_3_AutoML_1_20231230_170449,0.99442,0.0837638,0.995956,0.0231651,0.147591,0.0217832
DeepLearning_grid_3_AutoML_1_20231230_170449_model_1,0.9944,0.095516,0.996167,0.0306391,0.158103,0.0249966


In [None]:
# Disabled so that Run All leaves the H2O cluster alive; uncomment to stop it
# when you are done with the session.
# NOTE(review): the module-level h2o.shutdown() is marked deprecated in the H2O
# Python docs — prefer h2o.cluster().shutdown(); confirm for the installed version.
# h2o.shutdown()

H2O session _sid_a7ed closed.


  h2o.shutdown()
