In [None]:
import h2o
from h2o.automl import H2OAutoML
# Start (or attach to) the local H2O cluster with the same 4 GB memory cap that the
# training cell requests. max_mem_size is only honoured when init() launches a new
# cluster, so a bare init() here would make the later 4G request a no-op.
h2o.init(max_mem_size="4G")


In [None]:
# Import necessary libraries for data handling and machine learning
from dataclasses import dataclass
import pandas as pd
import h2o
from h2o.automl import H2OAutoML

# Create a data class to store my AutoML parameters
@dataclass
class H2OAutoMLParams:
    """Inputs for a single H2O AutoML run.

    Attributes:
        df: pandas DataFrame holding the training data.
        y: name of the target column to predict.
        path: directory the exported model is written to (defaults to "./").
    """

    df: pd.DataFrame
    y: str
    path: str = "./"

# Start (or connect to) the local H2O cluster.
# max_mem_size caps the memory of a cluster that init() launches itself; if a cluster
# is already running, init() only connects to it and the cap is not applied.
h2o.init(max_mem_size="4G")

# Load the training data into pandas. The path is relative to the notebook's working
# directory; point it at the real location of the CSV if it lives elsewhere.
train_df = pd.read_csv("train_df.csv")

# Bundle the AutoML inputs. `path` is left at its default ("./"), so the exported
# model is written to the current working directory.
params = H2OAutoMLParams(df=train_df, y='at_least_one_conversion')

# Fail early, before uploading anything to the cluster, if the target is missing.
if params.y not in params.df.columns:
    raise ValueError(f"Target column {params.y!r} not found in the training data")

# H2O works on its own frame type, so upload the pandas DataFrame to the cluster.
h2o_df = h2o.H2OFrame(params.df)

# AutoML picks regression vs. classification from the response column's type: a
# numeric 0/1 column would be modelled as regression. Encode the target as a factor
# (categorical) so a classifier is trained.
# NOTE(review): assumes 'at_least_one_conversion' is a class label, as its name
# suggests -- drop this conversion if it is really a continuous quantity.
if not h2o_df[params.y].isfactor()[0]:
    h2o_df[params.y] = h2o_df[params.y].asfactor()

# Every column except the target is used as a predictor.
x = [col for col in h2o_df.columns if col != params.y]

# Configure AutoML: cap the number of models to bound runtime, fix the seed for
# reproducibility, and skip Deep Learning models.
aml = H2OAutoML(max_models=20, seed=1, exclude_algos=["DeepLearning"], verbosity="info")

# Train all candidate models on the uploaded frame.
aml.train(x=x, y=params.y, training_frame=h2o_df)

# The leaderboard ranks every model AutoML trained; print all rows of it.
lb = aml.leaderboard
print(lb.head(rows=lb.nrows))  # Print the leaderboard

# Export the top-ranked model as a MOJO so it can be used later for predictions.
best_model = aml.leader
best_model_path = best_model.download_mojo(path=params.path)
print(f"My best model is saved to: {best_model_path}")

# Shut the cluster down once finished. h2o.shutdown() is deprecated; the supported
# call is the cluster handle's own shutdown().
h2o.cluster().shutdown(prompt=False)