In [9]:
!pip install h2o matplotlib seaborn
import h2o
import matplotlib.pyplot as plt
import seaborn as sns
from h2o.automl import H2OAutoML

# Always start with shutdown to clear previous sessions
#h2o.shutdown(prompt=False)
h2o.init()

# Use alternative Titanic dataset source
try:
    data = h2o.import_file("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
except Exception as e:
    print("Error loading data:", e)
    # Fallback to local download if URL fails
    import pandas as pd
    df = pd.read_csv("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
    data = h2o.H2OFrame(df)

# Prepare data - adjust column names to match new dataset
data['Survived'] = data['Survived'].asfactor()  # Capital 'S' in this dataset

# Verify columns
print("Columns:", data.columns)

# Update feature list based on actual columns
excluded_columns = ['Survived', 'Name', 'Ticket', 'Cabin', 'PassengerId']
x = [col for col in data.columns if col not in excluded_columns]
y = 'Survived'

# Split data
train, valid, test = data.split_frame(ratios=[0.6, 0.2], seed=123)

# Run AutoML
aml = H2OAutoML(max_runtime_secs=30, seed=123)
aml.train(x=x, y=y, training_frame=train, validation_frame=valid)

# Show results
print("\nLeaderboard:")
print(aml.leaderboard.head())

# Clean shutdown
h2o.cluster().shutdown()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.25" 2024-10-15; OpenJDK Runtime Environment (build 11.0.25+9-post-Ubuntu-1ubuntu122.04); OpenJDK 64-Bit Server VM (build 11.0.25+9-post-Ubuntu-1ubuntu122.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.11/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpa522cdgt
  JVM stdout: /tmp/tmpa522cdgt/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpa522cdgt/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,03 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.6
H2O_cluster_version_age:,2 months and 28 days
H2O_cluster_name:,H2O_from_python_unknownUser_ku3xqe
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.170 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Columns: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']
AutoML progress: |
10:35:14.246: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.

███████████████████████████████████████████████████████████████| (done) 100%

Leaderboard:
model_id                                                      auc    logloss     aucpr    mean_per_class_error      rmse       mse
GBM_2_AutoML_1_20250130_103514                           0.842573   0.453372  0.82083                 0.201628  0.377605  0.142586
StackedEnsemble_AllModels_1_AutoML_1_20250130_103514     0.842194   0.452356  0.820812                0.210998  0.377187  0.14227
GBM_