### Initializing H2O

In [1]:
pip install h2o pandas-gbq google-auth



In [2]:
# Check that a Java runtime is available (h2o.init() starts a local Java server).
# The install commands below are left commented out on purpose: they are for Debian-based
# systems (like Google Colab) and will not work on all local machines.
# On a local machine, ensure a Java Development Kit (JDK) version 11 or newer is installed.
# You can check your Java version by running `java -version` in your terminal.

# !apt-get update --fix-missing
# !apt-get install -y openjdk-11-jdk-headless -qq > /dev/null
import os
# `os` is only needed if you uncomment the JAVA_HOME assignment below.
# After installing Java, you may need to set the JAVA_HOME environment variable.
# The path below is an example for Ubuntu. You will need to find the correct path for your system.
# For example, on macOS with Homebrew, it might be something like: /usr/local/opt/openjdk@11
# os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
!java -version # This will check the version of java available in the system path.

0% [Working]            Hit:1 https://packages.cloud.google.com/apt gcsfuse-jammy InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to cloud.r-project.or                                                                               Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
                                                                               Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
0% [Waiting for headers] [Connected to cloud.r-project.org (3.160.5.97)] [Conne                                                                               Hit:4 https://cli.github.com/packages stable InRelease
0% [Waiting for headers] [Connected to cloud.r-project.org (3.160.5.97)] [Conne                                                                               Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
0% [Connected to cloud.r-project.org (3.160.5.97)] [Waiting for headers] [Waiti                      

In [3]:
import os

import h2o

# Heap size given to the local H2O JVM.
# A previous AutoML run in this notebook logged a 'Java heap space' OutOfMemoryError with 4G,
# so the value can be raised without editing the notebook by setting the H2O_MAX_MEM_SIZE
# environment variable (e.g. '8G', '16G') before running this cell.
# An unset or empty variable falls back to the original 4G.
H2O_MAX_MEM_SIZE = os.environ.get("H2O_MAX_MEM_SIZE") or "4G"

# nthreads=-1: use all available cores.
h2o.init(nthreads=-1, max_mem_size=H2O_MAX_MEM_SIZE)

# Silence progress bars for the rest of the notebook, then show the cluster status table.
h2o.no_progress()
print(h2o.cluster().get_status())

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.29" 2025-10-21; OpenJDK Runtime Environment (build 11.0.29+7-post-Ubuntu-1ubuntu122.04); OpenJDK 64-Bit Server VM (build 11.0.29+7-post-Ubuntu-1ubuntu122.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.12/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpy_jfc0ok
  JVM stdout: /tmp/tmpy_jfc0ok/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpy_jfc0ok/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,04 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.9
H2O_cluster_version_age:,11 days
H2O_cluster_name:,H2O_from_python_unknownUser_rndyoq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,4 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


H2O_cluster_uptime    H2O_cluster_timezone    H2O_data_parsing_timezone    H2O_cluster_version    H2O_cluster_version_age    H2O_cluster_name                    H2O_cluster_total_nodes    H2O_cluster_free_memory    H2O_cluster_total_cores    H2O_cluster_allowed_cores    H2O_cluster_status    H2O_connection_url      H2O_connection_proxy                                                                       H2O_internal_security    Python_version
--------------------  ----------------------  ---------------------------  ---------------------  -------------------------  ----------------------------------  -------------------------  -------------------------  -------------------------  ---------------------------  --------------------  ----------------------  -----------------------------------------------------------------------------------------  -----------------------  ----------------
05 secs               Etc/UTC                 UTC                          3.46.0.9               11 d

### Load BigQuery Data

### Local Authentication with Google Cloud

To run this notebook locally and access BigQuery, you need to authenticate. The recommended way is to use the Google Cloud SDK.

1.  **Install Google Cloud SDK:** Follow the instructions at https://cloud.google.com/sdk/docs/install.
2.  **Authenticate:** Run the following command in your terminal and follow the prompts to log in with your Google account:
    ```bash
    gcloud auth application-default login
    ```
This command creates a credential file that `pandas-gbq` and other Google Cloud libraries will automatically use for authentication.

In [4]:
from pandas_gbq import read_gbq
import h2o

# BigQuery project to run the query under, and the fully qualified snapshot table to read.
project_id = "basq-477923"
table_id = "basq-477923.justcgall_dates.justtcg_alldates_snapshot_20251201"

# Standard-SQL query that pulls every row and column of the table
# (the table id is back-quoted).
sql_query = "SELECT * FROM `{}`".format(table_id)

# Step 1: BigQuery -> pandas DataFrame.
print(f"Loading data from {table_id} into a pandas DataFrame...")
pandas_df = read_gbq(
    sql_query,
    project_id=project_id,
    dialect='standard',
)
print("Data loaded into pandas DataFrame. Converting to H2OFrame...")

# Step 2: pandas DataFrame -> H2OFrame on the running H2O cluster.
h2o_df = h2o.H2OFrame(pandas_df)

# Step 3: show per-column types and summary statistics as the cell output.
print("H2OFrame created successfully. Here's a summary:")
h2o_df.summary()

Loading data from basq-477923.justcgall_dates.justtcg_alldates_snapshot_20251201 into a pandas DataFrame...
Downloading: 100%|[32m██████████[0m|
Data loaded into pandas DataFrame. Converting to H2OFrame...
H2OFrame created successfully. Here's a summary:


  h2o_df.summary()


Unnamed: 0,game,set_name,card_id,card_name,number,rarity,set,set_name_api,tcgplayerId,details,variant_id,condition,printing,language,tcgplayerSkuId,price,lastUpdated,priceChange24hr,7d_priceChange,7d_avgPrice,7d_minPrice,7d_maxPrice,7d_stddevPopPrice,7d_covPrice,7d_iqrPrice,7d_trendSlope,7d_priceChangesCount,7d_priceRelativeToRange,30d_priceChange,30d_avgPrice,30d_minPrice,30d_maxPrice,30d_stddevPopPrice,30d_covPrice,30d_iqrPrice,30d_trendSlope,30d_priceChangesCount,30d_priceRelativeToRange,90d_priceChange,90d_avgPrice,90d_minPrice,90d_maxPrice,90d_stddevPopPrice,90d_covPrice,90d_iqrPrice,90d_trendSlope,90d_priceChangesCount,90d_priceRelativeToRange,1y_priceChange,1y_avgPrice,1y_minPrice,1y_maxPrice,1y_stddevPopPrice,1y_covPrice,1y_iqrPrice,1y_trendSlope,1y_priceChangesCount,1y_priceRelativeToRange,minPriceAllTime,minPriceAllTimeDate,maxPriceAllTime,maxPriceAllTimeDate
type,enum,enum,enum,enum,enum,enum,enum,enum,int,enum,string,enum,enum,enum,int,real,int,real,real,enum,real,real,real,real,real,real,int,enum,real,real,real,real,real,real,real,real,int,real,real,real,real,real,real,real,real,real,int,real,enum,enum,real,real,enum,enum,enum,enum,enum,enum,enum,enum,enum,enum
mins,,,,,,,,,255216.0,,,,,,5326870.0,0.01,1742989335.0,-46.2,-94.7,,0.01,0.01,0.0,0.0,0.0,-53.849575205974695,0.0,,-99.05,0.01,0.01,0.01,0.0,0.0,0.0,-67.7772248651742,0.0,0.0,-99.37,0.01,0.01,0.01,0.0,0.0,0.0,-67.7772248651742,0.0,0.0,,,0.0,0.01,,,,,,,,,,
mean,,,,,,,,,589361.463913241,,,,,,8271354.72661327,23.430140441268133,1762904581.6865215,-0.012499884157360606,-0.4909395293142359,,22.573471192373233,23.15289665177542,0.204967991525814,0.017045956305098642,0.27011467985208054,-0.009362014428824757,1.9814268500436283,,-1.2167284340721327,22.98555680946147,22.00556990023924,24.00616237078497,0.6603520967814741,0.04777987348471529,1.0022649270785717,-0.018450888862524575,7.769310674367394,0.39515808235167943,3.951665251192263,23.311901962065303,21.10408649449068,25.853123121815266,1.4239602796106385,0.09846427795196055,1.8448217147852881,-0.028394867569691895,23.47786265897879,0.39974888983133805,,,19.84216520546806,28.377843520172863,,,,,,,,,,
maxs,,,,,,,,,664010.0,,,,,,8996314.0,32999.99,1763712709.0,350.0,783.33,,32999.99,32999.99,1602.72,1.4331865449241918,3998.98,706.912765448021,21.0,,1950.0,31279.4,26500.0,32999.99,2867.64,1.6980904155855012,6499.99,277.38798024861296,74.0,1.0,46152.0,32085.93,26500.0,32999.99,2259.62,3.0071531055654863,2099.15,56.8781895482977,215.0,1.0,,,26500.0,32999.99,,,,,,,,,,
sigma,,,,,,,,,42270.04924560148,,,,,,405164.82703955274,272.9191815156414,2950240.0578833017,4.003279850332394,13.389882924661709,,255.7741738215011,265.88583309419084,7.927855885327787,0.04676230654268565,18.35168267284819,3.467353290743939,3.6961421906285286,,33.8648238077007,245.17015871231675,224.257605469355,265.33387414302365,18.956271374747505,0.09117554615419038,36.099035955758005,1.8056041001813798,13.10967246176217,0.433666752619105,294.47236715511855,241.3382969352126,216.89715034734806,266.40786780715905,17.783079611285384,0.16284574182870776,18.998116121107437,0.7086900642623695,39.131003078032144,0.4200458319432639,,,219.79574053399918,279.0862336014116,,,,,,,,,,
zeros,,,,,,,,,0,,0,,,,0,0,0,35642,23295,,0,0,25166,22385,32043,22365,26286,,14161,0,0,0,15080,12881,20949,12858,14975,12956,10379,0,0,0,10291,9366,15453,9329,7841,12429,,,24,0,,,,,,,,,,
missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7053,0,0,4972,4708,0,4708,4708,4708,4708,0,4728,0,0,3828,3828,3828,3828,3828,3828,0,3851,0,16709,2213,2213,2212,2212,2213,2213,93,2250,93,11579,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box-case,Phantasmal Flames Booster Box Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,655281.0,,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box-case_sealed,Sealed,Normal,English,8942744.0,1669.26,1763703516.0,-1.62,-5.98,,1669.26,1775.52,23.56,0.013778420055386752,4.24,-8.73515077782309,17.0,,-19.76,1889.93,1669.26,2080.3,148.55,0.07859834379505344,315.02,-15.8553472844132,50.0,0.0,-16.54,2013.46,1669.26,2210.96,163.47,0.08118631381035077,229.66,-8.547386234310009,90.0,0.0,,,1669.26,2210.96,,,,,,,,,,
1,Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-bundle-case,Phantasmal Flames Booster Bundle Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654162.0,,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-bundle-case_sealed,Sealed,Normal,English,8935542.0,1642.91,1763703516.0,0.0,-6.54,,1642.91,1757.96,36.66,0.02190122232601175,49.9,-15.3435564491779,4.0,,-14.73,1800.45,1642.91,1977.65,95.46,0.05302034258411328,141.72,-9.63520810330438,12.0,0.0,-26.96,1966.63,1642.91,2249.38,192.19,0.09772732301236954,247.22,-11.1113944473904,16.0,0.0,,,1642.91,2249.38,,,,,,,,,,
2,Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-elite-trainer-box-case,Phantasmal Flames Elite Trainer Box Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654170.0,,pokemon-me02-phantasmal-flames-phantasmal-flames-elite-trainer-box-case_sealed,Sealed,Normal,English,8935604.0,1314.58,1763703516.0,-2.26,-12.27,,1314.58,1498.4,49.67,0.03581079839175261,57.76,-21.2016545810346,5.0,,-23.79,1566.72,1314.58,1724.96,142.53,0.09097455039484288,288.5,-14.9630286788253,10.0,0.0,-24.88,1639.53,1314.58,1749.93,130.94,0.07986622034321729,161.41,-7.0367539398493,14.0,0.0,,,1314.58,1749.93,,,,,,,,,,


### Data Preprocessing and Feature Selection

The target variables for AutoML are `7d_stddevPopPrice`, `priceChange24hr`, and `price`.

In [5]:
# Every column that serves as an AutoML target somewhere in this notebook.
# Rows missing any of these are dropped before training.
all_target_variables = [
    '7d_stddevPopPrice',
    'priceChange24hr',
    'price',
]

In [6]:
# Build one row mask that is true wherever at least one target column is missing:
# start from the first target's NA mask and OR in the masks of the remaining targets.
first_target, *remaining_targets = all_target_variables
missing_mask = h2o_df[first_target].isna()
for target_name in remaining_targets:
    missing_mask = missing_mask | h2o_df[target_name].isna()

# Count the affected rows by selecting them with the mask.
num_missing_targets = h2o_df[missing_mask].nrow

if num_missing_targets == 0:
    # Nothing to drop: the cleaned frame is the original frame itself.
    print(f"No missing values found in the target variables '{all_target_variables}'. No rows removed.")
    h2o_df_cleaned = h2o_df
else:
    print(f"Found {num_missing_targets} rows with missing values in at least one of the target variables: {all_target_variables}.")
    print("These rows will be removed for the AutoML training.")
    # Keep only the rows where every target is present (inverse of the mask).
    h2o_df_cleaned = h2o_df[~missing_mask]
    print(f"New H2OFrame size after removing missing target rows: {h2o_df_cleaned.nrow} rows.")

Found 4972 rows with missing values in at least one of the target variables: ['7d_stddevPopPrice', 'priceChange24hr', 'price'].
These rows will be removed for the AutoML training.
New H2OFrame size after removing missing target rows: 43162 rows.


game,set_name,card_id,card_name,number,rarity,set,set_name_api,tcgplayerId,details,variant_id,condition,printing,language,tcgplayerSkuId,price,lastUpdated,priceChange24hr,7d_priceChange,7d_avgPrice,7d_minPrice,7d_maxPrice,7d_stddevPopPrice,7d_covPrice,7d_iqrPrice,7d_trendSlope,7d_priceChangesCount,7d_priceRelativeToRange,30d_priceChange,30d_avgPrice,30d_minPrice,30d_maxPrice,30d_stddevPopPrice,30d_covPrice,30d_iqrPrice,30d_trendSlope,30d_priceChangesCount,30d_priceRelativeToRange,90d_priceChange,90d_avgPrice,90d_minPrice,90d_maxPrice,90d_stddevPopPrice,90d_covPrice,90d_iqrPrice,90d_trendSlope,90d_priceChangesCount,90d_priceRelativeToRange,1y_priceChange,1y_avgPrice,1y_minPrice,1y_maxPrice,1y_stddevPopPrice,1y_covPrice,1y_iqrPrice,1y_trendSlope,1y_priceChangesCount,1y_priceRelativeToRange,minPriceAllTime,minPriceAllTimeDate,maxPriceAllTime,maxPriceAllTimeDate
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box-case,Phantasmal Flames Booster Box Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,655281,,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box-case_sealed,Sealed,Normal,English,8942740.0,1669.26,1763700000.0,-1.62,-5.98,,1669.26,1775.52,23.56,0.0137784,4.24,-8.73515,17,,-19.76,1889.93,1669.26,2080.3,148.55,0.0785983,315.02,-15.8553,50,0.0,-16.54,2013.46,1669.26,2210.96,163.47,0.0811863,229.66,-8.54739,90,0.0,,,1669.26,2210.96,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-bundle-case,Phantasmal Flames Booster Bundle Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654162,,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-bundle-case_sealed,Sealed,Normal,English,8935540.0,1642.91,1763700000.0,0.0,-6.54,,1642.91,1757.96,36.66,0.0219012,49.9,-15.3436,4,,-14.73,1800.45,1642.91,1977.65,95.46,0.0530203,141.72,-9.63521,12,0.0,-26.96,1966.63,1642.91,2249.38,192.19,0.0977273,247.22,-11.1114,16,0.0,,,1642.91,2249.38,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-elite-trainer-box-case,Phantasmal Flames Elite Trainer Box Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654170,,pokemon-me02-phantasmal-flames-phantasmal-flames-elite-trainer-box-case_sealed,Sealed,Normal,English,8935600.0,1314.58,1763700000.0,-2.26,-12.27,,1314.58,1498.4,49.67,0.0358108,57.76,-21.2017,5,,-23.79,1566.72,1314.58,1724.96,142.53,0.0909746,288.5,-14.963,10,0.0,-24.88,1639.53,1314.58,1749.93,130.94,0.0798662,161.41,-7.03675,14,0.0,,,1314.58,1749.93,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-sleeved-booster-case,Phantasmal Flames Sleeved Booster Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,655282,,pokemon-me02-phantasmal-flames-phantasmal-flames-sleeved-booster-case_sealed,Sealed,Normal,English,8942740.0,1263.91,1763700000.0,-1.23,-3.86,,1263.91,1314.72,14.74,0.0114201,21.61,-6.51873,7,,-9.72,1367.31,1263.91,1437.37,54.79,0.0400718,92.02,-4.6503,15,0.0,5.33,1369.85,1199.99,1437.37,47.32,0.034547,49.99,-0.334093,19,0.269273,,,1199.99,1437.37,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-125-094-special-illustration-rare,Mega Charizard X ex - 125/094,125/094,Special Illustration Rare,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,662184,,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-125-094-special-illustration-rare_lightly-played_holofoil,Lightly Played,Holofoil,English,,696.13,1763700000.0,0.0,-0.55,,696.13,700.0,1.82,0.00261101,3.87,-0.961563,2,,-0.55,698.71,696.13,700.0,1.82,0.00261101,3.87,-0.961563,2,0.0,-0.55,698.71,696.13,700.0,1.82,0.00261101,3.87,-0.961563,2,0.0,,,696.13,700.0,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-125-094-special-illustration-rare,Mega Charizard X ex - 125/094,125/094,Special Illustration Rare,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,662184,,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-125-094-special-illustration-rare_near-mint_holofoil,Near Mint,Holofoil,English,,693.7,1763700000.0,-10.71,-28.32,,693.7,967.72,56.22,0.0718221,20.54,-11.8376,20,,-18.39,809.79,693.7,967.72,76.9,0.0949685,60.44,-19.4474,23,0.0,-18.39,809.79,693.7,967.72,76.9,0.0949685,60.44,-19.4474,23,0.0,,,693.7,967.72,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-130-094-mega-hyper-rare,Mega Charizard X ex - 130/094,130/094,Mega Hyper Rare,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,662185,,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-130-094-mega-hyper-rare_lightly-played_holofoil,Lightly Played,Holofoil,English,,682.5,1763700000.0,0.0,-18.75,,682.5,840.0,76.49,0.10301,157.5,-30.0994,2,,-18.75,742.5,682.5,840.0,76.49,0.10301,157.5,-30.0994,2,0.0,-18.75,742.5,682.5,840.0,76.49,0.10301,157.5,-30.0994,2,0.0,,,682.5,840.0,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-130-094-mega-hyper-rare,Mega Charizard X ex - 130/094,130/094,Mega Hyper Rare,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,662185,,pokemon-me02-phantasmal-flames-mega-charizard-x-ex-130-094-mega-hyper-rare_near-mint_holofoil,Near Mint,Holofoil,English,,518.59,1763700000.0,-8.18,-42.05,,518.59,894.95,94.67,0.14553,39.8,-37.9768,16,,-42.05,650.49,518.59,894.95,94.67,0.14553,39.8,-37.9768,16,0.0,-42.05,650.49,518.59,894.95,94.67,0.14553,39.8,-37.9768,16,0.0,,,518.59,894.95,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box,Phantasmal Flames Booster Box,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654137,,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box_sealed,Sealed,Normal,English,8935460.0,272.8,1763700000.0,-2.84,-10.17,,267.67,309.94,12.88,0.0438176,23.22,-4.64519,21,,-15.28,305.33,267.67,323.17,13.03,0.0426696,15.2,-1.15224,71,0.0924324,-20.92,320.38,267.67,344.97,16.67,0.05203,25.44,-0.83205,152,0.0663648,,,267.67,344.97,,,,,,,,,,
Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-pokemon-center-elite-trainer-box-exclusive,Phantasmal Flames Pokemon Center Elite Trainer Box (Exclusive),,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654135,,pokemon-me02-phantasmal-flames-phantasmal-flames-pokemon-center-elite-trainer-box-exclusive_sealed,Sealed,Normal,English,8935460.0,186.69,1763700000.0,-4.21,-63.27,,186.69,508.33,79.93,0.280952,61.04,-32.4473,21,,-68.26,483.53,186.69,588.27,136.85,0.283021,256.76,-12.285,36,0.0,-68.88,541.18,186.69,599.99,108.04,0.199633,33.53,-4.13966,57,0.0,,,186.69,599.99,,,,,,,,,,


### Run AutoML for Price Volatility (`7d_stddevPopPrice`)

In [7]:
from h2o.automl import H2OAutoML

# Response column for the price-volatility model.
y_price_volatility = '7d_stddevPopPrice'

# Columns that must never be predictors for this model.
# First group: identifiers and bookkeeping columns, followed by the other two targets.
ignored_columns = [
    'tcgplayerId', 'tcgplayerSkuId',
    'card_id', 'number', 'set_name_api', 'set',
    'variant_id', 'details', 'lastUpdated',
    'price',            # other target variable
    'priceChange24hr',  # other target variable
]

# Second group: the remaining 7-day statistics. They describe the same 7-day window
# as the target, so using them as predictors could leak the target.
ignored_columns += [
    '7d_priceChange', '7d_avgPrice', '7d_minPrice', '7d_maxPrice',
    '7d_covPrice', '7d_iqrPrice', '7d_trendSlope', '7d_priceChangesCount',
    '7d_priceRelativeToRange',
]

# Every column present in the cleaned frame.
all_columns = h2o_df_cleaned.columns

# Predictors = all columns except the ignored ones and the target itself
# (frame column order is preserved).
x_price_volatility = [
    name
    for name in all_columns
    if not (name in ignored_columns or name == y_price_volatility)
]

print(f"Predictors for {y_price_volatility}: {x_price_volatility}")
print(f"Target for Price Volatility: {y_price_volatility}")

Predictors for 7d_stddevPopPrice: ['game', 'set_name', 'card_name', 'rarity', 'condition', 'printing', 'language', '30d_priceChange', '30d_avgPrice', '30d_minPrice', '30d_maxPrice', '30d_stddevPopPrice', '30d_covPrice', '30d_iqrPrice', '30d_trendSlope', '30d_priceChangesCount', '30d_priceRelativeToRange', '90d_priceChange', '90d_avgPrice', '90d_minPrice', '90d_maxPrice', '90d_stddevPopPrice', '90d_covPrice', '90d_iqrPrice', '90d_trendSlope', '90d_priceChangesCount', '90d_priceRelativeToRange', '1y_priceChange', '1y_avgPrice', '1y_minPrice', '1y_maxPrice', '1y_stddevPopPrice', '1y_covPrice', '1y_iqrPrice', '1y_trendSlope', '1y_priceChangesCount', '1y_priceRelativeToRange', 'minPriceAllTime', 'minPriceAllTimeDate', 'maxPriceAllTime', 'maxPriceAllTimeDate']
Target for Price Volatility: 7d_stddevPopPrice


In [13]:
from h2o.automl import H2OAutoML
import os

# Configure the AutoML search for the price-volatility target.
# The search is limited by wall-clock time (1800 seconds), not by a model count.
# DeepLearning is excluded from the algorithms AutoML may try.
# NOTE(review): a fixed seed alone does not make a time-limited run repeatable; the H2O AutoML
# docs tie reproducibility to using max_models instead of max_runtime_secs — confirm if needed.
automl_price_volatility = H2OAutoML(
    max_runtime_secs=1800,
    seed=1205,
    exclude_algos=["DeepLearning"],
)

# Train on the frame with missing-target rows removed. The call is the last expression
# of the cell, so its return value is rendered as the cell output.
automl_price_volatility.train(
    x=x_price_volatility,
    y=y_price_volatility,
    training_frame=h2o_df_cleaned,
)


02:34:56.96: _train param, Dropping bad and constant columns: [1y_trendSlope, 1y_covPrice, 1y_iqrPrice, minPriceAllTime, maxPriceAllTimeDate, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceRelativeToRange, 1y_priceChangesCount, 1y_avgPrice, 1y_stddevPopPrice]


02:35:43.738: _train param, Dropping bad and constant columns: [1y_trendSlope, 1y_covPrice, 1y_iqrPrice, minPriceAllTime, maxPriceAllTimeDate, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceRelativeToRange, 1y_priceChangesCount, 1y_avgPrice, 1y_stddevPopPrice]


02:38:36.148: GLM_1_AutoML_3_20251206_23456 [GLM def_1] failed: DistributedException from /127.0.0.1:54321: 'Java heap space', caused by java.lang.OutOfMemoryError: Java heap space
02:38:37.515: _train param, Dropping bad and constant columns: [1y_trendSlope, 1y_covPrice, 1y_iqrPrice, minPriceAllTime, maxPriceAllTimeDate, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceRelativeToRange, 1y_priceChangesCount, 1y_avgPrice, 1y_std

Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,44.0,44.0,9703.0,5.0,5.0,5.0,7.0,18.0,10.977273

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
aic,,0.0,,,,,
loglikelihood,,0.0,,,,,
mae,0.1981181,0.0757817,0.1427556,0.1687816,0.3317219,0.1737211,0.1736102
mean_residual_deviance,52.51139,111.544624,0.5433309,2.281644,252.0318,3.1895745,4.510611
mse,52.51139,111.544624,0.5433309,2.281644,252.0318,3.1895745,4.510611
r2,0.1733511,0.4235222,0.4656502,0.3201713,0.1633508,-0.5502849,0.4678678
residual_deviance,52.51139,111.544624,0.5433309,2.281644,252.0318,3.1895745,4.510611
rmse,4.4065776,6.431705,0.7371098,1.5105112,15.875509,1.785938,2.12382
rmsle,0.1824421,0.0004937,,0.182093,,0.1827912,

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_mae,training_deviance
,2025-12-06 02:59:18,3 min 50.663 sec,0.0,7.9519138,0.3365437,63.2329323
,2025-12-06 02:59:18,3 min 50.812 sec,5.0,4.8406014,0.2506158,23.4314217
,2025-12-06 02:59:19,3 min 50.936 sec,10.0,3.0896366,0.2095641,9.5458546
,2025-12-06 02:59:19,3 min 51.061 sec,15.0,2.5037688,0.1874794,6.268858
,2025-12-06 02:59:19,3 min 51.197 sec,20.0,1.9107029,0.1693801,3.6507857
,2025-12-06 02:59:19,3 min 51.328 sec,25.0,1.4956457,0.1591998,2.2369562
,2025-12-06 02:59:19,3 min 51.454 sec,30.0,1.0830313,0.1501281,1.1729569
,2025-12-06 02:59:19,3 min 51.592 sec,35.0,0.8904242,0.1460144,0.7928552
,2025-12-06 02:59:19,3 min 51.734 sec,40.0,0.8429085,0.1400525,0.7104948
,2025-12-06 02:59:19,3 min 51.853 sec,44.0,0.7865791,0.1363787,0.6187067

variable,relative_importance,scaled_importance,percentage
90d_trendSlope,6165205.5,1.0,0.4447077
30d_trendSlope,4320032.0,0.7007118,0.3116119
90d_stddevPopPrice,1077344.5,0.1747459,0.0777109
30d_priceChangesCount,734162.0625000,0.1190815,0.0529565
30d_stddevPopPrice,664618.6875000,0.1078015,0.0479402
30d_maxPrice,194567.0625000,0.0315589,0.0140345
set_name,167468.9531250,0.0271636,0.0120798
card_name,94431.5937500,0.0153169,0.0068115
90d_iqrPrice,76456.7968750,0.0124013,0.0055150
30d_iqrPrice,47207.8593750,0.0076571,0.0034052


In [14]:
# Fetch the leaderboard of the price-volatility AutoML run and display its first rows.
leaderboard_price_volatility = automl_price_volatility.leaderboard
print("\nLeaderboard for {}:".format(y_price_volatility))
leaderboard_price_volatility.head()


Leaderboard for 7d_stddevPopPrice:


model_id,rmse,mse,mae,rmsle,mean_residual_deviance
GBM_grid_1_AutoML_3_20251206_23456_model_8,7.23983,52.4151,0.198565,,52.4151
GBM_5_AutoML_3_20251206_23456,7.33159,53.7522,0.185609,,53.7522
GBM_grid_1_AutoML_3_20251206_23456_model_19,7.34688,53.9766,0.17974,,53.9766
GBM_grid_1_AutoML_3_20251206_23456_model_26,7.36451,54.2361,0.196715,0.167018,54.2361
GBM_grid_1_AutoML_3_20251206_23456_model_3,7.38378,54.5202,0.169726,0.167313,54.5202
GBM_grid_1_AutoML_3_20251206_23456_model_11,7.38694,54.5669,0.180897,,54.5669
XGBoost_lr_search_selection_AutoML_3_20251206_23456_select_grid_model_1,7.39222,54.6449,0.186556,,54.6449
XGBoost_grid_1_AutoML_3_20251206_23456_model_18,7.42386,55.1137,0.180921,,55.1137
XRT_1_AutoML_3_20251206_23456,7.45006,55.5034,0.168514,0.163979,55.5034
GBM_grid_1_AutoML_3_20251206_23456_model_10,7.45064,55.512,0.178159,,55.512


In [None]:
import os

# Save the best model of the price-volatility AutoML run as a MOJO file under ./models.
leader_model = automl_price_volatility.leader
if leader_model is not None:
    # Create the output directory if needed; exist_ok avoids a separate existence check.
    model_dir = 'models'
    os.makedirs(model_dir, exist_ok=True)

    # MOJO export is a method of the model object; to my knowledge the top-level h2o module
    # has no save_mojo function, so h2o.save_mojo(...) would raise AttributeError.
    # force=True overwrites an existing file with the same name.
    model_path = leader_model.save_mojo(path=model_dir, force=True)
    print(f"\nLeader model '{leader_model.model_id}' saved as MOJO to: {model_path}")
else:
    print("No leader model found to save.")