### Initializing H2O

In [1]:
!pip install h2o



In [2]:
# Install a Java 11 JDK; the H2O server started by h2o.init() runs on the JVM.
# Refresh the apt package index before installing.
!apt-get update --fix-missing
# Install the headless OpenJDK 11 package non-interactively (-y) and quietly (-qq); stdout is discarded.
!apt-get install -y openjdk-11-jdk-headless -qq > /dev/null
import os
# Export JAVA_HOME for this kernel process — presumably so the H2O launcher picks up this JDK.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
# Sanity check: print the Java version that is now on the PATH.
!java -version

0% [Working]            Hit:1 https://packages.cloud.google.com/apt gcsfuse-jammy InRelease
0% [Connecting to archive.ubuntu.com (185.125.190.82)] [Connecting to security.                                                                               Hit:2 https://cli.github.com/packages stable InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:5 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:12 https://ppa.launchpadcontent.net/ubuntugis/ppa

In [3]:
import h2o
# Connect to a local H2O server, starting one if none is running.
# nthreads=-1 uses all available cores; max_mem_size caps the JVM heap at 4 GB.
# NOTE(review): the AutoML logs further down show GLM_1 failing with "Java heap space"
# under this cap — consider raising max_mem_size if the host has more RAM.
h2o.init(nthreads=-1, max_mem_size='4G') # Use all available cores and 4GB of memory

# Silence H2O progress bars for the rest of the session, then print the cluster status table.
h2o.no_progress()
print(h2o.cluster().get_status())

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.29" 2025-10-21; OpenJDK Runtime Environment (build 11.0.29+7-post-Ubuntu-1ubuntu122.04); OpenJDK 64-Bit Server VM (build 11.0.29+7-post-Ubuntu-1ubuntu122.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.12/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpbceqswyl
  JVM stdout: /tmp/tmpbceqswyl/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpbceqswyl/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,04 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.9
H2O_cluster_version_age:,11 days
H2O_cluster_name:,H2O_from_python_unknownUser_cnrwvi
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,4 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


H2O_cluster_uptime    H2O_cluster_timezone    H2O_data_parsing_timezone    H2O_cluster_version    H2O_cluster_version_age    H2O_cluster_name                    H2O_cluster_total_nodes    H2O_cluster_free_memory    H2O_cluster_total_cores    H2O_cluster_allowed_cores    H2O_cluster_status    H2O_connection_url      H2O_connection_proxy                                                                       H2O_internal_security    Python_version
--------------------  ----------------------  ---------------------------  ---------------------  -------------------------  ----------------------------------  -------------------------  -------------------------  -------------------------  ---------------------------  --------------------  ----------------------  -----------------------------------------------------------------------------------------  -----------------------  ----------------
05 secs               Etc/UTC                 UTC                          3.46.0.9               11 d

In [4]:
from pandas_gbq import read_gbq
import h2o
from h2o.automl import H2OAutoML
import os

In [5]:
# Global configuration shared by the AutoML sections of this notebook.

# Directory the leader-model MOJO archives are written to (the kernel's current working directory).
WORKING_DIR = os.getcwd()
# Time budget in seconds for each AutoML run (passed as `max_runtime_secs`); change at will.
MAX_RUNTIME = 300           # Change this at will

### Load BigQuery Data

In [6]:
# BigQuery source: billing project and fully-qualified table (project.dataset.table).
project_id = "basq-477923"
table_id = "basq-477923.justcgall_dates.justtcg_alldates_snapshot_20251201"

# Select every row and column of the table; backticks quote the table name in the query.
sql_query = f"""SELECT * FROM `{table_id}`"""

# Download the query result into an in-memory pandas DataFrame.
print(f"Loading data from {table_id} into a pandas DataFrame...")
pandas_df = read_gbq(sql_query, project_id=project_id, dialect='standard')
print("Data loaded into pandas DataFrame. Converting to H2OFrame...")

# Convert the pandas DataFrame to an H2OFrame.
# NOTE(review): column types are inferred by H2O during this conversion — check the
# `type` row of the summary below before relying on a column as numeric.
h2o_df = h2o.H2OFrame(pandas_df)

# Show per-column type, range, zero/missing counts and the first rows.
print("H2OFrame created successfully. Here's a summary:")
h2o_df.summary()

Loading data from basq-477923.justcgall_dates.justtcg_alldates_snapshot_20251201 into a pandas DataFrame...
Downloading: 100%|[32m██████████[0m|
Data loaded into pandas DataFrame. Converting to H2OFrame...
H2OFrame created successfully. Here's a summary:


  h2o_df.summary()


Unnamed: 0,game,set_name,card_id,card_name,number,rarity,set,set_name_api,tcgplayerId,details,variant_id,condition,printing,language,tcgplayerSkuId,price,lastUpdated,priceChange24hr,7d_priceChange,7d_avgPrice,7d_minPrice,7d_maxPrice,7d_stddevPopPrice,7d_covPrice,7d_iqrPrice,7d_trendSlope,7d_priceChangesCount,7d_priceRelativeToRange,30d_priceChange,30d_avgPrice,30d_minPrice,30d_maxPrice,30d_stddevPopPrice,30d_covPrice,30d_iqrPrice,30d_trendSlope,30d_priceChangesCount,30d_priceRelativeToRange,90d_priceChange,90d_avgPrice,90d_minPrice,90d_maxPrice,90d_stddevPopPrice,90d_covPrice,90d_iqrPrice,90d_trendSlope,90d_priceChangesCount,90d_priceRelativeToRange,1y_priceChange,1y_avgPrice,1y_minPrice,1y_maxPrice,1y_stddevPopPrice,1y_covPrice,1y_iqrPrice,1y_trendSlope,1y_priceChangesCount,1y_priceRelativeToRange,minPriceAllTime,minPriceAllTimeDate,maxPriceAllTime,maxPriceAllTimeDate
type,enum,enum,enum,enum,enum,enum,enum,enum,int,enum,string,enum,enum,enum,int,real,int,real,real,enum,real,real,real,real,real,real,int,enum,real,real,real,real,real,real,real,real,int,real,real,real,real,real,real,real,real,real,int,real,enum,enum,real,real,enum,enum,enum,enum,enum,enum,enum,enum,enum,enum
mins,,,,,,,,,255216.0,,,,,,5326870.0,0.01,1742989335.0,-46.2,-94.7,,0.01,0.01,0.0,0.0,0.0,-53.849575205974695,0.0,,-99.05,0.01,0.01,0.01,0.0,0.0,0.0,-67.7772248651742,0.0,0.0,-99.37,0.01,0.01,0.01,0.0,0.0,0.0,-67.7772248651742,0.0,0.0,,,0.0,0.01,,,,,,,,,,
mean,,,,,,,,,589361.463913241,,,,,,8271354.72661327,23.430140441268133,1762904581.6865215,-0.012499884157360606,-0.4909395293142359,,22.573471192373233,23.15289665177542,0.204967991525814,0.017045956305098642,0.27011467985208054,-0.009362014428824757,1.9814268500436283,,-1.2167284340721327,22.98555680946147,22.00556990023924,24.00616237078497,0.6603520967814741,0.04777987348471529,1.0022649270785717,-0.018450888862524575,7.769310674367394,0.39515808235167943,3.951665251192263,23.311901962065303,21.10408649449068,25.853123121815266,1.4239602796106385,0.09846427795196055,1.8448217147852881,-0.028394867569691895,23.47786265897879,0.39974888983133805,,,19.84216520546806,28.377843520172863,,,,,,,,,,
maxs,,,,,,,,,664010.0,,,,,,8996314.0,32999.99,1763712709.0,350.0,783.33,,32999.99,32999.99,1602.72,1.4331865449241918,3998.98,706.912765448021,21.0,,1950.0,31279.4,26500.0,32999.99,2867.64,1.6980904155855012,6499.99,277.38798024861296,74.0,1.0,46152.0,32085.93,26500.0,32999.99,2259.62,3.0071531055654863,2099.15,56.8781895482977,215.0,1.0,,,26500.0,32999.99,,,,,,,,,,
sigma,,,,,,,,,42270.04924560148,,,,,,405164.82703955274,272.9191815156414,2950240.0578833017,4.003279850332394,13.389882924661709,,255.7741738215011,265.88583309419084,7.927855885327787,0.04676230654268565,18.35168267284819,3.467353290743939,3.6961421906285286,,33.8648238077007,245.17015871231675,224.257605469355,265.33387414302365,18.956271374747505,0.09117554615419038,36.099035955758005,1.8056041001813798,13.10967246176217,0.433666752619105,294.47236715511855,241.3382969352126,216.89715034734806,266.40786780715905,17.783079611285384,0.16284574182870776,18.998116121107437,0.7086900642623695,39.131003078032144,0.4200458319432639,,,219.79574053399918,279.0862336014116,,,,,,,,,,
zeros,,,,,,,,,0,,0,,,,0,0,0,35642,23295,,0,0,25166,22385,32043,22365,26286,,14161,0,0,0,15080,12881,20949,12858,14975,12956,10379,0,0,0,10291,9366,15453,9329,7841,12429,,,24,0,,,,,,,,,,
missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7053,0,0,4972,4708,0,4708,4708,4708,4708,0,4728,0,0,3828,3828,3828,3828,3828,3828,0,3851,0,16709,2213,2213,2212,2212,2213,2213,93,2250,93,11579,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box-case,Phantasmal Flames Booster Box Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,655281.0,,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-box-case_sealed,Sealed,Normal,English,8942744.0,1669.26,1763703516.0,-1.62,-5.98,,1669.26,1775.52,23.56,0.013778420055386752,4.24,-8.73515077782309,17.0,,-19.76,1889.93,1669.26,2080.3,148.55,0.07859834379505344,315.02,-15.8553472844132,50.0,0.0,-16.54,2013.46,1669.26,2210.96,163.47,0.08118631381035077,229.66,-8.547386234310009,90.0,0.0,,,1669.26,2210.96,,,,,,,,,,
1,Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-bundle-case,Phantasmal Flames Booster Bundle Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654162.0,,pokemon-me02-phantasmal-flames-phantasmal-flames-booster-bundle-case_sealed,Sealed,Normal,English,8935542.0,1642.91,1763703516.0,0.0,-6.54,,1642.91,1757.96,36.66,0.02190122232601175,49.9,-15.3435564491779,4.0,,-14.73,1800.45,1642.91,1977.65,95.46,0.05302034258411328,141.72,-9.63520810330438,12.0,0.0,-26.96,1966.63,1642.91,2249.38,192.19,0.09772732301236954,247.22,-11.1113944473904,16.0,0.0,,,1642.91,2249.38,,,,,,,,,,
2,Pokemon,ME02: Phantasmal Flames,pokemon-me02-phantasmal-flames-phantasmal-flames-elite-trainer-box-case,Phantasmal Flames Elite Trainer Box Case,,,me02-phantasmal-flames-pokemon,ME02: Phantasmal Flames,654170.0,,pokemon-me02-phantasmal-flames-phantasmal-flames-elite-trainer-box-case_sealed,Sealed,Normal,English,8935604.0,1314.58,1763703516.0,-2.26,-12.27,,1314.58,1498.4,49.67,0.03581079839175261,57.76,-21.2016545810346,5.0,,-23.79,1566.72,1314.58,1724.96,142.53,0.09097455039484288,288.5,-14.9630286788253,10.0,0.0,-24.88,1639.53,1314.58,1749.93,130.94,0.07986622034321729,161.41,-7.0367539398493,14.0,0.0,,,1314.58,1749.93,,,,,,,,,,


### Data Preprocessing and Feature Selection

The target variables for AutoML are `7d_stddevPopPrice`, `priceChange24hr`, and `price`.

In [7]:
# The three response columns modelled in this notebook, one AutoML run each.
all_target_variables = [
    '7d_stddevPopPrice',
    'priceChange24hr',
    'price',
]

In [8]:
# Build one NA indicator per target column, then OR them together so a row is
# flagged as soon as any single target is missing.
na_flags = [h2o_df[target].isna() for target in all_target_variables]
missing_mask = na_flags[0]
for flag in na_flags[1:]:
    missing_mask = missing_mask | flag

# Count the flagged rows by selecting them and reading the row count.
num_missing_targets = h2o_df[missing_mask].nrow

if num_missing_targets == 0:
    # Nothing to drop: the cleaned frame is simply the original frame.
    print(f"No missing values found in the target variables '{all_target_variables}'. No rows removed.")
    h2o_df_cleaned = h2o_df
else:
    print(f"Found {num_missing_targets} rows with missing values in at least one of the target variables: {all_target_variables}.")
    print("These rows will be removed for the AutoML training.")
    # Keep only the rows where every target is present.
    h2o_df_cleaned = h2o_df[~missing_mask]
    print(f"New H2OFrame size after removing missing target rows: {h2o_df_cleaned.nrow} rows.")

Found 4972 rows with missing values in at least one of the target variables: ['7d_stddevPopPrice', 'priceChange24hr', 'price'].
These rows will be removed for the AutoML training.
New H2OFrame size after removing missing target rows: 43162 rows.


### Run AutoML for Price Volatility (`7d_stddevPopPrice`)

In [9]:
# Response column for the price-volatility model.
y_price_volatility = '7d_stddevPopPrice'

# Columns withheld from the predictors, in three groups:
#   1. identifier / bookkeeping fields,
#   2. the other two AutoML targets,
#   3. the remaining 7d_* statistics, flagged as a potential data-leakage source
#      when predicting '7d_stddevPopPrice'.
ignored_columns_volatility = (
    [
        'tcgplayerId', 'tcgplayerSkuId',
        'card_id', 'number', 'set_name_api', 'set',
        'variant_id', 'details', 'lastUpdated',
    ]
    + ['price', 'priceChange24hr']
    + [
        '7d_priceChange', '7d_avgPrice', '7d_minPrice', '7d_maxPrice',
        '7d_covPrice', '7d_iqrPrice', '7d_trendSlope', '7d_priceChangesCount',
        '7d_priceRelativeToRange',
    ]
)

# Column names of the cleaned frame, in frame order.
all_columns = h2o_df_cleaned.columns

# Predictors = every column that is neither ignored nor the target; frame order is kept.
excluded_volatility = set(ignored_columns_volatility) | {y_price_volatility}
x_price_volatility = [name for name in all_columns if name not in excluded_volatility]

print(f"Predictors for {y_price_volatility}: {x_price_volatility}")
print(f"Target for Price Volatility: {y_price_volatility}")

Predictors for 7d_stddevPopPrice: ['game', 'set_name', 'card_name', 'rarity', 'condition', 'printing', 'language', '30d_priceChange', '30d_avgPrice', '30d_minPrice', '30d_maxPrice', '30d_stddevPopPrice', '30d_covPrice', '30d_iqrPrice', '30d_trendSlope', '30d_priceChangesCount', '30d_priceRelativeToRange', '90d_priceChange', '90d_avgPrice', '90d_minPrice', '90d_maxPrice', '90d_stddevPopPrice', '90d_covPrice', '90d_iqrPrice', '90d_trendSlope', '90d_priceChangesCount', '90d_priceRelativeToRange', '1y_priceChange', '1y_avgPrice', '1y_minPrice', '1y_maxPrice', '1y_stddevPopPrice', '1y_covPrice', '1y_iqrPrice', '1y_trendSlope', '1y_priceChangesCount', '1y_priceRelativeToRange', 'minPriceAllTime', 'minPriceAllTimeDate', 'maxPriceAllTime', 'maxPriceAllTimeDate']
Target for Price Volatility: 7d_stddevPopPrice


In [11]:
# AutoML run for the price-volatility target: bounded by MAX_RUNTIME seconds,
# fixed seed, DeepLearning excluded from the candidate algorithms.
automl_price_volatility = H2OAutoML(
    max_runtime_secs=MAX_RUNTIME,
    seed=1205,
    exclude_algos=["DeepLearning"],
)
# Train on the cleaned frame only; no separate validation or leaderboard frame is supplied.
automl_price_volatility.train(
    x=x_price_volatility,
    y=y_price_volatility,
    training_frame=h2o_df_cleaned,
)


07:45:51.279: _train param, Dropping bad and constant columns: [1y_trendSlope, 1y_covPrice, 1y_iqrPrice, minPriceAllTime, maxPriceAllTimeDate, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceRelativeToRange, 1y_priceChangesCount, 1y_avgPrice, 1y_stddevPopPrice]


07:46:46.767: _train param, Dropping bad and constant columns: [1y_trendSlope, 1y_covPrice, 1y_iqrPrice, minPriceAllTime, maxPriceAllTimeDate, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceRelativeToRange, 1y_priceChangesCount, 1y_avgPrice, 1y_stddevPopPrice]


07:49:24.870: GLM_1_AutoML_2_20251206_74551 [GLM def_1] failed: DistributedException from /127.0.0.1:54321: 'Java heap space', caused by java.lang.OutOfMemoryError: Java heap space


07:49:26.382: _train param, Dropping bad and constant columns: [1y_trendSlope, 1y_covPrice, 1y_iqrPrice, minPriceAllTime, maxPriceAllTimeDate, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceRelativeToRange, 1y_priceChangesCount, 1y_avgPrice, 1y_

Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,1.0,1.0,45215.0,20.0,20.0,20.0,2598.0,2598.0,2598.0

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
aic,,0.0,,,,,
loglikelihood,,0.0,,,,,
mae,0.1988499,0.0843531,0.1457401,0.1711494,0.3483603,0.1734593,0.1555401
mean_residual_deviance,54.615463,113.67148,2.3092434,3.8039691,257.9508,4.401384,4.6119103
mse,54.615463,113.67148,2.3092434,3.8039691,257.9508,4.401384,4.6119103
r2,-0.3888298,0.7752346,-1.2710723,-0.133414,0.143702,-1.1392819,0.4559173
residual_deviance,54.615463,113.67148,2.3092434,3.8039691,257.9508,4.401384,4.6119103
rmse,4.755265,6.3248434,1.5196196,1.9503766,16.060846,2.0979476,2.1475358
rmsle,0.1906522,0.0087763,0.1904983,0.1960194,0.1957422,0.1955076,0.1754937

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_mae,training_deviance
,2025-12-06 07:50:44,3.624 sec,0.0,,,
,2025-12-06 07:50:45,4.193 sec,1.0,2.1356344,0.1736258,4.5609343

variable,relative_importance,scaled_importance,percentage
90d_trendSlope,2580105.5,1.0,0.9533265
30d_trendSlope,43604.3906250,0.0169002,0.0161114
30d_stddevPopPrice,24415.8378906,0.0094631,0.0090214
card_name,19051.5625000,0.0073840,0.0070394
rarity,11150.5341797,0.0043217,0.0041200
set_name,7975.2431641,0.0030911,0.0029468
30d_avgPrice,6983.6606445,0.0027067,0.0025804
90d_minPrice,1891.5186768,0.0007331,0.0006989
90d_priceRelativeToRange,1396.8562012,0.0005414,0.0005161
30d_priceRelativeToRange,1306.0994873,0.0005062,0.0004826


In [12]:
# Fetch the AutoML leaderboard for the price-volatility run and show its first rows.
# The bare `.head()` on the last line is what renders the table as the cell output.
leaderboard_price_volatility = automl_price_volatility.leaderboard
print(f"\nLeaderboard for {y_price_volatility}:")
leaderboard_price_volatility.head()


Leaderboard for 7d_stddevPopPrice:


model_id,rmse,mse,mae,rmsle,mean_residual_deviance
XRT_1_AutoML_2_20251206_74551,7.39007,54.6131,0.198848,0.190814,54.6131
DRF_1_AutoML_2_20251206_74551,7.56626,57.2484,0.179469,0.169205,57.2484
GBM_5_AutoML_2_20251206_74551,7.59614,57.7013,0.243153,0.187034,57.7013
XGBoost_2_AutoML_2_20251206_74551,7.64908,58.5084,0.203585,,58.5084
XGBoost_3_AutoML_2_20251206_74551,7.72069,59.6091,0.276557,,59.6091
GBM_4_AutoML_2_20251206_74551,7.75684,60.1686,0.205157,,60.1686
GBM_3_AutoML_2_20251206_74551,7.80059,60.8492,0.215415,,60.8492
GBM_1_AutoML_2_20251206_74551,7.82154,61.1765,0.215347,,61.1765
GBM_2_AutoML_2_20251206_74551,7.82187,61.1816,0.217379,,61.1816
XGBoost_grid_1_AutoML_2_20251206_74551_model_1,7.84344,61.5196,0.464456,0.324072,61.5196


In [15]:
# Save the price-volatility leader model as a MOJO archive in WORKING_DIR.
leader_model_price_volatility = automl_price_volatility.leader
# Compare against None explicitly instead of relying on the model object's truthiness;
# `leader` is presumably None when the AutoML run produced no model.
if leader_model_price_volatility is not None:
    # The archive is named after the target column; force=True overwrites an existing file.
    model_filename_volatility = f"{y_price_volatility}_leader_model.zip"
    model_path = leader_model_price_volatility.save_mojo(path=WORKING_DIR, filename=model_filename_volatility, force=True)
    print(f"\nLeader model '{leader_model_price_volatility.model_id}' saved as MOJO to: {model_path}")
else:
    print("No leader model found to save.")


Leader model 'XRT_1_AutoML_2_20251206_74551' saved as MOJO to: /content/7d_stddevPopPrice_leader_model.zip


### Run AutoML for Price Change (`priceChange24hr`)

In [16]:
# Response column for the 24-hour price-change model.
y_price_change = 'priceChange24hr'

# Columns withheld from the predictors: identifier / bookkeeping fields first,
# then the other two AutoML targets.
ignored_columns_change = (
    [
        'tcgplayerId', 'tcgplayerSkuId',
        'card_id', 'number', 'set_name_api', 'set',
        'variant_id', 'details', 'lastUpdated',
    ]
    + ['price', '7d_stddevPopPrice']
)

# Column names of the cleaned frame, in frame order.
all_columns = h2o_df_cleaned.columns

# Predictors = every column that is neither ignored nor the target; frame order is kept.
excluded_change = set(ignored_columns_change) | {y_price_change}
x_price_change = [name for name in all_columns if name not in excluded_change]

print(f"Predictors for {y_price_change}: {x_price_change}")
print(f"Target for Price Change: {y_price_change}")

Predictors for priceChange24hr: ['game', 'set_name', 'card_name', 'rarity', 'condition', 'printing', 'language', '7d_priceChange', '7d_avgPrice', '7d_minPrice', '7d_maxPrice', '7d_covPrice', '7d_iqrPrice', '7d_trendSlope', '7d_priceChangesCount', '7d_priceRelativeToRange', '30d_priceChange', '30d_avgPrice', '30d_minPrice', '30d_maxPrice', '30d_stddevPopPrice', '30d_covPrice', '30d_iqrPrice', '30d_trendSlope', '30d_priceChangesCount', '30d_priceRelativeToRange', '90d_priceChange', '90d_avgPrice', '90d_minPrice', '90d_maxPrice', '90d_stddevPopPrice', '90d_covPrice', '90d_iqrPrice', '90d_trendSlope', '90d_priceChangesCount', '90d_priceRelativeToRange', '1y_priceChange', '1y_avgPrice', '1y_minPrice', '1y_maxPrice', '1y_stddevPopPrice', '1y_covPrice', '1y_iqrPrice', '1y_trendSlope', '1y_priceChangesCount', '1y_priceRelativeToRange', 'minPriceAllTime', 'minPriceAllTimeDate', 'maxPriceAllTime', 'maxPriceAllTimeDate']
Target for Price Change: priceChange24hr


In [17]:
# AutoML run for the price-change target (`priceChange24hr`): bounded by MAX_RUNTIME
# seconds, fixed seed, DeepLearning excluded from the candidate algorithms.
automl_price_change = H2OAutoML(
    max_runtime_secs=MAX_RUNTIME,
    seed=1205,
    exclude_algos=["DeepLearning"],
)
# Train on the cleaned frame only; no separate validation or leaderboard frame is supplied.
automl_price_change.train(
    x=x_price_change,
    y=y_price_change,
    training_frame=h2o_df_cleaned,
)


07:59:31.614: _train param, Dropping bad and constant columns: [7d_avgPrice, minPriceAllTime, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceChangesCount, 7d_priceRelativeToRange, 1y_stddevPopPrice, 1y_trendSlope, 1y_covPrice, 1y_iqrPrice, maxPriceAllTimeDate, 1y_priceRelativeToRange, 1y_avgPrice]


08:00:40.887: _train param, Dropping bad and constant columns: [7d_avgPrice, minPriceAllTime, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceChangesCount, 7d_priceRelativeToRange, 1y_stddevPopPrice, 1y_trendSlope, 1y_covPrice, 1y_iqrPrice, maxPriceAllTimeDate, 1y_priceRelativeToRange, 1y_avgPrice]


08:01:26.917: _train param, Dropping bad and constant columns: [7d_avgPrice, minPriceAllTime, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceChangesCount, 7d_priceRelativeToRange, 1y_stddevPopPrice, 1y_trendSlope, 1y_covPrice, 1y_iqrPrice, maxPriceAllTimeDate, 1y_priceRelativeToRange, 1y_avgPrice]


08:02:18.694: _train param, Dropping unused columns

key,value
Stacking strategy,cross_validation
Number of base models (used / total),3/8
# GBM base models (used / total),1/4
# XGBoost base models (used / total),2/2
# GLM base models (used / total),0/1
# DRF base models (used / total),0/1
Metalearner algorithm,GLM
Metalearner fold assignment scheme,Random
Metalearner nfolds,5
Metalearner fold_column,

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
aic,46150.54,2809.873,46121.8,49835.55,45281.37,42172.227,47341.75
loglikelihood,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mae,0.7266236,0.024444,0.7454683,0.7451894,0.6867856,0.7349159,0.7207589
mean_residual_deviance,12.811082,4.180387,11.985991,18.842003,10.563121,7.910018,14.754279
mse,12.811082,4.180387,11.985991,18.842003,10.563121,7.910018,14.754279
null_deviance,138344.72,41654.582,147811.95,188113.14,119675.16,78045.836,158077.56
r2,0.1987136,0.0700661,0.2973589,0.1356917,0.230774,0.1287057,0.2010378
residual_deviance,110553.72,35900.285,103858.61,162587.64,92047.04,67978.695,126296.63
rmse,3.5413034,0.5812185,3.462079,4.3407373,3.2500956,2.8124754,3.84113
rmsle,,0.0,,,,,


In [18]:
# Fetch the AutoML leaderboard for the price-change (`priceChange24hr`) run and show its first rows.
# The bare `.head()` on the last line is what renders the table as the cell output.
leaderboard_price_change = automl_price_change.leaderboard
print(f"\nLeaderboard for {y_price_change}:")
leaderboard_price_change.head()


Leaderboard for priceChange24hr:


model_id,rmse,mse,mae,rmsle,mean_residual_deviance
StackedEnsemble_AllModels_1_AutoML_3_20251206_75931,3.57866,12.8068,0.726564,,12.8068
StackedEnsemble_BestOfFamily_1_AutoML_3_20251206_75931,3.5927,12.9075,0.736387,,12.9075
StackedEnsemble_BestOfFamily_2_AutoML_3_20251206_75931,3.6031,12.9823,0.737365,,12.9823
StackedEnsemble_AllModels_2_AutoML_3_20251206_75931,3.64138,13.2597,0.729129,,13.2597
StackedEnsemble_BestOfFamily_3_AutoML_3_20251206_75931,3.64222,13.2658,0.736291,,13.2658
StackedEnsemble_AllModels_3_AutoML_3_20251206_75931,3.66832,13.4565,0.735517,,13.4565
GBM_1_AutoML_3_20251206_75931,3.6905,13.6198,0.782593,,13.6198
GBM_4_AutoML_3_20251206_75931,3.70484,13.7259,0.74816,,13.7259
XGBoost_1_AutoML_3_20251206_75931,3.71315,13.7875,0.764612,,13.7875
GBM_2_AutoML_3_20251206_75931,3.7141,13.7945,0.762779,,13.7945


In [20]:
# Save the price-change leader model as a MOJO archive in WORKING_DIR.
leader_model_price_change = automl_price_change.leader
# Compare against None explicitly instead of relying on the model object's truthiness;
# `leader` is presumably None when the AutoML run produced no model.
if leader_model_price_change is not None:
    # The archive is named after the target column; force=True overwrites an existing file.
    model_filename_change = f"{y_price_change}_leader_model.zip"
    model_path = leader_model_price_change.save_mojo(path=WORKING_DIR, filename=model_filename_change, force=True)
    print(f"\nLeader model '{leader_model_price_change.model_id}' saved as MOJO to: {model_path}")
else:
    print("No leader model found to save.")


Leader model 'StackedEnsemble_AllModels_1_AutoML_3_20251206_75931' saved as MOJO to: /content/priceChange24hr_leader_model.zip


### Run AutoML for Price (`price`)

In [21]:
# Response column for the price model.
y_price = 'price'

# Columns withheld from the predictors, in three groups:
#   1. identifier / bookkeeping fields,
#   2. the other two AutoML targets,
#   3. features identified as highly correlated with price
#      (from Sourendu's Correlation Graph).
ignored_columns_price = (
    [
        'tcgplayerId', 'tcgplayerSkuId',
        'card_id', 'number', 'set_name_api', 'set',
        'variant_id', 'details', 'lastUpdated',
    ]
    + ['7d_stddevPopPrice', 'priceChange24hr']
    + [
        '7d_maxPrice', '30d_maxPrice', '7d_minPrice', '90d_maxPrice',
        '30d_avgPrice', '1y_maxPrice', '90d_avgPrice', '30d_minPrice',
        '90d_minPrice', '1y_minPrice', '30d_stddevPopPrice', '90d_stddevPopPrice',
        '30d_iqrPrice', '30d_trendSlope', '7d_trendSlope', '7d_iqrPrice',
    ]
)

# Column names of the cleaned frame, in frame order.
all_columns = h2o_df_cleaned.columns

# Predictors = every column that is neither ignored nor the target; frame order is kept.
excluded_price = set(ignored_columns_price) | {y_price}
x_price = [name for name in all_columns if name not in excluded_price]

print(f"Predictors for {y_price}: {x_price}")
print(f"Target for Price: {y_price}")

Predictors for price: ['game', 'set_name', 'card_name', 'rarity', 'condition', 'printing', 'language', '7d_priceChange', '7d_avgPrice', '7d_covPrice', '7d_priceChangesCount', '7d_priceRelativeToRange', '30d_priceChange', '30d_covPrice', '30d_priceChangesCount', '30d_priceRelativeToRange', '90d_priceChange', '90d_covPrice', '90d_iqrPrice', '90d_trendSlope', '90d_priceChangesCount', '90d_priceRelativeToRange', '1y_priceChange', '1y_avgPrice', '1y_stddevPopPrice', '1y_covPrice', '1y_iqrPrice', '1y_trendSlope', '1y_priceChangesCount', '1y_priceRelativeToRange', 'minPriceAllTime', 'minPriceAllTimeDate', 'maxPriceAllTime', 'maxPriceAllTimeDate']
Target for Price: price


In [22]:
# AutoML run for the price target: bounded by MAX_RUNTIME seconds, fixed seed,
# DeepLearning excluded from the candidate algorithms.
automl_price = H2OAutoML(
    max_runtime_secs=MAX_RUNTIME,
    seed=1205,
    exclude_algos=["DeepLearning"],
)
# Train on the cleaned frame only; no separate validation or leaderboard frame is supplied.
automl_price.train(
    x=x_price,
    y=y_price,
    training_frame=h2o_df_cleaned,
)


15:55:40.422: _train param, Dropping bad and constant columns: [7d_avgPrice, minPriceAllTime, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceChangesCount, 7d_priceRelativeToRange, 1y_stddevPopPrice, 1y_trendSlope, 1y_covPrice, 1y_iqrPrice, maxPriceAllTimeDate, 1y_priceRelativeToRange, 1y_avgPrice]


15:56:23.767: _train param, Dropping bad and constant columns: [7d_avgPrice, minPriceAllTime, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceChangesCount, 7d_priceRelativeToRange, 1y_stddevPopPrice, 1y_trendSlope, 1y_covPrice, 1y_iqrPrice, maxPriceAllTimeDate, 1y_priceRelativeToRange, 1y_avgPrice]


15:57:19.350: GLM_1_AutoML_4_20251206_155540 [GLM def_1] failed: DistributedException from /127.0.0.1:54321: 'Java heap space', caused by java.lang.OutOfMemoryError: Java heap space


15:57:21.281: _train param, Dropping bad and constant columns: [7d_avgPrice, minPriceAllTime, 1y_priceChange, minPriceAllTimeDate, maxPriceAllTime, 1y_priceChangesCount, 7d_priceRelat

Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,36.0,36.0,49760.0,10.0,10.0,10.0,20.0,207.0,67.02778

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
aic,,0.0,,,,,
loglikelihood,,0.0,,,,,
mae,15.715712,1.9077854,14.958812,16.231012,18.652285,13.512292,15.224155
mean_residual_deviance,57424.258,98133.13,11440.887,21588.12,232746.81,8188.68,13156.806
mse,57424.258,98133.13,11440.887,21588.12,232746.81,8188.68,13156.806
r2,0.3407954,0.16672,0.4431657,0.2665994,0.1266113,0.5603366,0.3072642
residual_deviance,57424.258,98133.13,11440.887,21588.12,232746.81,8188.68,13156.806
rmse,188.30478,165.70143,106.96208,146.92896,482.4384,90.491325,114.70312
rmsle,,0.0,,,,,

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_mae,training_deviance
,2025-12-06 15:59:20,12.104 sec,0.0,266.1237302,32.2032912,70821.8397688
,2025-12-06 15:59:20,12.464 sec,5.0,241.2831062,23.0102733,58217.537335
,2025-12-06 15:59:20,12.820 sec,10.0,225.661953,18.1387793,50923.3170351
,2025-12-06 15:59:21,13.155 sec,15.0,216.9198736,15.3411475,47054.2315731
,2025-12-06 15:59:21,13.475 sec,20.0,210.3289719,13.8824054,44238.2764384
,2025-12-06 15:59:21,13.828 sec,25.0,204.4269471,13.1181384,41790.3767159
,2025-12-06 15:59:22,14.138 sec,30.0,199.9192569,12.6049076,39967.7092881
,2025-12-06 15:59:22,14.485 sec,35.0,196.2379703,12.3812946,38509.3409893
,2025-12-06 15:59:22,14.549 sec,36.0,195.1271267,12.3664848,38074.5955619

variable,relative_importance,scaled_importance,percentage
90d_trendSlope,3012881920.0,1.0,0.4104561
90d_iqrPrice,1481521792.0,0.4917291,0.2018332
set_name,660318848.0,0.2191652,0.0899577
90d_covPrice,535965536.0,0.1778913,0.0730166
90d_priceChangesCount,361461472.0,0.119972,0.0492432
90d_priceRelativeToRange,287538112.0,0.0954362,0.0391724
rarity,255947744.0,0.0849511,0.0348687
30d_priceRelativeToRange,150335216.0,0.0498975,0.0204807
card_name,142287168.0,0.0472263,0.0193843
condition,89174752.0,0.0295978,0.0121486


In [23]:
# Fetch the AutoML leaderboard for the price run and show its first rows.
# The bare `.head()` on the last line is what renders the table as the cell output.
leaderboard_price = automl_price.leaderboard
print(f"\nLeaderboard for {y_price}:")
leaderboard_price.head()


Leaderboard for price:


model_id,rmse,mse,mae,rmsle,mean_residual_deviance
GBM_4_AutoML_4_20251206_155540,239.088,57163.1,15.8493,,57163.1
GBM_3_AutoML_4_20251206_155540,239.299,57263.8,16.7579,,57263.8
StackedEnsemble_BestOfFamily_3_AutoML_4_20251206_155540,239.384,57304.5,15.1001,,57304.5
GBM_2_AutoML_4_20251206_155540,239.549,57383.8,17.5094,,57383.8
StackedEnsemble_BestOfFamily_1_AutoML_4_20251206_155540,240.534,57856.5,17.2399,,57856.5
StackedEnsemble_AllModels_1_AutoML_4_20251206_155540,240.981,58072.0,15.3285,,58072.0
StackedEnsemble_BestOfFamily_2_AutoML_4_20251206_155540,241.075,58117.3,15.3155,,58117.3
StackedEnsemble_AllModels_2_AutoML_4_20251206_155540,241.785,58459.8,15.4038,,58459.8
StackedEnsemble_AllModels_3_AutoML_4_20251206_155540,241.95,58539.7,15.7408,,58539.7
GBM_5_AutoML_4_20251206_155540,243.221,59156.4,16.0573,,59156.4


In [24]:
# Save the price leader model as a MOJO archive in WORKING_DIR.
leader_model_price = automl_price.leader
# Compare against None explicitly instead of relying on the model object's truthiness;
# `leader` is presumably None when the AutoML run produced no model.
if leader_model_price is not None:
    # The archive is named after the target column; force=True overwrites an existing file.
    model_filename_price = f"{y_price}_leader_model.zip"
    model_path = leader_model_price.save_mojo(path=WORKING_DIR, filename=model_filename_price, force=True)
    print(f"\nLeader model '{leader_model_price.model_id}' saved as MOJO to: {model_path}")
else:
    print("No leader model found to save.")


Leader model 'GBM_4_AutoML_4_20251206_155540' saved as MOJO to: /content/price_leader_model.zip
