In [None]:
# Attempt to set up and run the model as closely to the author's description as possible.
# The author uses a 66/33 train/test split and evaluates performance over 25 runs, each with a different random sample.

import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.metrics import mean_squared_error, r2_score

# Read the training table from disk
main_data = pd.read_csv("./data/train.csv")

# Separate the regression target ('critical_temp') from the remaining columns,
# which are used as the model inputs
y = main_data['critical_temp']
X = main_data.drop(columns=['critical_temp'])


# Baseline XGBoost regressor configured with the hyperparameters specified in the paper
paper_hyperparams = {
    'n_estimators': 374,        # Tree size: 374
    'max_depth': 16,            # Maximum depth: 16
    'learning_rate': 0.02,      # Learning rate (η): 0.02
    'min_child_weight': 1,      # Minimum child weight: 1
    'colsample_bytree': 0.5,    # Column subsampling: 0.50
}

xgb_model = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,            # fixed seed for the model itself
    **paper_hyperparams,
)


# Repeat the evaluation over several random train/test splits, as in the original paper
n_runs = 25
rmse_list = []
r2_list = []

for i in range(n_runs):
    # Hold out 33% of the rows for testing; offsetting the seed by the run index
    # gives every run a different, but reproducible, split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42 + i
    )

    # Fit on the training portion of this split, then predict the held-out portion
    xgb_model.fit(X_train, y_train)
    y_pred = xgb_model.predict(X_test)

    # Score this run: RMSE is the square root of the mean squared error
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    rmse_list.append(rmse)
    r2_list.append(r2)

    print(f"Run {i+1}: RMSE = {rmse:.4f}, R² = {r2:.4f}")

# Average the per-run scores. The messages interpolate n_runs rather than a
# hard-coded count so the summary stays correct if the number of runs changes.
avg_rmse = np.mean(rmse_list)
avg_r2 = np.mean(r2_list)
print(f"\nAverage RMSE over {n_runs} runs: {avg_rmse:.4f}")
print(f"Average R² over {n_runs} runs: {avg_r2:.4f}")




Run 1: RMSE = 9.4656, R² = 0.9230


Results:

