In [None]:
# Attempt to run the author's model with a roughly two-thirds / one-third
# train/test split (test_size=0.33), evaluate it with 25-fold cross-validation
# on the training portion, then predict the held-out third for comparison.

import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.metrics import mean_squared_error, r2_score

# Settings shared by the split, the model, and the report below. Keeping them
# in one place means the fold count printed in the summary cannot drift from
# the value actually passed to cross_validate.
DATA_PATH = "./data/train.csv"
TARGET_COLUMN = 'critical_temp'
TEST_FRACTION = 0.33   # held-out share; the remaining ~67% is used for training/CV
N_FOLDS = 25
SEED = 42

# Load the data
main_data = pd.read_csv(DATA_PATH)

# 'critical_temp' is the target; every other column is used as a feature
X = main_data.drop(TARGET_COLUMN, axis=1)
y = main_data[TARGET_COLUMN]

# Split the data into training and testing sets; the test rows are not touched
# again until the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SEED
)

# Create a baseline XGBoost model with the parameters specified in the paper
xgb_model = XGBRegressor(
    n_estimators=374,         # Tree size: 374
    max_depth=16,             # Maximum depth: 16
    learning_rate=0.02,       # Learning rate (η): 0.02
    min_child_weight=1,       # Minimum child weight: 1
    colsample_bytree=0.5,     # Column subsampling: 0.50
    random_state=SEED,
    objective='reg:squarederror'
)

# Define scoring metrics: note that RMSE is returned as a negative value
# (the scorer is "neg_root_mean_squared_error"), so it is negated again below.
scoring = {
    "rmse": "neg_root_mean_squared_error",
    "r2": "r2"
}

# Perform N_FOLDS-fold cross-validation using cross_validate only on the train data
cv_results = cross_validate(xgb_model, X_train, y_train, cv=N_FOLDS, scoring=scoring)

# Extract and convert RMSE scores to positive values
rmse_scores = -cv_results["test_rmse"]
r2_scores = cv_results["test_r2"]

# Print RMSE and R² for each fold
for i, (rmse, r2) in enumerate(zip(rmse_scores, r2_scores), start=1):
    print(f"Fold {i}: RMSE = {rmse:.4f}, R² = {r2:.4f}")

# Calculate and print the overall average scores
avg_rmse = np.mean(rmse_scores)
avg_r2 = np.mean(r2_scores)
print(f"\nAverage Cross-validated RMSE ({N_FOLDS} folds): {avg_rmse:.4f}")
print(f"\nAverage Cross-validated R² ({N_FOLDS} folds): {avg_r2:.4f}")


Results:

The author's parameters: 25-fold cross-validation on the training split (roughly two-thirds of the data) before using the test data to evaluate:

Fold 1: RMSE = 10.4193, R² = 0.9125
Fold 2: RMSE = 9.3529, R² = 0.9293
Fold 3: RMSE = 7.5943, R² = 0.9506
Fold 4: RMSE = 10.7370, R² = 0.9001
Fold 5: RMSE = 9.3011, R² = 0.9222
Fold 6: RMSE = 10.4272, R² = 0.9043
Fold 7: RMSE = 9.6513, R² = 0.9194
Fold 8: RMSE = 10.4734, R² = 0.9035
Fold 9: RMSE = 8.4812, R² = 0.9316
Fold 10: RMSE = 8.3877, R² = 0.9385
Fold 11: RMSE = 8.9062, R² = 0.9254
Fold 12: RMSE = 9.0328, R² = 0.9232
Fold 13: RMSE = 9.6993, R² = 0.9277
Fold 14: RMSE = 10.5692, R² = 0.9067
Fold 15: RMSE = 9.3323, R² = 0.9304
Fold 16: RMSE = 11.2379, R² = 0.8952
Fold 17: RMSE = 9.1582, R² = 0.9295
Fold 18: RMSE = 9.7220, R² = 0.9239
Fold 19: RMSE = 9.9490, R² = 0.9164
Fold 20: RMSE = 9.3758, R² = 0.9250
Fold 21: RMSE = 10.2647, R² = 0.9039
Fold 22: RMSE = 9.0041, R² = 0.9326
Fold 23: RMSE = 7.8902, R² = 0.9519
Fold 24: RMSE = 8.8469, R² = 0.9356
Fold 25: RMSE = 9.3879, R² = 0.9251

Average Cross-validated RMSE (25 folds): 9.4881

Average Cross-validated R² (25 folds): 0.9226


In [None]:
# Retrain the model on the full training set (the portion not held out for testing).
# This cell relies on xgb_model, X_train, y_train, X_test and y_test defined in
# the cross-validation cell above, so that cell must be run first.
xgb_model.fit(X_train, y_train)

# Make predictions on the reserved test set
y_test_pred = xgb_model.predict(X_test)

# Evaluate on the test set; RMSE is the square root of the mean squared error
test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
test_r2 = r2_score(y_test, y_test_pred)

# Plain string here: there is nothing to interpolate, so no f-prefix is needed
print("Reserved Test Set Performance:")
print(f"Test RMSE: {test_rmse:.4f}")
print(f"Test R²: {test_r2:.4f}")

Results:

The model evaluated on the reserved 33% test data:

Reserved Test Set Performance:
Test RMSE: 9.4656
Test R²: 0.9230
