In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import os
import tensorflow as tf

# Process-level environment settings for the GPU run
os.environ.update({
    # NOTE(review): intended as an XGBoost switch — confirm XGBoost actually reads this variable
    'XGB_USE_CPP_API': '0',
    # Expose only the first GPU device to CUDA
    'CUDA_VISIBLE_DEVICES': '0',
})

def rrmse(y_true, y_pred):
    """Relative root-mean-squared error of ``y_pred`` against ``y_true``.

    The RMSE (square root of ``mean_squared_error``) is divided by the mean
    of ``y_true``. The mean is used as-is: there is no guard for a zero or
    negative mean.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, passed to ``mean_squared_error`` together
            with ``y_true``.

    Returns:
        RMSE divided by ``np.mean(y_true)``.
    """
    root_mse = np.sqrt(mean_squared_error(y_true, y_pred))
    return root_mse / np.mean(y_true)

def arrmse(y_true, y_pred):
    """Average the relative RMSE (``rrmse``) over all target columns.

    Inputs are converted with ``np.asarray`` so that pandas DataFrames/Series
    and nested sequences work in addition to NumPy arrays; positional slicing
    such as ``y[:, i]`` is only valid on arrays.

    Args:
        y_true: Observed values, one column per target (rows are samples).
            A 1-D input is treated as a single target column.
        y_pred: Predicted values in the same layout as ``y_true``.

    Returns:
        The mean of the per-column ``rrmse`` scores.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # A single target may arrive as a flat vector; view it as one column so
    # the column-wise loop below applies unchanged.
    if y_true.ndim == 1:
        y_true = y_true.reshape(-1, 1)
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape(-1, 1)
    n_targets = y_true.shape[1]
    return np.mean([rrmse(y_true[:, i], y_pred[:, i]) for i in range(n_targets)])


In [None]:
# Train one XGBoost regressor per target column with GPU acceleration
def train_xgboost_multi_target_gpu(x_train, y_train, x_val, y_val):
    """Fit a separate GPU-configured ``XGBRegressor`` for every column of ``y_train``.

    Each model is fitted on ``x_train`` against one target column, using the
    matching validation column as ``eval_set`` with 10 early-stopping rounds.
    After all targets are fitted, the average relative RMSE (``arrmse``) over
    the validation predictions is printed.

    Args:
        x_train: Training features, passed straight to ``XGBRegressor.fit``.
        y_train: Training targets; must expose ``.columns`` and column access
            by name (a pandas DataFrame in practice).
        x_val: Validation features.
        y_val: Validation targets; must contain every column of ``y_train``.
            Columns are selected by name, so their order (or extra columns)
            does not affect the result.

    Returns:
        A ``(models, predictions_val)`` tuple: ``models`` maps target name to
        its fitted regressor, and ``predictions_val`` is an array with one row
        per validation sample and one column per target, in ``y_train``
        column order.

    Raises:
        KeyError: If ``y_val`` lacks one of the ``y_train`` columns.
    """
    targets = list(y_train.columns)
    # Select validation targets by name so that column i of the predictions
    # always lines up with column i of the ground truth when scoring.
    y_val_aligned = y_val[targets]
    models = {}
    predictions_val = np.zeros(y_val_aligned.shape)
    for i, target in enumerate(targets):
        print(f'Training {target}...')
        model = XGBRegressor(tree_method='gpu_hist', predictor='gpu_predictor', n_estimators=100, verbosity=1)
        # NOTE(review): newer XGBoost releases expect early_stopping_rounds on
        # the estimator constructor rather than fit(), and replace gpu_hist /
        # gpu_predictor with a device parameter — confirm against the
        # installed version before changing.
        model.fit(x_train, y_train[target],
                  eval_set=[(x_val, y_val_aligned[target])],
                  early_stopping_rounds=10,
                  verbose=False)
        models[target] = model
        predictions_val[:, i] = model.predict(x_val)
    # Calculate average rrmse on val set
    average_rrmse = arrmse(y_val_aligned.values, predictions_val)
    print(f'Average RRMSE on validation set: {average_rrmse}')
    return models, predictions_val
