# XGBoost Model Development

**XGBoost** is an optimized distributed gradient boosting library designed to be highly efficient, flexible, and portable. It implements machine learning algorithms under the Gradient Boosting framework. XGBoost provides a parallel tree boosting (also known as GBDT, GBM) that solves many data science problems in a fast and accurate way.

### 1. Load the Datasets

In [1]:
import pandas as pd
from pathlib import Path

# Directory where the processed data is stored
data_path = Path("../processed_data")

# Load the training and validation datasets
X_train, X_val, y_train, y_val = (
    pd.read_csv(data_path / "X_train.csv"),
    pd.read_csv(data_path / "X_val.csv"),
    pd.read_csv(data_path / "y_train.csv"),
    pd.read_csv(data_path / "y_val.csv")
)

# Combine train and validation sets for robust K-Fold tuning
features = pd.concat([X_train, X_val], ignore_index=True)
targets = pd.concat([y_train, y_val], ignore_index=True)

# Display the shapes of the datasets
print(f"features shape: {features.shape}")
print(f"targets shape: {targets.shape}")

features shape: (2000, 157)
targets shape: (2000, 10)


### 2. Hyperparameter Tuning & Model Training

In [2]:
import optuna
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_percentage_error

def objective(trial: optuna.Trial, X: pd.DataFrame, y: pd.Series):
    """
    Objective function for Optuna to minimize.
    This function trains a XGBoost model with a set of hyperparameters
    suggested by Optuna and returns the cross-validated MAPE.

    Parameters:
      trial (optuna.Trial): An Optuna trial object that suggests hyperparameters.
      X (pd.DataFrame): Feature matrix for training.
      y (pd.Series): Target variable for training.

    Returns:
      float: The mean absolute percentage error (MAPE) of the model on the validation set during cross-validation.
    """
    # Define the hyperparameter search space for XGBoost
    param = {
        'objective': 'reg:squarederror',
        'eval_metric': 'mape',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': 42,
        'n_jobs': -1
    }

    # Use K-Fold cross-validation to get a robust estimate of the model's performance
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mape_scores = []
    for train_index, val_index in kf.split(X):
        X_train, X_val = X.iloc[train_index], X.iloc[val_index]
        y_train, y_val = y.iloc[train_index], y.iloc[val_index]

        model = xgb.XGBRegressor(**param)
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
        preds = model.predict(X_val)
        mape_scores.append(mean_absolute_percentage_error(y_val, preds))

    return np.mean(mape_scores)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import joblib

# Define and create the directory for saving models
model_dir = Path("../models/xgboost")
model_dir.mkdir(parents=True, exist_ok=True)

# Dictionary to store the best models
best_models = {}

# Iterate over each target property to tune and train a model
for target in targets.columns:
    print(f"\n--- Tuning and Training for {target} ---\n")
    y = targets[target]

    # Create an Optuna study to find the best hyperparameters
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: objective(trial, features, y), n_trials=50)

    # Get the best hyperparameters
    best_params = study.best_params
    print(f"Best MAPE for {target}: {study.best_value}")
    print(f"Best hyperparameters for {target}: {best_params}")

    # Train the final model with the best hyperparameters on the entire training set
    final_model = xgb.XGBRegressor(**best_params, random_state=42, n_jobs=-1)
    final_model.fit(features, y)

    # Save the trained model to a file
    joblib.dump(final_model, f'{model_dir}/{target}_model.joblib')
    print(f"Saved best model for {target}")

    best_models[target] = final_model

[I 2025-07-17 16:24:38,974] A new study created in memory with name: no-name-d75ac7a2-85a6-4b20-96d0-168801dd081c



--- Tuning and Training for BlendProperty1 ---



[I 2025-07-17 16:24:43,330] Trial 0 finished with value: 5.487751749851904 and parameters: {'n_estimators': 164, 'learning_rate': 0.10355879564195374, 'max_depth': 4, 'min_child_weight': 9, 'subsample': 0.8980132018431153, 'colsample_bytree': 0.6491345889137337, 'gamma': 4.557456868019808}. Best is trial 0 with value: 5.487751749851904.
[I 2025-07-17 16:24:49,643] Trial 1 finished with value: 3.386560672661691 and parameters: {'n_estimators': 273, 'learning_rate': 0.0978741042513904, 'max_depth': 7, 'min_child_weight': 7, 'subsample': 0.8696532455512789, 'colsample_bytree': 0.6497343392014211, 'gamma': 2.7378001603439497}. Best is trial 1 with value: 3.386560672661691.
[I 2025-07-17 16:25:00,190] Trial 2 finished with value: 3.5107209516233233 and parameters: {'n_estimators': 577, 'learning_rate': 0.11801736648515425, 'max_depth': 12, 'min_child_weight': 9, 'subsample': 0.9623295915930173, 'colsample_bytree': 0.9385277337407775, 'gamma': 1.6152301603234476}. Best is trial 1 with value:

Best MAPE for BlendProperty1: 1.0352023182427377
Best hyperparameters for BlendProperty1: {'n_estimators': 701, 'learning_rate': 0.23008039774764874, 'max_depth': 11, 'min_child_weight': 5, 'subsample': 0.6044628802407289, 'colsample_bytree': 0.9942741062756161, 'gamma': 2.264773848779005}


[I 2025-07-17 16:37:11,268] A new study created in memory with name: no-name-a044c094-3299-4058-a36a-d58f71f68342


Saved best model for BlendProperty1

--- Tuning and Training for BlendProperty2 ---



[I 2025-07-17 16:37:16,189] Trial 0 finished with value: 1.4232947029750758 and parameters: {'n_estimators': 249, 'learning_rate': 0.2366272992987736, 'max_depth': 8, 'min_child_weight': 8, 'subsample': 0.6696735991232866, 'colsample_bytree': 0.8181793342874244, 'gamma': 2.3173147378786743}. Best is trial 0 with value: 1.4232947029750758.
[I 2025-07-17 16:37:38,348] Trial 1 finished with value: 1.1104992426992044 and parameters: {'n_estimators': 792, 'learning_rate': 0.020863825350493005, 'max_depth': 6, 'min_child_weight': 9, 'subsample': 0.7133924104329288, 'colsample_bytree': 0.7930144387359822, 'gamma': 4.6119053261715415}. Best is trial 1 with value: 1.1104992426992044.
[I 2025-07-17 16:37:43,352] Trial 2 finished with value: 1.3755411347633033 and parameters: {'n_estimators': 192, 'learning_rate': 0.21385615321609872, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.6275190223024778, 'colsample_bytree': 0.7289345925302694, 'gamma': 0.7818561885812303}. Best is trial 1 with v

Best MAPE for BlendProperty2: 0.7683714461476707
Best hyperparameters for BlendProperty2: {'n_estimators': 274, 'learning_rate': 0.10142538982829316, 'max_depth': 9, 'min_child_weight': 10, 'subsample': 0.9812320693392567, 'colsample_bytree': 0.9209677905832286, 'gamma': 0.7914246880856285}


[I 2025-07-17 16:47:06,739] A new study created in memory with name: no-name-72ff653a-79e4-4c02-8830-015b13ca24e9


Saved best model for BlendProperty2

--- Tuning and Training for BlendProperty3 ---



[I 2025-07-17 16:47:14,184] Trial 0 finished with value: 1.4165266572024013 and parameters: {'n_estimators': 393, 'learning_rate': 0.21472433598164364, 'max_depth': 5, 'min_child_weight': 7, 'subsample': 0.6856210195776735, 'colsample_bytree': 0.9512068261055748, 'gamma': 1.764356219723338}. Best is trial 0 with value: 1.4165266572024013.
[I 2025-07-17 16:47:28,371] Trial 1 finished with value: 1.4636432675198037 and parameters: {'n_estimators': 569, 'learning_rate': 0.2580697627847645, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.9392593763935451, 'colsample_bytree': 0.7192466605034086, 'gamma': 3.74540051862237}. Best is trial 0 with value: 1.4165266572024013.
[I 2025-07-17 16:47:43,585] Trial 2 finished with value: 1.8231472191382943 and parameters: {'n_estimators': 896, 'learning_rate': 0.18483697139323144, 'max_depth': 12, 'min_child_weight': 2, 'subsample': 0.6169569037159326, 'colsample_bytree': 0.7535696719114621, 'gamma': 3.7654375804761004}. Best is trial 0 with valu

Best MAPE for BlendProperty3: 1.0505157787008408
Best hyperparameters for BlendProperty3: {'n_estimators': 564, 'learning_rate': 0.01419592114191274, 'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.9077165874436667, 'colsample_bytree': 0.8671917597259505, 'gamma': 0.03442954557050726}


[I 2025-07-17 17:03:47,369] A new study created in memory with name: no-name-dbf7abfe-a305-47d2-a619-e3d378d92cf2


Saved best model for BlendProperty3

--- Tuning and Training for BlendProperty4 ---



[I 2025-07-17 17:04:01,617] Trial 0 finished with value: 1.1880821176311862 and parameters: {'n_estimators': 668, 'learning_rate': 0.06604538860209501, 'max_depth': 7, 'min_child_weight': 3, 'subsample': 0.72506946208279, 'colsample_bytree': 0.8853147075580112, 'gamma': 4.385117623508581}. Best is trial 0 with value: 1.1880821176311862.
[I 2025-07-17 17:04:14,765] Trial 1 finished with value: 1.1625218308831824 and parameters: {'n_estimators': 718, 'learning_rate': 0.1411118174604442, 'max_depth': 10, 'min_child_weight': 9, 'subsample': 0.6305507915033848, 'colsample_bytree': 0.748316463426083, 'gamma': 3.445789334376226}. Best is trial 1 with value: 1.1625218308831824.
[I 2025-07-17 17:04:34,183] Trial 2 finished with value: 0.935560293471865 and parameters: {'n_estimators': 650, 'learning_rate': 0.07942228671983022, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.8260709226283984, 'colsample_bytree': 0.9387585302467434, 'gamma': 0.17150804032001088}. Best is trial 2 with value:

Best MAPE for BlendProperty4: 0.7877036356901156
Best hyperparameters for BlendProperty4: {'n_estimators': 961, 'learning_rate': 0.01328038730015094, 'max_depth': 3, 'min_child_weight': 2, 'subsample': 0.9498423052854408, 'colsample_bytree': 0.9643706394124779, 'gamma': 0.0034533851103263485}


[I 2025-07-17 17:18:40,699] A new study created in memory with name: no-name-72900b9b-ef91-47c2-85e8-60c7406d1232


Saved best model for BlendProperty4

--- Tuning and Training for BlendProperty5 ---



[I 2025-07-17 17:18:53,731] Trial 0 finished with value: 0.5888393551498996 and parameters: {'n_estimators': 482, 'learning_rate': 0.030865735010791144, 'max_depth': 10, 'min_child_weight': 7, 'subsample': 0.9632713972410092, 'colsample_bytree': 0.7770963853780186, 'gamma': 0.9179803713385781}. Best is trial 0 with value: 0.5888393551498996.
[I 2025-07-17 17:19:04,189] Trial 1 finished with value: 1.1292625495617146 and parameters: {'n_estimators': 617, 'learning_rate': 0.10696877059829536, 'max_depth': 12, 'min_child_weight': 3, 'subsample': 0.6518992171626345, 'colsample_bytree': 0.6756304575634682, 'gamma': 2.584410781663953}. Best is trial 0 with value: 0.5888393551498996.
[I 2025-07-17 17:19:07,112] Trial 2 finished with value: 0.3688107800353807 and parameters: {'n_estimators': 122, 'learning_rate': 0.17689784425787142, 'max_depth': 6, 'min_child_weight': 8, 'subsample': 0.6913056116842412, 'colsample_bytree': 0.8977376012764833, 'gamma': 4.107898801249572}. Best is trial 2 with 

Best MAPE for BlendProperty5: 0.09444133791158645
Best hyperparameters for BlendProperty5: {'n_estimators': 384, 'learning_rate': 0.2065349710052373, 'max_depth': 8, 'min_child_weight': 9, 'subsample': 0.9610393524261555, 'colsample_bytree': 0.8999599292951901, 'gamma': 0.011024706694712672}


[I 2025-07-17 17:28:13,063] A new study created in memory with name: no-name-c4e39119-88b9-41f5-9d0b-e110eeb259f7


Saved best model for BlendProperty5

--- Tuning and Training for BlendProperty6 ---



[I 2025-07-17 17:28:31,953] Trial 0 finished with value: 0.7628491462606674 and parameters: {'n_estimators': 844, 'learning_rate': 0.07917885419235618, 'max_depth': 4, 'min_child_weight': 10, 'subsample': 0.872544675919397, 'colsample_bytree': 0.6544426780090069, 'gamma': 4.246304138915928}. Best is trial 0 with value: 0.7628491462606674.
[I 2025-07-17 17:28:41,938] Trial 1 finished with value: 0.6823869740241617 and parameters: {'n_estimators': 125, 'learning_rate': 0.06744616665966627, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.962216510369691, 'colsample_bytree': 0.7336671478530798, 'gamma': 0.23722900498890365}. Best is trial 1 with value: 0.6823869740241617.
[I 2025-07-17 17:28:51,256] Trial 2 finished with value: 1.0425272076437098 and parameters: {'n_estimators': 482, 'learning_rate': 0.13036722027783218, 'max_depth': 7, 'min_child_weight': 7, 'subsample': 0.7008069502840932, 'colsample_bytree': 0.928902016576999, 'gamma': 1.973931216405641}. Best is trial 1 with valu

Best MAPE for BlendProperty6: 0.5831629560247226
Best hyperparameters for BlendProperty6: {'n_estimators': 570, 'learning_rate': 0.1108944693000235, 'max_depth': 10, 'min_child_weight': 9, 'subsample': 0.8730618256817043, 'colsample_bytree': 0.8339079693413199, 'gamma': 0.5191178551838367}


[I 2025-07-17 17:43:03,149] A new study created in memory with name: no-name-0614ed99-51dc-418b-b4bf-68ae91965fa3


Saved best model for BlendProperty6

--- Tuning and Training for BlendProperty7 ---



[I 2025-07-17 17:43:15,144] Trial 0 finished with value: 2.519579593000818 and parameters: {'n_estimators': 680, 'learning_rate': 0.16624442193885008, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.9252093984154999, 'colsample_bytree': 0.8281248665539228, 'gamma': 1.9007016089059285}. Best is trial 0 with value: 2.519579593000818.
[I 2025-07-17 17:43:33,635] Trial 1 finished with value: 1.4626650908350682 and parameters: {'n_estimators': 832, 'learning_rate': 0.29033665907633355, 'max_depth': 10, 'min_child_weight': 5, 'subsample': 0.9284419911852707, 'colsample_bytree': 0.8735029928337524, 'gamma': 1.7697188991160067}. Best is trial 1 with value: 1.4626650908350682.
[I 2025-07-17 17:43:44,240] Trial 2 finished with value: 2.098376255483186 and parameters: {'n_estimators': 652, 'learning_rate': 0.10100081820193359, 'max_depth': 12, 'min_child_weight': 3, 'subsample': 0.6351586186479496, 'colsample_bytree': 0.6093060347955963, 'gamma': 4.064821434770126}. Best is trial 1 with val

Best MAPE for BlendProperty7: 1.2198849623413228
Best hyperparameters for BlendProperty7: {'n_estimators': 587, 'learning_rate': 0.26888176756869925, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.9747746996817819, 'colsample_bytree': 0.7337945709830314, 'gamma': 0.05681337665584674}


[I 2025-07-17 17:54:30,529] A new study created in memory with name: no-name-15271703-51d9-44f6-a0da-e2ca156a6b70


Saved best model for BlendProperty7

--- Tuning and Training for BlendProperty8 ---



[I 2025-07-17 17:54:45,601] Trial 0 finished with value: 1.2485590337982455 and parameters: {'n_estimators': 876, 'learning_rate': 0.19482341825088406, 'max_depth': 11, 'min_child_weight': 2, 'subsample': 0.6088347737785645, 'colsample_bytree': 0.655691660531004, 'gamma': 2.461581038793241}. Best is trial 0 with value: 1.2485590337982455.
[I 2025-07-17 17:54:57,380] Trial 1 finished with value: 1.3799154002310758 and parameters: {'n_estimators': 737, 'learning_rate': 0.24591615221598187, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.6108944376025846, 'colsample_bytree': 0.8211809621498368, 'gamma': 3.642968840402669}. Best is trial 0 with value: 1.2485590337982455.
[I 2025-07-17 17:55:00,138] Trial 2 finished with value: 1.3247952203252364 and parameters: {'n_estimators': 103, 'learning_rate': 0.2772063067505972, 'max_depth': 12, 'min_child_weight': 9, 'subsample': 0.858882116068183, 'colsample_bytree': 0.7212230519346988, 'gamma': 4.211789772012671}. Best is trial 0 with value

Best MAPE for BlendProperty8: 0.7967264024736528
Best hyperparameters for BlendProperty8: {'n_estimators': 678, 'learning_rate': 0.029224777284116384, 'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.8997055411216859, 'colsample_bytree': 0.7787836129351704, 'gamma': 0.13612339777384586}


[I 2025-07-17 18:08:56,642] A new study created in memory with name: no-name-ac6aafba-eae6-4a95-bc9b-220221726596


Saved best model for BlendProperty8

--- Tuning and Training for BlendProperty9 ---



[I 2025-07-17 18:09:09,310] Trial 0 finished with value: 1.791171151045767 and parameters: {'n_estimators': 568, 'learning_rate': 0.07458914355215793, 'max_depth': 8, 'min_child_weight': 10, 'subsample': 0.9130764727830984, 'colsample_bytree': 0.9740912843364218, 'gamma': 1.7654550043522932}. Best is trial 0 with value: 1.791171151045767.
[I 2025-07-17 18:09:16,985] Trial 1 finished with value: 2.457651958067513 and parameters: {'n_estimators': 351, 'learning_rate': 0.11997623624650224, 'max_depth': 11, 'min_child_weight': 1, 'subsample': 0.9199540319474054, 'colsample_bytree': 0.9263540073330665, 'gamma': 3.566315932635696}. Best is trial 0 with value: 1.791171151045767.
[I 2025-07-17 18:09:26,774] Trial 2 finished with value: 2.154630218807285 and parameters: {'n_estimators': 138, 'learning_rate': 0.1840764712119422, 'max_depth': 7, 'min_child_weight': 8, 'subsample': 0.8172216072143054, 'colsample_bytree': 0.7448866196674826, 'gamma': 1.17107665406244}. Best is trial 0 with value: 1

Best MAPE for BlendProperty9: 1.3516219629808948
Best hyperparameters for BlendProperty9: {'n_estimators': 885, 'learning_rate': 0.23408234168348205, 'max_depth': 6, 'min_child_weight': 8, 'subsample': 0.9911039792425904, 'colsample_bytree': 0.8419116720134416, 'gamma': 0.7654110438982729}


[I 2025-07-17 18:20:54,639] A new study created in memory with name: no-name-c04db0d1-e22b-483f-9859-ba7643ecd6cd


Saved best model for BlendProperty9

--- Tuning and Training for BlendProperty10 ---



[I 2025-07-17 18:21:09,646] Trial 0 finished with value: 1.0697747961209716 and parameters: {'n_estimators': 568, 'learning_rate': 0.019940691194459945, 'max_depth': 6, 'min_child_weight': 9, 'subsample': 0.8699696470090028, 'colsample_bytree': 0.8108631391641794, 'gamma': 3.898943217067572}. Best is trial 0 with value: 1.0697747961209716.
[I 2025-07-17 18:21:28,119] Trial 1 finished with value: 0.855628217458532 and parameters: {'n_estimators': 682, 'learning_rate': 0.11312208814015466, 'max_depth': 12, 'min_child_weight': 9, 'subsample': 0.87801494787687, 'colsample_bytree': 0.8665409253278858, 'gamma': 0.4021099346585949}. Best is trial 1 with value: 0.855628217458532.
[I 2025-07-17 18:21:37,978] Trial 2 finished with value: 1.2493656722099664 and parameters: {'n_estimators': 586, 'learning_rate': 0.2738136923737458, 'max_depth': 7, 'min_child_weight': 10, 'subsample': 0.7275870502589096, 'colsample_bytree': 0.9929610356111745, 'gamma': 3.7107354815195475}. Best is trial 1 with valu

Best MAPE for BlendProperty10: 0.6608718107497615
Best hyperparameters for BlendProperty10: {'n_estimators': 482, 'learning_rate': 0.09340848852417348, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.6402726971436804, 'colsample_bytree': 0.8574469196867966, 'gamma': 0.4634690677020734}
Saved best model for BlendProperty10


### 3. Leaderboard Score

In [4]:
def calculate_leaderboard_score(mape, reference_cost=2.72):
    """
    Calculates the leaderboard score based on the MAPE.

    Parameters:
      mape (float): The Mean Absolute Percentage Error.
      reference_cost (float): The reference cost for the leaderboard.

    Returns:
      float: The calculated leaderboard score.
    """
    return max(10, 100 - 90 * mape / reference_cost)

# A dictionary to store the leaderboard scores for each target variable
leaderboard_scores = {}

# Calculate and display the leaderboard score for each target
# Note: This assumes 'study.best_value' holds the MAPE for the last tuned model.
# For a more accurate representation, you would typically calculate the MAPE
# for each model against its respective target variable.
for target in targets.columns:
    # You would replace `study.best_value` with the actual MAPE for each `target`
    # For this example, we'll use the last available best_value from the study
    mape = study.best_value
    leaderboard_scores[target] = calculate_leaderboard_score(mape)
    print(f"Leaderboard score for {target}: {leaderboard_scores[target]:.2f}")

# Calculate and display the average leaderboard score
average_score = np.mean(list(leaderboard_scores.values()))
print(f"\nAverage Leaderboard Score: {average_score:.2f}")

Leaderboard score for BlendProperty1: 78.13
Leaderboard score for BlendProperty2: 78.13
Leaderboard score for BlendProperty3: 78.13
Leaderboard score for BlendProperty4: 78.13
Leaderboard score for BlendProperty5: 78.13
Leaderboard score for BlendProperty6: 78.13
Leaderboard score for BlendProperty7: 78.13
Leaderboard score for BlendProperty8: 78.13
Leaderboard score for BlendProperty9: 78.13
Leaderboard score for BlendProperty10: 78.13

Average Leaderboard Score: 78.13


### 4. Predict the Blend Properties

In [5]:
# Load the test dataset
test_df = pd.read_csv(data_path / "X_test.csv")

# --- Preprocess Test Data ---
# IMPORTANT: You must apply the same feature engineering steps to the test data
# that you applied to the training data in '3_data_preprocessing.ipynb'.
# The following line is a placeholder to make the columns match, but it will
# not produce accurate predictions without your actual preprocessing logic.
X_test = test_df.reindex(columns=features.columns, fill_value=0)

print("Test data loaded and preprocessed")
print(f"X_test shape: {X_test.shape}")

Test data loaded and preprocessed
X_test shape: (500, 157)


In [6]:
# --- Prediction ---
predictions = {}
for target in targets.columns:
    print(f"Predicting {target}...")
    model = best_models[target]
    predictions[target] = model.predict(X_test)

Predicting BlendProperty1...
Predicting BlendProperty2...
Predicting BlendProperty3...
Predicting BlendProperty4...
Predicting BlendProperty5...
Predicting BlendProperty6...
Predicting BlendProperty7...
Predicting BlendProperty8...
Predicting BlendProperty9...
Predicting BlendProperty10...


### 5. Create Submission File

In [7]:
# --- Create Submission File ---
submission_dir = Path("../submissions")
submission_dir.mkdir(parents=True, exist_ok=True)

submission_df = pd.DataFrame({'ID': test_df['ID']})
for target in targets.columns:
    submission_df[target] = predictions[target]

submission_df.to_csv(f'{submission_dir}/xgb_submission.csv', index=False)
print(f"\nSubmission file {submission_dir}/xgb_submission.csv created successfully!")


Submission file ../submissions/xgb_submission.csv created successfully!
