# LightGBM Model Development

**LightGBM** is a fast, distributed, high-performance gradient boosting framework based on decision tree algorithms. It is designed for efficiency and scalability, making it suitable for large datasets and high-dimensional data. LightGBM supports parallel and GPU learning, handles categorical features natively, and often achieves state-of-the-art results in machine learning competitions.

### 1. Load the Datasets

In [1]:
import pandas as pd
from pathlib import Path

# Directory where the processed data is stored
data_path = Path("../processed_data")

# Load the training and validation datasets
X_train, X_val, y_train, y_val = (
    pd.read_csv(data_path / "X_train.csv"),
    pd.read_csv(data_path / "X_val.csv"),
    pd.read_csv(data_path / "y_train.csv"),
    pd.read_csv(data_path / "y_val.csv")
)

# Combine train and validation sets for robust K-Fold tuning
features = pd.concat([X_train, X_val], ignore_index=True)
targets = pd.concat([y_train, y_val], ignore_index=True)

# Display the shapes of the datasets
print(f"features shape: {features.shape}")
print(f"targets shape: {targets.shape}")

features shape: (2000, 157)
targets shape: (2000, 10)


### 2. Hyperparameter Tuning & Model Training

In [2]:
import optuna
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_percentage_error

def objective(trial: optuna.Trial, X: pd.DataFrame, y: pd.Series):
    """
    Objective function for Optuna to minimize.
    This function trains a LightGBM model with a set of hyperparameters
    suggested by Optuna and returns the cross-validated MAPE.

    Parameters:
      trial (optuna.Trial): An Optuna trial object that suggests hyperparameters.
      X (pd.DataFrame): Feature matrix for training.
      y (pd.Series): Target variable for training.

    Returns:
      float: The mean absolute percentage error (MAPE) of the model on the validation set during cross-validation.
    """
    # Define the hyperparameter search space for LightGBM
    param = {
        'objective': 'regression_l1',
        'metric': 'mape',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'random_state': 42,
        'n_jobs': -1,
        'verbose':-1
    }

    # Use K-Fold cross-validation to get a robust estimate of the model's performance
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mape_scores = []
    for train_index, val_index in kf.split(X):
        X_train, X_val = X.iloc[train_index], X.iloc[val_index]
        y_train, y_val = y.iloc[train_index], y.iloc[val_index]

        model = lgb.LGBMRegressor(**param)
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                  callbacks=[lgb.early_stopping(10, verbose=False)])
        preds = model.predict(X_val)
        mape_scores.append(mean_absolute_percentage_error(y_val, preds))

    return np.mean(mape_scores)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import joblib

# Define and create the directory for saving models
model_dir = Path("../models/lightgbm")
model_dir.mkdir(parents=True, exist_ok=True)

# Dictionary to store the best models
best_models = {}

# Iterate over each target property to tune and train a model
for target in targets.columns:
    print(f"\n--- Tuning and Training for {target} ---")
    y = targets[target]

    # Create an Optuna study to find the best hyperparameters
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: objective(trial, features, y), n_trials=50)

    # Get the best hyperparameters
    best_params = study.best_params
    print(f"Best MAPE for {target}: {study.best_value}")
    print(f"Best hyperparameters for {target}: {best_params}")

    # Train the final model with the best hyperparameters on the entire training set
    final_model = lgb.LGBMRegressor(**best_params, random_state=42, n_jobs=-1)
    final_model.fit(features, y)

    # Save the trained model to a file
    joblib.dump(final_model, f'{model_dir}/{target}_model.joblib')
    print(f"Saved best model for {target}")

    best_models[target] = final_model

[I 2025-07-17 14:00:25,646] A new study created in memory with name: no-name-a9f2fae0-0222-422b-9b34-70b722d3ae9f



--- Tuning and Training for BlendProperty1 ---


[I 2025-07-17 14:00:27,366] Trial 0 finished with value: 3.766864105417548 and parameters: {'n_estimators': 837, 'learning_rate': 0.21167422403792344, 'num_leaves': 92, 'max_depth': 8, 'min_child_samples': 68, 'subsample': 0.6856694650636558, 'colsample_bytree': 0.609610778829398}. Best is trial 0 with value: 3.766864105417548.
[I 2025-07-17 14:00:29,166] Trial 1 finished with value: 3.0447189515975053 and parameters: {'n_estimators': 102, 'learning_rate': 0.2648471305910477, 'num_leaves': 182, 'max_depth': 8, 'min_child_samples': 34, 'subsample': 0.8416087943293469, 'colsample_bytree': 0.9318479111845369}. Best is trial 1 with value: 3.0447189515975053.
[I 2025-07-17 14:00:32,409] Trial 2 finished with value: 1.6127450711509659 and parameters: {'n_estimators': 544, 'learning_rate': 0.21572947076381307, 'num_leaves': 232, 'max_depth': 12, 'min_child_samples': 56, 'subsample': 0.9240349305995696, 'colsample_bytree': 0.712299762696157}. Best is trial 2 with value: 1.6127450711509659.
[I 

Best MAPE for BlendProperty1: 0.583108188733694
Best hyperparameters for BlendProperty1: {'n_estimators': 825, 'learning_rate': 0.0661839668799621, 'num_leaves': 221, 'max_depth': 5, 'min_child_samples': 79, 'subsample': 0.7442595934677297, 'colsample_bytree': 0.7059535295731767}


[I 2025-07-17 14:04:15,871] A new study created in memory with name: no-name-1844f83d-7dd0-409d-a1cb-d8aca7e9e33c


Saved best model for BlendProperty1

--- Tuning and Training for BlendProperty2 ---


[I 2025-07-17 14:04:17,864] Trial 0 finished with value: 0.8925436208180615 and parameters: {'n_estimators': 339, 'learning_rate': 0.19212514740929013, 'num_leaves': 115, 'max_depth': 12, 'min_child_samples': 76, 'subsample': 0.7571505292244544, 'colsample_bytree': 0.7962381027826444}. Best is trial 0 with value: 0.8925436208180615.
[I 2025-07-17 14:04:19,018] Trial 1 finished with value: 1.1649737378394551 and parameters: {'n_estimators': 942, 'learning_rate': 0.26525962021282196, 'num_leaves': 91, 'max_depth': 4, 'min_child_samples': 40, 'subsample': 0.7428535376256766, 'colsample_bytree': 0.7761656703664948}. Best is trial 0 with value: 0.8925436208180615.
[I 2025-07-17 14:04:22,048] Trial 2 finished with value: 0.8625909060615939 and parameters: {'n_estimators': 870, 'learning_rate': 0.13689430357962504, 'num_leaves': 64, 'max_depth': 9, 'min_child_samples': 51, 'subsample': 0.6469315372038362, 'colsample_bytree': 0.6504181142631688}. Best is trial 2 with value: 0.8625909060615939.

Best MAPE for BlendProperty2: 0.6340265986686137
Best hyperparameters for BlendProperty2: {'n_estimators': 519, 'learning_rate': 0.026370721071053697, 'num_leaves': 38, 'max_depth': 11, 'min_child_samples': 27, 'subsample': 0.943139622650004, 'colsample_bytree': 0.8685742771561602}


[I 2025-07-17 14:12:17,067] A new study created in memory with name: no-name-36527e7e-3add-4572-b43e-630ac332c8ef


Saved best model for BlendProperty2

--- Tuning and Training for BlendProperty3 ---


[I 2025-07-17 14:12:21,724] Trial 0 finished with value: 1.1007107427977512 and parameters: {'n_estimators': 837, 'learning_rate': 0.053002709971092, 'num_leaves': 49, 'max_depth': 9, 'min_child_samples': 98, 'subsample': 0.7618744725166077, 'colsample_bytree': 0.720109188125557}. Best is trial 0 with value: 1.1007107427977512.
[I 2025-07-17 14:12:25,144] Trial 1 finished with value: 1.5085883395989126 and parameters: {'n_estimators': 758, 'learning_rate': 0.23391418771525496, 'num_leaves': 22, 'max_depth': 12, 'min_child_samples': 42, 'subsample': 0.979118771199979, 'colsample_bytree': 0.8069144241135642}. Best is trial 0 with value: 1.1007107427977512.
[I 2025-07-17 14:12:26,413] Trial 2 finished with value: 1.2478980075637298 and parameters: {'n_estimators': 450, 'learning_rate': 0.277271811146079, 'num_leaves': 86, 'max_depth': 9, 'min_child_samples': 85, 'subsample': 0.8753835230840616, 'colsample_bytree': 0.6017563836928274}. Best is trial 0 with value: 1.1007107427977512.
[I 202

Best MAPE for BlendProperty3: 0.9214301181248317
Best hyperparameters for BlendProperty3: {'n_estimators': 974, 'learning_rate': 0.1367991300232098, 'num_leaves': 157, 'max_depth': 8, 'min_child_samples': 56, 'subsample': 0.8061543443320504, 'colsample_bytree': 0.8536148454932477}


[I 2025-07-17 14:15:34,032] A new study created in memory with name: no-name-ba827187-1bfc-4442-bc68-8f493baf6743


Saved best model for BlendProperty3

--- Tuning and Training for BlendProperty4 ---


[I 2025-07-17 14:15:37,409] Trial 0 finished with value: 1.1607939720902967 and parameters: {'n_estimators': 426, 'learning_rate': 0.20295406347202696, 'num_leaves': 155, 'max_depth': 9, 'min_child_samples': 25, 'subsample': 0.976133052635733, 'colsample_bytree': 0.8303869357937596}. Best is trial 0 with value: 1.1607939720902967.
[I 2025-07-17 14:15:40,944] Trial 1 finished with value: 0.8111239328270822 and parameters: {'n_estimators': 305, 'learning_rate': 0.10552344234813717, 'num_leaves': 32, 'max_depth': 11, 'min_child_samples': 71, 'subsample': 0.6175942807848754, 'colsample_bytree': 0.9245618088028948}. Best is trial 1 with value: 0.8111239328270822.
[I 2025-07-17 14:15:43,520] Trial 2 finished with value: 1.3285793440582974 and parameters: {'n_estimators': 132, 'learning_rate': 0.16585814298020604, 'num_leaves': 27, 'max_depth': 12, 'min_child_samples': 48, 'subsample': 0.7553824372024359, 'colsample_bytree': 0.6094780384094417}. Best is trial 1 with value: 0.8111239328270822.

Best MAPE for BlendProperty4: 0.5986820898028309
Best hyperparameters for BlendProperty4: {'n_estimators': 767, 'learning_rate': 0.05200479393639115, 'num_leaves': 274, 'max_depth': 7, 'min_child_samples': 90, 'subsample': 0.9275360121910888, 'colsample_bytree': 0.7609326701195802}


[I 2025-07-17 14:20:00,439] A new study created in memory with name: no-name-2265d79b-ef9e-49ac-bb35-202b6a5151e6


Saved best model for BlendProperty4

--- Tuning and Training for BlendProperty5 ---


[I 2025-07-17 14:20:01,233] Trial 0 finished with value: 0.38262599817810444 and parameters: {'n_estimators': 927, 'learning_rate': 0.14950261360707878, 'num_leaves': 169, 'max_depth': 4, 'min_child_samples': 62, 'subsample': 0.9061650080033061, 'colsample_bytree': 0.9743107322469219}. Best is trial 0 with value: 0.38262599817810444.
[I 2025-07-17 14:20:02,862] Trial 1 finished with value: 0.2953736505110126 and parameters: {'n_estimators': 412, 'learning_rate': 0.08282936627336857, 'num_leaves': 176, 'max_depth': 4, 'min_child_samples': 33, 'subsample': 0.9189521162943313, 'colsample_bytree': 0.9246606835572057}. Best is trial 1 with value: 0.2953736505110126.
[I 2025-07-17 14:20:03,630] Trial 2 finished with value: 0.5911757460884166 and parameters: {'n_estimators': 434, 'learning_rate': 0.2857890361117174, 'num_leaves': 65, 'max_depth': 8, 'min_child_samples': 80, 'subsample': 0.6181669192328416, 'colsample_bytree': 0.6591202278002237}. Best is trial 1 with value: 0.2953736505110126

Best MAPE for BlendProperty5: 0.17814591106061028
Best hyperparameters for BlendProperty5: {'n_estimators': 481, 'learning_rate': 0.06539569119951781, 'num_leaves': 151, 'max_depth': 5, 'min_child_samples': 12, 'subsample': 0.6218220237007547, 'colsample_bytree': 0.9413097487914313}


[I 2025-07-17 14:23:11,208] A new study created in memory with name: no-name-548562c1-df85-4433-be52-d61fcff696e2


Saved best model for BlendProperty5

--- Tuning and Training for BlendProperty6 ---


[I 2025-07-17 14:23:17,663] Trial 0 finished with value: 0.6066290561217798 and parameters: {'n_estimators': 413, 'learning_rate': 0.10225675675294889, 'num_leaves': 145, 'max_depth': 11, 'min_child_samples': 64, 'subsample': 0.6786341389345786, 'colsample_bytree': 0.6176143521659174}. Best is trial 0 with value: 0.6066290561217798.
[I 2025-07-17 14:23:19,194] Trial 1 finished with value: 0.8675519763213085 and parameters: {'n_estimators': 673, 'learning_rate': 0.24724280778112817, 'num_leaves': 226, 'max_depth': 4, 'min_child_samples': 76, 'subsample': 0.6632647714824691, 'colsample_bytree': 0.9816790558240731}. Best is trial 0 with value: 0.6066290561217798.
[I 2025-07-17 14:23:21,516] Trial 2 finished with value: 1.3169880137840213 and parameters: {'n_estimators': 972, 'learning_rate': 0.2717832217331169, 'num_leaves': 63, 'max_depth': 6, 'min_child_samples': 12, 'subsample': 0.7653441623590166, 'colsample_bytree': 0.8227268859879626}. Best is trial 0 with value: 0.6066290561217798.

Best MAPE for BlendProperty6: 0.5860060530147279
Best hyperparameters for BlendProperty6: {'n_estimators': 572, 'learning_rate': 0.051674352911079385, 'num_leaves': 111, 'max_depth': 7, 'min_child_samples': 65, 'subsample': 0.6336327454016941, 'colsample_bytree': 0.6504318062170706}


[I 2025-07-17 14:27:37,825] A new study created in memory with name: no-name-e1696eb8-e321-4004-af3e-4f76ef26d9fe


Saved best model for BlendProperty6

--- Tuning and Training for BlendProperty7 ---


[I 2025-07-17 14:27:44,723] Trial 0 finished with value: 1.2981815402261854 and parameters: {'n_estimators': 432, 'learning_rate': 0.06266298552515766, 'num_leaves': 97, 'max_depth': 10, 'min_child_samples': 80, 'subsample': 0.6989614871804044, 'colsample_bytree': 0.9298125315742667}. Best is trial 0 with value: 1.2981815402261854.
[I 2025-07-17 14:27:46,956] Trial 1 finished with value: 1.722430145568265 and parameters: {'n_estimators': 342, 'learning_rate': 0.2733711882311584, 'num_leaves': 25, 'max_depth': 8, 'min_child_samples': 92, 'subsample': 0.8135959380791131, 'colsample_bytree': 0.9140842727399764}. Best is trial 0 with value: 1.2981815402261854.
[I 2025-07-17 14:27:56,138] Trial 2 finished with value: 2.026638203327188 and parameters: {'n_estimators': 839, 'learning_rate': 0.23507788098541008, 'num_leaves': 78, 'max_depth': 9, 'min_child_samples': 8, 'subsample': 0.8494325310816355, 'colsample_bytree': 0.9090970280903734}. Best is trial 0 with value: 1.2981815402261854.
[I 2

Best MAPE for BlendProperty7: 0.8635798181505621
Best hyperparameters for BlendProperty7: {'n_estimators': 408, 'learning_rate': 0.02141206426496478, 'num_leaves': 140, 'max_depth': 10, 'min_child_samples': 33, 'subsample': 0.9603585616705659, 'colsample_bytree': 0.7506177697059622}


[I 2025-07-17 14:32:05,968] A new study created in memory with name: no-name-57e2faee-5e05-47d9-80fc-0547e5e4b507


Saved best model for BlendProperty7

--- Tuning and Training for BlendProperty8 ---


[I 2025-07-17 14:32:20,948] Trial 0 finished with value: 0.930865159580442 and parameters: {'n_estimators': 637, 'learning_rate': 0.08989145632044838, 'num_leaves': 200, 'max_depth': 7, 'min_child_samples': 40, 'subsample': 0.8106375190843849, 'colsample_bytree': 0.6708862762804473}. Best is trial 0 with value: 0.930865159580442.
[I 2025-07-17 14:32:23,298] Trial 1 finished with value: 1.2432420854655126 and parameters: {'n_estimators': 990, 'learning_rate': 0.27603667116159175, 'num_leaves': 35, 'max_depth': 10, 'min_child_samples': 93, 'subsample': 0.9928054560731421, 'colsample_bytree': 0.7406847717118747}. Best is trial 0 with value: 0.930865159580442.
[I 2025-07-17 14:32:30,375] Trial 2 finished with value: 0.9733316680521421 and parameters: {'n_estimators': 583, 'learning_rate': 0.08971099168192688, 'num_leaves': 207, 'max_depth': 8, 'min_child_samples': 60, 'subsample': 0.9999698154664626, 'colsample_bytree': 0.6801550374908601}. Best is trial 0 with value: 0.930865159580442.
[I

Best MAPE for BlendProperty8: 0.7522846102426115
Best hyperparameters for BlendProperty8: {'n_estimators': 627, 'learning_rate': 0.09800273991886255, 'num_leaves': 300, 'max_depth': 8, 'min_child_samples': 92, 'subsample': 0.7105427600668206, 'colsample_bytree': 0.6267922791476829}


[I 2025-07-17 14:38:15,431] A new study created in memory with name: no-name-a8ed8880-005c-4f37-9572-da14dc2468f3


Saved best model for BlendProperty8

--- Tuning and Training for BlendProperty9 ---


[I 2025-07-17 14:38:17,053] Trial 0 finished with value: 2.0141476227204285 and parameters: {'n_estimators': 671, 'learning_rate': 0.2587213210449673, 'num_leaves': 125, 'max_depth': 3, 'min_child_samples': 81, 'subsample': 0.6292484132158126, 'colsample_bytree': 0.7082678216998476}. Best is trial 0 with value: 2.0141476227204285.
[I 2025-07-17 14:38:20,440] Trial 1 finished with value: 1.1899517000090578 and parameters: {'n_estimators': 789, 'learning_rate': 0.12275441846514691, 'num_leaves': 86, 'max_depth': 5, 'min_child_samples': 86, 'subsample': 0.8331138387469714, 'colsample_bytree': 0.7935763202443387}. Best is trial 1 with value: 1.1899517000090578.
[I 2025-07-17 14:38:21,435] Trial 2 finished with value: 2.4254605874132835 and parameters: {'n_estimators': 645, 'learning_rate': 0.17265707297175126, 'num_leaves': 61, 'max_depth': 3, 'min_child_samples': 43, 'subsample': 0.9369285232246297, 'colsample_bytree': 0.6351826676814885}. Best is trial 1 with value: 1.1899517000090578.
[

Best MAPE for BlendProperty9: 0.958715108570737
Best hyperparameters for BlendProperty9: {'n_estimators': 941, 'learning_rate': 0.1499760820730177, 'num_leaves': 88, 'max_depth': 7, 'min_child_samples': 86, 'subsample': 0.7131757505051894, 'colsample_bytree': 0.8192717632707273}


[I 2025-07-17 14:41:00,027] A new study created in memory with name: no-name-4726d7e6-01ab-4fda-a3ac-f90c1580b09d


Saved best model for BlendProperty9

--- Tuning and Training for BlendProperty10 ---


[I 2025-07-17 14:41:04,068] Trial 0 finished with value: 0.5890113240701508 and parameters: {'n_estimators': 720, 'learning_rate': 0.13404722639566935, 'num_leaves': 96, 'max_depth': 7, 'min_child_samples': 70, 'subsample': 0.8105433528916258, 'colsample_bytree': 0.8369410518658777}. Best is trial 0 with value: 0.5890113240701508.
[I 2025-07-17 14:41:06,723] Trial 1 finished with value: 0.7616260958902884 and parameters: {'n_estimators': 565, 'learning_rate': 0.1011030139730397, 'num_leaves': 83, 'max_depth': 4, 'min_child_samples': 95, 'subsample': 0.9660522986118485, 'colsample_bytree': 0.8038905867734282}. Best is trial 0 with value: 0.5890113240701508.
[I 2025-07-17 14:41:17,837] Trial 2 finished with value: 0.7580740742179956 and parameters: {'n_estimators': 481, 'learning_rate': 0.1403492683608044, 'num_leaves': 173, 'max_depth': 8, 'min_child_samples': 69, 'subsample': 0.941081709429518, 'colsample_bytree': 0.7639112513366021}. Best is trial 0 with value: 0.5890113240701508.
[I 

Best MAPE for BlendProperty10: 0.4458095489239349
Best hyperparameters for BlendProperty10: {'n_estimators': 941, 'learning_rate': 0.04245779971736605, 'num_leaves': 270, 'max_depth': 10, 'min_child_samples': 48, 'subsample': 0.8439534043494793, 'colsample_bytree': 0.6419318737449912}
Saved best model for BlendProperty10


### 3. Leaderboard Score

In [None]:
def calculate_leaderboard_score(mape, reference_cost=2.72):
    """
    Calculates the leaderboard score based on the MAPE.

    Parameters:
      mape (float): The Mean Absolute Percentage Error.
      reference_cost (float): The reference cost for the leaderboard.

    Returns:
      float: The calculated leaderboard score.
    """
    return max(10, 100 - 90 * mape / reference_cost)

# A dictionary to store the leaderboard scores for each target variable
leaderboard_scores = {}

# Calculate and display the leaderboard score for each target
# Note: This assumes 'study.best_value' holds the MAPE for the last tuned model.
# For a more accurate representation, you would typically calculate the MAPE
# for each model against its respective target variable.
for target in targets.columns:
    # You would replace `study.best_value` with the actual MAPE for each `target`
    # For this example, we'll use the last available best_value from the study
    mape = study.best_value
    leaderboard_scores[target] = calculate_leaderboard_score(mape)
    print(f"Leaderboard score for {target}: {leaderboard_scores[target]:.2f}")

# Calculate and display the average leaderboard score
average_score = np.mean(list(leaderboard_scores.values()))
print(f"\nAverage Leaderboard Score: {average_score:.2f}")

Leaderboard score for BlendProperty1: 85.25
Leaderboard score for BlendProperty2: 85.25
Leaderboard score for BlendProperty3: 85.25
Leaderboard score for BlendProperty4: 85.25
Leaderboard score for BlendProperty5: 85.25
Leaderboard score for BlendProperty6: 85.25
Leaderboard score for BlendProperty7: 85.25
Leaderboard score for BlendProperty8: 85.25
Leaderboard score for BlendProperty9: 85.25
Leaderboard score for BlendProperty10: 85.25

Average Leaderboard Score: 85.25


### 4. Predict the Blend Properties

In [None]:
# Load the test dataset
test_df = pd.read_csv(data_path / "X_test.csv")

# --- Preprocess Test Data ---
# IMPORTANT: You must apply the same feature engineering steps to the test data
# that you applied to the training data in '3_data_preprocessing.ipynb'.
# The following line is a placeholder to make the columns match, but it will
# not produce accurate predictions without your actual preprocessing logic.
X_test = test_df.reindex(columns=features.columns, fill_value=0)

print("Test data loaded and preprocessed")
print(f"X_test shape: {X_test.shape}")

Test data loaded and preprocessed (placeholder).
X_test shape: (500, 157)


In [18]:
# --- Prediction ---
predictions = {}
for target in targets.columns:
    print(f"Predicting {target}...")
    model = best_models[target]
    predictions[target] = model.predict(X_test)

Predicting BlendProperty1...
Predicting BlendProperty2...
Predicting BlendProperty3...
Predicting BlendProperty4...
Predicting BlendProperty5...
Predicting BlendProperty6...
Predicting BlendProperty7...
Predicting BlendProperty8...
Predicting BlendProperty9...
Predicting BlendProperty10...


### 5. Create Submission File

In [19]:
# --- Create Submission File ---
submission_dir = Path("../submissions")
submission_dir.mkdir(parents=True, exist_ok=True)

submission_df = pd.DataFrame({'ID': test_df['ID']})
for target in targets.columns:
    submission_df[target] = predictions[target]

submission_df.to_csv(f'{submission_dir}/submission.csv', index=False)
print(f"\nSubmission file {submission_dir}/submission.csv created successfully!")


Submission file ../submissions/submission.csv created successfully!
