# 1. Install requirements

In [1]:
%pip install pandas
%pip install scikit-learn
%pip install xgboost
%pip install optuna
%pip install joblib

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49

# 2. Add column names to dataset

In [86]:
import pandas as pd

wilderness_columns = []
soil_columns = []
other_columns = [
    "Elevation",
    "Aspect",
    "Slope",
    "Horizontal_Distance_To_Hydrology",
    "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am",
    "Hillshade_Noon",
    "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points",
]



for i in range(1,5):
    wilderness_columns += [f"Wilderness_Area_{i:02d}"]

for i in range(1,41):
    soil_columns += [f"Soil_Type_{i:02d}"]

column_names = other_columns + wilderness_columns + soil_columns + ["Cover_Type"]

df = pd.read_csv("covtype.csv", header=None)

df.columns = column_names

df['Cover_Type'] = df['Cover_Type'] - 1


## Ensure wilderness areas and soil types columns are of type one-hot

In [88]:

# Wilderness areas columns can have only 4 possible combinations
wilderness = df[wilderness_columns].drop_duplicates()
if len(wilderness) == 4:
    print("Wilderness areas columns are ok.")
else:
    print("Wilderness areas columns are NOT ok.")

# Soil types columns can have only 40 possible combinations
soil = df[soil_columns].drop_duplicates()
if len(soil) == 40:
    print("Soil types columns are ok.")
else:
    print("Soil type columns are NOT ok.")

Wilderness areas columns are ok.
Soil types columns are ok.


## Show some stats on the other columns to check is there's something missing

In [357]:
display(df[other_columns].describe())
display(df['Cover_Type'].value_counts())

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points
count,581012.0,581012.0,581012.0,581012.0,581012.0,581012.0,581012.0,581012.0,581012.0,581012.0
mean,2959.365301,155.656807,14.103704,269.428217,46.418855,2350.146611,212.146049,223.318716,142.528263,1980.291226
std,279.984734,111.913721,7.488242,212.549356,58.295232,1559.25487,26.769889,19.768697,38.274529,1324.19521
min,1859.0,0.0,0.0,0.0,-173.0,0.0,0.0,0.0,0.0,0.0
25%,2809.0,58.0,9.0,108.0,7.0,1106.0,198.0,213.0,119.0,1024.0
50%,2996.0,127.0,13.0,218.0,30.0,1997.0,218.0,226.0,143.0,1710.0
75%,3163.0,260.0,18.0,384.0,69.0,3328.0,231.0,237.0,168.0,2550.0
max,3858.0,360.0,66.0,1397.0,601.0,7117.0,254.0,254.0,254.0,7173.0


Cover_Type
1    283301
0    211840
2     35754
6     20510
5     17367
4      9493
3      2747
Name: count, dtype: int64

Everything looks fine. EDA finished

## Just for fun: create vectors of a given size

In [140]:
import numpy as np

rng = np.random.default_rng()

def generate_random_one_hot_sets(n_sets=1, size=4):
    random_vectors = np.zeros((n_sets, size), dtype=int)

    random_indices = rng.integers(low=0, high=size, size=n_sets)

    row_indices = np.arange(n_sets)
    random_vectors[row_indices, random_indices] = 1

    return random_vectors

def generate_random_number(min=0, max=10000):
    return rng.integers(low=min, high=max)

# 3. Train XGBoost

In [2]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

X = df.drop(columns=['Cover_Type'])
y = df['Cover_Type']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
scaler = StandardScaler()

xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    n_estimators=100,             # Tree's number (estimators)
    learning_rate=0.1,            # Learning rate
    max_depth=6,                  # Max. depth
    eval_metric='merror',         # Classification error
    random_state=42,
    tree_method='hist',          
    device="cuda"                 # Use this if you have NVidia
)

print("Trining XGBoost model...")
xgb_model.fit(scaler.fit_transform(X_train), y_train)
print("Training finished.")

y_pred = xgb_model.predict(scaler.transform(X_test))

Trining XGBoost model...
Training finished.


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


# 4. Show XGBoost metrics

In [3]:
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy of XGBoost model: {accuracy:.4f}")

print("\nClassification report:")
print(classification_report(y_test, y_pred))


Accuracy of XGBoost model: 0.8108

Classification report:
              precision    recall  f1-score   support

           0       0.81      0.78      0.79     42368
           1       0.81      0.86      0.84     56661
           2       0.79      0.86      0.82      7151
           3       0.83      0.83      0.83       549
           4       0.87      0.31      0.46      1899
           5       0.73      0.49      0.59      3473
           6       0.90      0.81      0.85      4102

    accuracy                           0.81    116203
   macro avg       0.82      0.71      0.74    116203
weighted avg       0.81      0.81      0.81    116203



# 5. Optimize XGBoost

In [7]:
import optuna
import xgboost as xgb
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from optuna.pruners import MedianPruner
from optuna import TrialPruned
from xgboost.callback import EarlyStopping


def objective(trial, X, y):
    # Hyperparameters
    param = {
        'objective': 'multi:softmax',
        'eval_metric': 'merror',
        'n_jobs': -1,
        'random_state': 42,
        'tree_method': 'hist',
        'device': 'cuda',

        # Learning rate
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),

        # Overfitting control
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),

        # Bagging-like
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),

        # L1 and L2 regularization
        'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),  # L2
        'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True)   # L1
    }

    # Estimators (trees)
    n_estimators = 500

    # Create kfold
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    accuracies = []
    scaler = StandardScaler()

    # callback_es = EarlyStopping(
    #     rounds=20,
    #     metric_name="merror",
    #     data_name="validation_0"
    # )

    fold_step = 0
    for train_index, val_index in kf.split(X, y):
        X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[val_index]
        y_train_fold, y_val_fold = y.iloc[train_index], y.iloc[val_index]

        X_train_scaled = scaler.fit_transform(X_train_fold)
        X_val_scaled = scaler.transform(X_val_fold)

        # Create model
        model = xgb.XGBClassifier(
            **param,
            n_estimators=n_estimators
        )

        # Train model
        model.fit(
            X_train_scaled, y_train_fold,
            eval_set=[(X_val_scaled, y_val_fold)],
            verbose=False # No progress
        )

        # Predict and compute precission
        y_pred = model.predict(X_val_scaled)
        accuracy = accuracy_score(y_val_fold, y_pred)
        accuracies.append(accuracy)

        fold_step += 1
        trial.report(np.mean(accuracies), fold_step)
        if trial.should_prune():
            raise TrialPruned

    # Return the mean of all accuracies to optuna (maximize)
    return np.mean(accuracies)


# Create optuna study
study = optuna.create_study(
    direction='maximize',
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=30, interval_steps=10)
)

# Run optimization (it can take a long time)
print("Optuna study is starting...")
study.optimize(
    lambda trial: objective(trial, X_train, y_train),
    n_trials=50, 
    show_progress_bar=True
)

print("\n--- Optuna Results---")
print(f"Best score for cross validation: {study.best_value:.4f}")
print("Best hyperparams:")
print(study.best_params)

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-10-15 16:49:35,758] A new study created in memory with name: no-name-cfafa8fd-19d6-4e53-a9b7-3d0f110a11c9


Optuna study is starting...


Best trial: 0. Best value: 0.958678:   2%|▏         | 1/50 [01:57<1:35:44, 117.24s/it]

[I 2025-10-15 16:51:32,997] Trial 0 finished with value: 0.9586776501667901 and parameters: {'learning_rate': 0.2863414865272773, 'max_depth': 8, 'min_child_weight': 9, 'gamma': 8.325937909892682e-06, 'subsample': 0.9048967594249374, 'colsample_bytree': 0.7115815178224135, 'lambda': 3.170849864792997e-07, 'alpha': 0.04137263265173337}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:   4%|▍         | 2/50 [03:03<1:09:34, 86.96s/it] 

[I 2025-10-15 16:52:38,770] Trial 1 finished with value: 0.7869705568351406 and parameters: {'learning_rate': 0.04099491215107271, 'max_depth': 4, 'min_child_weight': 9, 'gamma': 8.551946244720282e-05, 'subsample': 0.947974669825048, 'colsample_bytree': 0.5257057876139336, 'lambda': 0.02081029322179165, 'alpha': 1.037524529289905e-08}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:   6%|▌         | 3/50 [04:08<1:00:31, 77.26s/it]

[I 2025-10-15 16:53:44,485] Trial 2 finished with value: 0.8040291782835526 and parameters: {'learning_rate': 0.05879069200797752, 'max_depth': 4, 'min_child_weight': 10, 'gamma': 0.001030101950497546, 'subsample': 0.8049775134394102, 'colsample_bytree': 0.5715913231203578, 'lambda': 3.1341231761660583e-06, 'alpha': 0.2697031109088564}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:   8%|▊         | 4/50 [05:21<57:53, 75.51s/it]  

[I 2025-10-15 16:54:57,300] Trial 3 finished with value: 0.7997715129081457 and parameters: {'learning_rate': 0.026295546966650322, 'max_depth': 5, 'min_child_weight': 10, 'gamma': 0.012460038764513882, 'subsample': 0.7023387162695998, 'colsample_bytree': 0.9790511301419875, 'lambda': 1.0193454238451208e-06, 'alpha': 6.690870309957789e-06}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  10%|█         | 5/50 [07:54<1:17:41, 103.60s/it]

[I 2025-10-15 16:57:30,701] Trial 4 finished with value: 0.885899367310261 and parameters: {'learning_rate': 0.01380586692905219, 'max_depth': 10, 'min_child_weight': 10, 'gamma': 0.13610555281404874, 'subsample': 0.8634050548906715, 'colsample_bytree': 0.8567959323620034, 'lambda': 7.061603633923131e-08, 'alpha': 7.619685432004728e-05}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  12%|█▏        | 6/50 [09:39<1:16:18, 104.06s/it]

[I 2025-10-15 16:59:15,668] Trial 5 finished with value: 0.8936918167937089 and parameters: {'learning_rate': 0.03727303232169336, 'max_depth': 8, 'min_child_weight': 8, 'gamma': 0.07844402035985906, 'subsample': 0.6648950386172174, 'colsample_bytree': 0.8504688058384982, 'lambda': 3.3379198793799945e-07, 'alpha': 1.7940518943719674e-08}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  14%|█▍        | 7/50 [12:09<1:25:09, 118.82s/it]

[I 2025-10-15 17:01:44,870] Trial 6 finished with value: 0.9362426295940509 and parameters: {'learning_rate': 0.04439095183126759, 'max_depth': 10, 'min_child_weight': 5, 'gamma': 5.667014595431978e-08, 'subsample': 0.8382452234515322, 'colsample_bytree': 0.7191088671857585, 'lambda': 1.832361774284876e-08, 'alpha': 0.0014303135403441712}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  16%|█▌        | 8/50 [13:35<1:15:56, 108.50s/it]

[I 2025-10-15 17:03:11,271] Trial 7 finished with value: 0.9579999527929264 and parameters: {'learning_rate': 0.224704725831771, 'max_depth': 10, 'min_child_weight': 10, 'gamma': 0.5480679385083945, 'subsample': 0.6526062191974451, 'colsample_bytree': 0.9721538180140233, 'lambda': 2.2122196570338892e-08, 'alpha': 6.447252034003792e-05}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  18%|█▊        | 9/50 [15:45<1:18:41, 115.17s/it]

[I 2025-10-15 17:05:21,112] Trial 8 finished with value: 0.9399861013541907 and parameters: {'learning_rate': 0.06511732163166842, 'max_depth': 9, 'min_child_weight': 9, 'gamma': 0.000413031970297957, 'subsample': 0.536655367581826, 'colsample_bytree': 0.9636311501364296, 'lambda': 0.012250674932704998, 'alpha': 0.005619057223393019}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  20%|██        | 10/50 [17:36<1:15:58, 113.96s/it]

[I 2025-10-15 17:07:12,367] Trial 9 finished with value: 0.9568424874334541 and parameters: {'learning_rate': 0.2057651038308867, 'max_depth': 8, 'min_child_weight': 4, 'gamma': 0.041179908128365895, 'subsample': 0.9180123438999326, 'colsample_bytree': 0.6869026684236934, 'lambda': 0.0034661691331898154, 'alpha': 1.4510221882496312e-06}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  22%|██▏       | 11/50 [18:58<1:07:36, 104.02s/it]

[I 2025-10-15 17:08:33,828] Trial 10 finished with value: 0.8965532087219102 and parameters: {'learning_rate': 0.10954220877276923, 'max_depth': 6, 'min_child_weight': 1, 'gamma': 1.6041604226182293e-06, 'subsample': 0.9835500420507665, 'colsample_bytree': 0.6339428805539755, 'lambda': 9.12295030190173e-05, 'alpha': 0.3325151860465931}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  24%|██▍       | 12/50 [20:50<1:07:29, 106.56s/it]

[I 2025-10-15 17:10:26,195] Trial 11 finished with value: 0.9580236178212761 and parameters: {'learning_rate': 0.27672794820156854, 'max_depth': 8, 'min_child_weight': 7, 'gamma': 5.064549984410635e-06, 'subsample': 0.5940124230365602, 'colsample_bytree': 0.8177756472411569, 'lambda': 1.1003690919270685e-05, 'alpha': 0.004898489081248786}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  26%|██▌       | 13/50 [22:26<1:03:48, 103.47s/it]

[I 2025-10-15 17:12:02,581] Trial 12 finished with value: 0.9344634039034094 and parameters: {'learning_rate': 0.1355255793821698, 'max_depth': 7, 'min_child_weight': 7, 'gamma': 5.26479096461731e-06, 'subsample': 0.521082368727091, 'colsample_bytree': 0.814139184731391, 'lambda': 2.2540244415113722e-05, 'alpha': 0.010952970917855595}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  28%|██▊       | 14/50 [24:04<1:00:57, 101.61s/it]

[I 2025-10-15 17:13:39,868] Trial 13 finished with value: 0.9510250436487981 and parameters: {'learning_rate': 0.2895654309630293, 'max_depth': 7, 'min_child_weight': 7, 'gamma': 2.3889425860940683e-06, 'subsample': 0.6040702475156192, 'colsample_bytree': 0.7921578201686162, 'lambda': 0.0010563893442739436, 'alpha': 0.031300027571530505}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  30%|███       | 15/50 [25:55<1:01:04, 104.70s/it]

[I 2025-10-15 17:15:31,722] Trial 14 finished with value: 0.9432282941091206 and parameters: {'learning_rate': 0.12215564727415262, 'max_depth': 8, 'min_child_weight': 6, 'gamma': 2.2101973168670977e-08, 'subsample': 0.7547025414696052, 'colsample_bytree': 0.6477795828524113, 'lambda': 0.2849105418906789, 'alpha': 0.0007412899065848062}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  32%|███▏      | 16/50 [27:23<56:21, 99.46s/it]   

[I 2025-10-15 17:16:59,036] Trial 15 finished with value: 0.9299045420866403 and parameters: {'learning_rate': 0.1856950105050203, 'max_depth': 6, 'min_child_weight': 3, 'gamma': 2.580711504551348e-05, 'subsample': 0.765337335575701, 'colsample_bytree': 0.7604531869358668, 'lambda': 8.704721126195714e-06, 'alpha': 0.9291055456814213}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 0. Best value: 0.958678:  34%|███▍      | 17/50 [29:32<59:38, 108.43s/it]

[I 2025-10-15 17:19:08,317] Trial 16 finished with value: 0.9476150427744034 and parameters: {'learning_rate': 0.0856525429877515, 'max_depth': 9, 'min_child_weight': 8, 'gamma': 3.9316203760614126e-07, 'subsample': 0.5982608470506531, 'colsample_bytree': 0.8783634595344567, 'lambda': 0.00017157751864857769, 'alpha': 0.05240481120561697}. Best is trial 0 with value: 0.9586776501667901.


Best trial: 17. Best value: 0.964758:  36%|███▌      | 18/50 [31:42<1:01:14, 114.82s/it]

[I 2025-10-15 17:21:18,003] Trial 17 finished with value: 0.9647575671369584 and parameters: {'learning_rate': 0.269884331817183, 'max_depth': 9, 'min_child_weight': 6, 'gamma': 1.64764816219849e-07, 'subsample': 0.8840016456313342, 'colsample_bytree': 0.9063612803509961, 'lambda': 2.1721719794925644e-07, 'alpha': 0.0005348908800940645}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  38%|███▊      | 19/50 [34:01<1:03:08, 122.20s/it]

[I 2025-10-15 17:23:37,392] Trial 18 finished with value: 0.9638109412860139 and parameters: {'learning_rate': 0.15939392332964436, 'max_depth': 9, 'min_child_weight': 2, 'gamma': 1.726279692819244e-07, 'subsample': 0.9059991150360159, 'colsample_bytree': 0.9107016895923068, 'lambda': 2.529037491629934e-07, 'alpha': 5.500577188744619e-06}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  40%|████      | 20/50 [36:25<1:04:19, 128.66s/it]

[I 2025-10-15 17:26:01,120] Trial 19 finished with value: 0.9636797033162363 and parameters: {'learning_rate': 0.1609278552105877, 'max_depth': 9, 'min_child_weight': 1, 'gamma': 1.333706387217639e-07, 'subsample': 0.9945081587029909, 'colsample_bytree': 0.9151373499815614, 'lambda': 1.1861745632748032e-07, 'alpha': 5.499289607905484e-07}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  42%|████▏     | 21/50 [37:19<51:25, 106.38s/it]  

[I 2025-10-15 17:26:55,565] Trial 20 finished with value: 0.7962216708966093 and parameters: {'learning_rate': 0.09394478581338586, 'max_depth': 3, 'min_child_weight': 3, 'gamma': 1.2333509335454877e-08, 'subsample': 0.8663369181662688, 'colsample_bytree': 0.9174272993567133, 'lambda': 2.0986522582566774e-06, 'alpha': 1.106935674647386e-05}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  44%|████▍     | 22/50 [39:43<54:50, 117.54s/it]

[I 2025-10-15 17:29:19,105] Trial 21 finished with value: 0.9642907078367429 and parameters: {'learning_rate': 0.16891569716719715, 'max_depth': 9, 'min_child_weight': 1, 'gamma': 2.213216254753462e-07, 'subsample': 0.9938458292266428, 'colsample_bytree': 0.9224417557855272, 'lambda': 1.1689695038000014e-07, 'alpha': 1.9418906059622482e-07}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  46%|████▌     | 23/50 [42:03<55:58, 124.40s/it]

[I 2025-10-15 17:31:39,503] Trial 22 finished with value: 0.9633118102491725 and parameters: {'learning_rate': 0.1569078524988212, 'max_depth': 9, 'min_child_weight': 2, 'gamma': 3.5911670911419976e-07, 'subsample': 0.9536691031040068, 'colsample_bytree': 0.9178155594783359, 'lambda': 1.2577788061132499e-08, 'alpha': 1.270651761136039e-07}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  48%|████▊     | 24/50 [44:47<58:57, 136.06s/it]

[I 2025-10-15 17:34:22,781] Trial 23 finished with value: 0.9594951901821046 and parameters: {'learning_rate': 0.08038010013324232, 'max_depth': 10, 'min_child_weight': 2, 'gamma': 3.855344958273665e-07, 'subsample': 0.9017083787285637, 'colsample_bytree': 0.8880069456554602, 'lambda': 4.600377687861082e-07, 'alpha': 1.166605477421986e-05}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  50%|█████     | 25/50 [46:58<56:09, 134.79s/it]

[I 2025-10-15 17:36:34,609] Trial 24 finished with value: 0.9645144554983698 and parameters: {'learning_rate': 0.21853066705119553, 'max_depth': 9, 'min_child_weight': 5, 'gamma': 1.0496248236537673e-07, 'subsample': 0.8154357921621854, 'colsample_bytree': 0.9990141191372784, 'lambda': 8.301044366449606e-08, 'alpha': 1.2075142356488063e-07}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  52%|█████▏    | 26/50 [48:37<49:38, 124.09s/it]

[I 2025-10-15 17:38:13,734] Trial 25 finished with value: 0.9506528485120445 and parameters: {'learning_rate': 0.20398282373854235, 'max_depth': 7, 'min_child_weight': 5, 'gamma': 6.73654789295331e-08, 'subsample': 0.8064496029724045, 'colsample_bytree': 0.9970786592103822, 'lambda': 4.342835265759073e-08, 'alpha': 1.514882796730201e-07}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  54%|█████▍    | 27/50 [50:49<48:22, 126.20s/it]

[I 2025-10-15 17:40:24,859] Trial 26 finished with value: 0.9647575664658051 and parameters: {'learning_rate': 0.22547526066783638, 'max_depth': 9, 'min_child_weight': 4, 'gamma': 1.0595376395602962e-08, 'subsample': 0.8076718672609983, 'colsample_bytree': 0.9434470453968499, 'lambda': 1.7304263272506613e-06, 'alpha': 0.0004398096691860603}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  56%|█████▌    | 28/50 [53:19<48:55, 133.44s/it]

[I 2025-10-15 17:42:55,198] Trial 27 finished with value: 0.9021189316370906 and parameters: {'learning_rate': 0.019595184856390776, 'max_depth': 10, 'min_child_weight': 4, 'gamma': 1.1291026777569325e-08, 'subsample': 0.7943384463395673, 'colsample_bytree': 0.9657284797583078, 'lambda': 1.4941659875426006e-06, 'alpha': 0.0006332127675974904}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  58%|█████▊    | 29/50 [54:55<42:49, 122.35s/it]

[I 2025-10-15 17:44:31,657] Trial 28 finished with value: 0.9527978140247277 and parameters: {'learning_rate': 0.24191222818669056, 'max_depth': 7, 'min_child_weight': 6, 'gamma': 4.8147016025356374e-08, 'subsample': 0.7284748188119945, 'colsample_bytree': 0.9558940111953108, 'lambda': 4.783073581443939e-05, 'alpha': 0.0002506015825653003}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  60%|██████    | 30/50 [56:50<39:58, 119.90s/it]

[I 2025-10-15 17:46:25,854] Trial 29 finished with value: 0.9635420128783627 and parameters: {'learning_rate': 0.29519800260608103, 'max_depth': 8, 'min_child_weight': 4, 'gamma': 1.0285920827347471e-06, 'subsample': 0.84837896045167, 'colsample_bytree': 0.995418700235616, 'lambda': 0.00027179411821278977, 'alpha': 3.958268808697876e-05}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  62%|██████▏   | 31/50 [58:44<37:25, 118.19s/it]

[I 2025-10-15 17:48:20,040] Trial 30 finished with value: 0.9503064708849367 and parameters: {'learning_rate': 0.12398294412214442, 'max_depth': 8, 'min_child_weight': 5, 'gamma': 2.50305600880281e-05, 'subsample': 0.8167692944192753, 'colsample_bytree': 0.9454586551902529, 'lambda': 5.989734886719654e-06, 'alpha': 0.000334115653773324}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  64%|██████▍   | 32/50 [1:00:54<36:32, 121.80s/it]

[I 2025-10-15 17:50:30,271] Trial 31 finished with value: 0.964460670774969 and parameters: {'learning_rate': 0.23863110181325337, 'max_depth': 9, 'min_child_weight': 6, 'gamma': 3.368353340905146e-08, 'subsample': 0.8897302343089197, 'colsample_bytree': 0.9406179498368661, 'lambda': 1.208113593182323e-07, 'alpha': 6.571647736895098e-08}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 17. Best value: 0.964758:  66%|██████▌   | 33/50 [1:03:02<35:03, 123.74s/it]

[I 2025-10-15 17:52:38,525] Trial 32 finished with value: 0.9642584377664285 and parameters: {'learning_rate': 0.24667313949408545, 'max_depth': 9, 'min_child_weight': 6, 'gamma': 3.1761423777784015e-08, 'subsample': 0.8846244612394736, 'colsample_bytree': 0.8772841961944838, 'lambda': 6.746713306690342e-07, 'alpha': 3.9942839555657326e-08}. Best is trial 17 with value: 0.9647575671369584.


Best trial: 33. Best value: 0.965764:  68%|██████▊   | 34/50 [1:05:21<34:12, 128.31s/it]

[I 2025-10-15 17:54:57,496] Trial 33 finished with value: 0.965764432100287 and parameters: {'learning_rate': 0.211559529191413, 'max_depth': 10, 'min_child_weight': 6, 'gamma': 1.2230800776086208e-08, 'subsample': 0.9507787800424045, 'colsample_bytree': 0.9409506588924733, 'lambda': 5.4376412568042684e-08, 'alpha': 0.0024849757763894184}. Best is trial 33 with value: 0.965764432100287.


Best trial: 34. Best value: 0.965818:  70%|███████   | 35/50 [1:07:44<33:10, 132.67s/it]

[I 2025-10-15 17:57:20,348] Trial 34 finished with value: 0.9658182185825724 and parameters: {'learning_rate': 0.1933410309772009, 'max_depth': 10, 'min_child_weight': 5, 'gamma': 1.1791007736329358e-08, 'subsample': 0.9391887401864758, 'colsample_bytree': 0.8436737961980509, 'lambda': 4.875208929346155e-08, 'alpha': 0.0018303277615631477}. Best is trial 34 with value: 0.9658182185825724.


Best trial: 35. Best value: 0.966339:  72%|███████▏  | 36/50 [1:10:10<31:53, 136.64s/it]

[I 2025-10-15 17:59:46,264] Trial 35 finished with value: 0.9663388607778748 and parameters: {'learning_rate': 0.1860475651591471, 'max_depth': 10, 'min_child_weight': 4, 'gamma': 1.1989038159077863e-08, 'subsample': 0.9491365613542891, 'colsample_bytree': 0.8223377116018663, 'lambda': 3.380923482186126e-08, 'alpha': 0.0020233339806308157}. Best is trial 35 with value: 0.9663388607778748.


Best trial: 35. Best value: 0.966339:  74%|███████▍  | 37/50 [1:12:31<29:52, 137.90s/it]

[I 2025-10-15 18:02:07,099] Trial 36 finished with value: 0.9589379708015768 and parameters: {'learning_rate': 0.10297966287633915, 'max_depth': 10, 'min_child_weight': 8, 'gamma': 0.0025326968261001064, 'subsample': 0.9420941063571839, 'colsample_bytree': 0.842868861499857, 'lambda': 1.0515316569055107e-08, 'alpha': 0.0019517773228510493}. Best is trial 35 with value: 0.9663388607778748.


Best trial: 35. Best value: 0.966339:  76%|███████▌  | 38/50 [1:15:14<29:04, 145.36s/it]

[I 2025-10-15 18:04:49,863] Trial 37 finished with value: 0.8858972139726659 and parameters: {'learning_rate': 0.011376108353540379, 'max_depth': 10, 'min_child_weight': 3, 'gamma': 7.859295454106694e-07, 'subsample': 0.9484553813068535, 'colsample_bytree': 0.7656084034749961, 'lambda': 2.77816328197452e-08, 'alpha': 0.036894464796151445}. Best is trial 35 with value: 0.9663388607778748.


Best trial: 35. Best value: 0.966339:  78%|███████▊  | 39/50 [1:17:34<26:22, 143.85s/it]

[I 2025-10-15 18:07:10,200] Trial 38 finished with value: 0.9627610476645223 and parameters: {'learning_rate': 0.1398985742396597, 'max_depth': 10, 'min_child_weight': 7, 'gamma': 3.0691540870911375e-08, 'subsample': 0.933000064489822, 'colsample_bytree': 0.8218601035602816, 'lambda': 2.274581574796764e-07, 'alpha': 0.00016588608149576865}. Best is trial 35 with value: 0.9663388607778748.


Best trial: 35. Best value: 0.966339:  80%|████████  | 40/50 [1:19:56<23:52, 143.23s/it]

[I 2025-10-15 18:09:31,977] Trial 39 finished with value: 0.9514617848971307 and parameters: {'learning_rate': 0.06788338369539572, 'max_depth': 10, 'min_child_weight': 5, 'gamma': 0.00017436540313242672, 'subsample': 0.9636368267976824, 'colsample_bytree': 0.7802988976307368, 'lambda': 4.0855951586707065e-08, 'alpha': 0.002597538273261046}. Best is trial 35 with value: 0.9663388607778748.


Best trial: 35. Best value: 0.966339:  82%|████████▏ | 41/50 [1:22:12<21:10, 141.21s/it]

[I 2025-10-15 18:11:48,457] Trial 40 finished with value: 0.9167141786637758 and parameters: {'learning_rate': 0.031797279477464836, 'max_depth': 10, 'min_child_weight': 6, 'gamma': 8.638788053925482e-08, 'subsample': 0.9694038440688534, 'colsample_bytree': 0.7292228436832056, 'lambda': 7.931317188546508e-07, 'alpha': 0.016358986846375254}. Best is trial 35 with value: 0.9663388607778748.


Best trial: 41. Best value: 0.966369:  84%|████████▍ | 42/50 [1:24:34<18:51, 141.44s/it]

[I 2025-10-15 18:14:10,454] Trial 41 finished with value: 0.9663689808663607 and parameters: {'learning_rate': 0.18567674604733983, 'max_depth': 10, 'min_child_weight': 4, 'gamma': 1.0504682760380047e-08, 'subsample': 0.9261863429800848, 'colsample_bytree': 0.8538936495756569, 'lambda': 2.873938652759758e-06, 'alpha': 0.0010517948537042028}. Best is trial 41 with value: 0.9663689808663607.


Best trial: 42. Best value: 0.966414:  86%|████████▌ | 43/50 [1:26:56<16:30, 141.48s/it]

[I 2025-10-15 18:16:32,006] Trial 42 finished with value: 0.9664141609643749 and parameters: {'learning_rate': 0.19233332261561073, 'max_depth': 10, 'min_child_weight': 4, 'gamma': 1.1398397388773814e-08, 'subsample': 0.9168214876380959, 'colsample_bytree': 0.8543631908128326, 'lambda': 2.477137816907562e-07, 'alpha': 0.0010359915469845408}. Best is trial 42 with value: 0.9664141609643749.


Best trial: 42. Best value: 0.966414:  88%|████████▊ | 44/50 [1:29:17<14:09, 141.53s/it]

[I 2025-10-15 18:18:53,678] Trial 43 finished with value: 0.9663453157454382 and parameters: {'learning_rate': 0.18799382175086932, 'max_depth': 10, 'min_child_weight': 4, 'gamma': 1.1045144452173612e-08, 'subsample': 0.9242969306373655, 'colsample_bytree': 0.8594223569231606, 'lambda': 3.6372575176792126e-08, 'alpha': 0.004725973415602503}. Best is trial 42 with value: 0.9664141609643749.


Best trial: 42. Best value: 0.966414:  90%|█████████ | 45/50 [1:31:38<11:46, 141.35s/it]

[I 2025-10-15 18:21:14,603] Trial 44 finished with value: 0.9660806903332053 and parameters: {'learning_rate': 0.19201762595079933, 'max_depth': 10, 'min_child_weight': 4, 'gamma': 2.663812959299011e-08, 'subsample': 0.922677533124049, 'colsample_bytree': 0.8461821748286646, 'lambda': 2.3754111136079547e-08, 'alpha': 0.006151193582502654}. Best is trial 42 with value: 0.9664141609643749.


Best trial: 42. Best value: 0.966414:  92%|█████████▏| 46/50 [1:32:51<08:03, 120.75s/it]

[I 2025-10-15 18:22:27,289] Trial 45 finished with value: 0.8872267963485421 and parameters: {'learning_rate': 0.14329470906329064, 'max_depth': 5, 'min_child_weight': 4, 'gamma': 3.036806270842104e-08, 'subsample': 0.9106099652135119, 'colsample_bytree': 0.798027853411504, 'lambda': 1.0112430689406832e-08, 'alpha': 0.10291246232897062}. Best is trial 42 with value: 0.9664141609643749.


Best trial: 46. Best value: 0.966634:  94%|█████████▍| 47/50 [1:35:28<06:34, 131.59s/it]

[I 2025-10-15 18:25:04,172] Trial 46 finished with value: 0.9666336062323072 and parameters: {'learning_rate': 0.1786912232090324, 'max_depth': 10, 'min_child_weight': 3, 'gamma': 6.009415529412678e-08, 'subsample': 0.8558726329105429, 'colsample_bytree': 0.8641884002688407, 'lambda': 2.3812223464914405e-08, 'alpha': 4.630223688020337e-05}. Best is trial 46 with value: 0.9666336062323072.


Best trial: 46. Best value: 0.966634:  96%|█████████▌| 48/50 [1:38:06<04:38, 139.42s/it]

[I 2025-10-15 18:27:41,866] Trial 47 finished with value: 0.9403217235177843 and parameters: {'learning_rate': 0.054847436631770226, 'max_depth': 10, 'min_child_weight': 3, 'gamma': 6.450754945106557e-08, 'subsample': 0.8485894825746234, 'colsample_bytree': 0.5309913059960999, 'lambda': 3.559690491379976e-07, 'alpha': 4.277969581026634e-05}. Best is trial 46 with value: 0.9666336062323072.


Best trial: 46. Best value: 0.966634:  98%|█████████▊| 49/50 [1:39:08<01:56, 116.20s/it]

[I 2025-10-15 18:28:43,876] Trial 48 finished with value: 0.8435400375630309 and parameters: {'learning_rate': 0.12867512769976844, 'max_depth': 4, 'min_child_weight': 3, 'gamma': 0.0020667796463782165, 'subsample': 0.980103040269324, 'colsample_bytree': 0.8724985545336613, 'lambda': 5.185592016847184e-06, 'alpha': 0.00010274863216333713}. Best is trial 46 with value: 0.9666336062323072.


Best trial: 46. Best value: 0.966634: 100%|██████████| 50/50 [1:41:44<00:00, 122.09s/it]

[I 2025-10-15 18:31:20,262] Trial 49 finished with value: 0.9659860284146357 and parameters: {'learning_rate': 0.1763558289078166, 'max_depth': 10, 'min_child_weight': 4, 'gamma': 7.427196859737999e-07, 'subsample': 0.8679044623364404, 'colsample_bytree': 0.8189472652871326, 'lambda': 3.1790377638050322e-06, 'alpha': 0.0010521032775424257}. Best is trial 46 with value: 0.9666336062323072.

--- Optuna Results---
Best score for cross validation: 0.9666
Best hyperparams:
{'learning_rate': 0.1786912232090324, 'max_depth': 10, 'min_child_weight': 3, 'gamma': 6.009415529412678e-08, 'subsample': 0.8558726329105429, 'colsample_bytree': 0.8641884002688407, 'lambda': 2.3812223464914405e-08, 'alpha': 4.630223688020337e-05}





# 6. Train XGBoost model with optimized parameters

In [4]:
import xgboost as xgb
from sklearn.preprocessing import StandardScaler

fixed_params = {
    'objective': 'multi:softmax',
    'eval_metric': 'merror',
    'n_jobs': -1,
    'random_state': 42,
    'tree_method': 'hist',
    'device': 'cuda'
}

# Use this if previous step fails
study = {'learning_rate': 0.1786912232090324, 'max_depth': 10, 'min_child_weight': 3, 'gamma': 6.009415529412678e-08, 'subsample': 0.8558726329105429, 'colsample_bytree': 0.8641884002688407, 'lambda': 2.3812223464914405e-08, 'alpha': 4.630223688020337e-05}
final_params = { **fixed_params, **study }

# Use this if previous step don't fail
# final_params = { **fixed_params, **study.best_params }

final_n_estimators = 1000
scaler = StandardScaler()

final_model = xgb.XGBClassifier(
    **final_params,
    n_estimators = final_n_estimators
)

print("\nTraining optimized model with train data...")
final_model.fit(scaler.fit_transform(X_train), y_train)

y_pred = final_model.predict(scaler.transform(X_test))


Training optimized model with train data...


# 7. Show performance of optimized XGBoost

In [5]:
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy for optimized XGBoost model: {accuracy:.4f}")

print("\nClassification report:")
print(classification_report(y_test, y_pred))


Accuracy for optimized XGBoost model: 0.9707

Classification report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97     42368
           1       0.97      0.98      0.98     56661
           2       0.96      0.97      0.97      7151
           3       0.90      0.87      0.89       549
           4       0.93      0.90      0.91      1899
           5       0.95      0.93      0.94      3473
           6       0.97      0.97      0.97      4102

    accuracy                           0.97    116203
   macro avg       0.95      0.94      0.95    116203
weighted avg       0.97      0.97      0.97    116203



# 8. Train the model with all data and save it

In [6]:
import joblib
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

print("\nTraining optimized model with all data...")
final_model.fit(scaler.fit_transform(X), y)

print("\nSaving model and scaler...")
joblib.dump(final_model, 'xgboost_optimized.pkl')
joblib.dump(scaler, 'xgboost_scaler.pkl')


Training optimized model with all data...

Saving model and scaler...


['xgboost_scaler.pkl']

# 9. Play with the model

In [348]:
import joblib

model = joblib.load('xgboost_optimized.pkl')
scaler = joblib.load('xgboost_scaler.pkl')

elevation = generate_random_number(1000, 1500)
aspect = generate_random_number(0, 360)
slope = generate_random_number(0, 75)
hdh = generate_random_number(0, 10000)
vdh = generate_random_number(0, 5000)
hdr = generate_random_number(0, 10000)
h9 = generate_random_number(0,254)
h12 = generate_random_number(0,254)
h15 = generate_random_number(0,254)
hdfp = generate_random_number(0,10000)
wilderness = generate_random_one_hot_sets(1, 4)
soil = generate_random_one_hot_sets(1, 40)

df_other = pd.DataFrame(columns=other_columns)
df_other.loc[1]=[elevation, aspect, slope, hdh, vdh, hdr, h9, h12, h15, hdfp]

df_wilderness = pd.DataFrame(columns=wilderness_columns)
df_wilderness.loc[1] = wilderness[0]

df_soil = pd.DataFrame(columns=soil_columns)
df_soil.loc[1] = soil[0]

X_rnd = pd.concat([df_other, df_wilderness, df_soil], axis=1)
y_pred = model.predict(scaler.transform(X_rnd))

display(X_rnd)
print(f"Cover predicted: {y_pred[0] + 1}")


Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type_31,Soil_Type_32,Soil_Type_33,Soil_Type_34,Soil_Type_35,Soil_Type_36,Soil_Type_37,Soil_Type_38,Soil_Type_39,Soil_Type_40
1,1238,18,26,482,878,9534,112,213,171,9736,...,0,0,0,0,0,0,0,0,0,0


Cover predicted: 3
