In [17]:
# --- Imports and Setup ---
import pandas as pd
import numpy as np
import xgboost as xgb
import catboost as cb
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from collections import Counter
import optuna
import time
import gc
import warnings
warnings.filterwarnings('ignore')

In [19]:
print("Loading data from your local S: drive...")

try:
    # --- Using the correct folder name 'Kaggle-Fertilizer-Project' ---
    train_path = r'S:\Projects\Kaggle-Fertilizer-Project\data\train.csv'
    test_path = r'S:\Projects\Kaggle-Fertilizer-Project\data\test.csv'
    original_path = r'S:\Projects\Kaggle-Fertilizer-Project\data\Fertilizer Prediction.csv'

    train_df_original = pd.read_csv(train_path)
    test_df_original = pd.read_csv(test_path)
    external_df = pd.read_csv(original_path)

    # Combine all training data
    train_df = pd.concat([train_df_original, external_df], ignore_index=True)
    test_df = test_df_original.copy()
    
    print("All data files loaded successfully!")
    
except FileNotFoundError as e:
    print(f"ERROR: A file was not found. Please double-check your folder and file names.")
    print(f"The path we tried was: {e.filename}")
    raise e


Loading data from your local S: drive...
All data files loaded successfully!


In [20]:
# Cell 3: Generate Base Model Predictions
# This part trains your champion models across 5 folds to get reliable predictions.
# You only need to run this once.

print("\n--- Generating Base Model Predictions (Out-of-Fold) ---")

# --- Champion Model Parameters ---
xgb_params = {
    'device': 'cuda', 'objective': 'multi:softprob', 'eval_metric': 'mlogloss', 'seed': 42,
    'max_depth': 12, 'colsample_bytree': 0.467, 'subsample': 0.86, 'learning_rate': 0.03,
    'gamma': 0.26, 'reg_alpha': 2.7, 'reg_lambda': 1.4,
    'enable_categorical': True, 'num_class': len(y.unique())
}
num_boost_round = 4000

cat_params = {
    'task_type': 'GPU', 'objective': 'MultiClass', 'eval_metric': 'MultiClass', 'verbose': 0, 'random_seed': 42,
    'iterations': 2745, 'learning_rate': 0.0411, 'depth': 5,
    'l2_leaf_reg': 7.96, 'subsample': 0.892, 'bootstrap_type': 'Bernoulli'
}

# --- CV and Prediction Generation ---
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
num_classes = len(y.unique())

oof_preds_xgb = np.zeros((len(X), num_classes))
test_preds_xgb = np.zeros((len(X_test), num_classes))
oof_preds_cat = np.zeros((len(X), num_classes))
test_preds_cat = np.zeros((len(X_test), num_classes))

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f"\n===== FOLD {fold + 1} / {N_SPLITS} =====")
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    # --- XGBoost ---
    print("  - Training XGBoost...")
    dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
    dval = xgb.DMatrix(X_val, label=y_val, enable_categorical=True)
    dtest = xgb.DMatrix(X_test, enable_categorical=True)
    xgb_model = xgb.train(params=xgb_params, dtrain=dtrain, num_boost_round=num_boost_round, evals=[(dval, 'eval')], early_stopping_rounds=150, verbose_eval=False)
    oof_preds_xgb[val_idx] = xgb_model.predict(dval, iteration_range=(0, xgb_model.best_iteration))
    test_preds_xgb += xgb_model.predict(dtest, iteration_range=(0, xgb_model.best_iteration)) / N_SPLITS

    # --- CatBoost ---
    print("  - Training CatBoost...")
    cat_model = cb.CatBoostClassifier(**cat_params)
    cat_model.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=100, verbose=False)
    oof_preds_cat[val_idx] = cat_model.predict_proba(X_val)
    test_preds_cat += cat_model.predict_proba(X_test) / N_SPLITS
    
    gc.collect()

print("\nBase model predictions generated.")


--- Generating Base Model Predictions (Out-of-Fold) ---

===== FOLD 1 / 5 =====
  - Training XGBoost...
  - Training CatBoost...

===== FOLD 2 / 5 =====
  - Training XGBoost...
  - Training CatBoost...

===== FOLD 3 / 5 =====
  - Training XGBoost...
  - Training CatBoost...

===== FOLD 4 / 5 =====
  - Training XGBoost...
  - Training CatBoost...

===== FOLD 5 / 5 =====
  - Training XGBoost...
  - Training CatBoost...

Base model predictions generated.


In [21]:
# Cell 4: Optimize Ensemble Weights with Optuna

print("\n--- Step 4: Optimizing Ensemble Weights ---")

# Define your scoring function
def calculate_map3(y_true, y_pred_probs):
    def ap3(actual, predicted):
        score = 0.0
        for i, p in enumerate(predicted[:3]):
            if p == actual:
                score += 1.0 / (i + 1.0)
                return score
        return score
    return np.mean([ap3(a, p) for a, p in zip(y_true, np.argsort(-y_pred_probs, axis=1))])

def objective_weights(trial):
    # Suggest weights for each model
    w_xgb = trial.suggest_float("w_xgb", 0.0, 1.0)
    w_cat = trial.suggest_float("w_cat", 0.0, 1.0)
    
    # Blend the OOF predictions with the suggested weights
    weighted_oof_preds = (w_xgb * oof_preds_xgb) + (w_cat * oof_preds_cat)
    
    # Return the MAP@3 score
    return calculate_map3(y.values, weighted_oof_preds)

# Run the Optuna study to find the best weights
study_weights = optuna.create_study(direction='maximize', study_name='Weight_Optimization')
study_weights.optimize(objective_weights, n_trials=100, show_progress_bar=True)

best_weights = study_weights.best_params
print("\nBest weights found:", best_weights)

[I 2025-07-15 03:14:58,843] A new study created in memory with name: Weight_Optimization



--- Step 4: Optimizing Ensemble Weights ---


Best trial: 0. Best value: 0.350433:   1%|          | 1/100 [00:01<01:39,  1.01s/it]

[I 2025-07-15 03:14:59,861] Trial 0 finished with value: 0.3504329411764706 and parameters: {'w_xgb': 0.7640242689270729, 'w_cat': 0.6161492511861133}. Best is trial 0 with value: 0.3504329411764706.


Best trial: 0. Best value: 0.350433:   2%|▏         | 2/100 [00:01<01:33,  1.04it/s]

[I 2025-07-15 03:15:00,772] Trial 1 finished with value: 0.34998862745098036 and parameters: {'w_xgb': 0.2447026643698771, 'w_cat': 0.2350571140517249}. Best is trial 0 with value: 0.3504329411764706.


Best trial: 0. Best value: 0.350433:   3%|▎         | 3/100 [00:02<01:29,  1.08it/s]

[I 2025-07-15 03:15:01,668] Trial 2 finished with value: 0.34924725490196074 and parameters: {'w_xgb': 0.742490801115531, 'w_cat': 0.8872609229935712}. Best is trial 0 with value: 0.3504329411764706.


Best trial: 0. Best value: 0.350433:   4%|▍         | 4/100 [00:03<01:27,  1.10it/s]

[I 2025-07-15 03:15:02,551] Trial 3 finished with value: 0.34764725490196074 and parameters: {'w_xgb': 0.5023456629147738, 'w_cat': 0.8870532361826031}. Best is trial 0 with value: 0.3504329411764706.


Best trial: 0. Best value: 0.350433:   5%|▌         | 5/100 [00:04<01:26,  1.10it/s]

[I 2025-07-15 03:15:03,462] Trial 4 finished with value: 0.33956411764705885 and parameters: {'w_xgb': 0.11020301282555345, 'w_cat': 0.7883337947391261}. Best is trial 0 with value: 0.3504329411764706.


Best trial: 5. Best value: 0.351458:   6%|▌         | 6/100 [00:05<01:25,  1.10it/s]

[I 2025-07-15 03:15:04,364] Trial 5 finished with value: 0.3514582352941176 and parameters: {'w_xgb': 0.7085969994224324, 'w_cat': 0.09315133872303738}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:   7%|▋         | 7/100 [00:06<01:25,  1.09it/s]

[I 2025-07-15 03:15:05,300] Trial 6 finished with value: 0.3402825490196078 and parameters: {'w_xgb': 0.15747740685676792, 'w_cat': 0.9913984406197767}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:   8%|▊         | 8/100 [00:07<01:25,  1.07it/s]

[I 2025-07-15 03:15:06,266] Trial 7 finished with value: 0.3498521568627451 and parameters: {'w_xgb': 0.8608348378533877, 'w_cat': 0.8739340108107692}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:   9%|▉         | 9/100 [00:08<01:25,  1.07it/s]

[I 2025-07-15 03:15:07,211] Trial 8 finished with value: 0.35081235294117646 and parameters: {'w_xgb': 0.458202897061162, 'w_cat': 0.3001201232858407}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  10%|█         | 10/100 [00:09<01:21,  1.10it/s]

[I 2025-07-15 03:15:08,056] Trial 9 finished with value: 0.35102607843137257 and parameters: {'w_xgb': 0.8913326790518723, 'w_cat': 0.5233877346319362}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  11%|█         | 11/100 [00:10<01:23,  1.06it/s]

[I 2025-07-15 03:15:09,076] Trial 10 finished with value: 0.35131803921568633 and parameters: {'w_xgb': 0.5589769061518253, 'w_cat': 0.029026913844935054}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  12%|█▏        | 12/100 [00:11<01:20,  1.09it/s]

[I 2025-07-15 03:15:09,926] Trial 11 finished with value: 0.3513105882352941 and parameters: {'w_xgb': 0.5593159320367913, 'w_cat': 0.03291501209809639}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  13%|█▎        | 13/100 [00:11<01:19,  1.10it/s]

[I 2025-07-15 03:15:10,836] Trial 12 finished with value: 0.3513050980392157 and parameters: {'w_xgb': 0.6358305348972104, 'w_cat': 0.004203847177759006}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  14%|█▍        | 14/100 [00:12<01:17,  1.10it/s]

[I 2025-07-15 03:15:11,727] Trial 13 finished with value: 0.35106843137254895 and parameters: {'w_xgb': 0.35563463331909173, 'w_cat': 0.20288302142953135}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  15%|█▌        | 15/100 [00:13<01:17,  1.09it/s]

[I 2025-07-15 03:15:12,663] Trial 14 finished with value: 0.3514472549019607 and parameters: {'w_xgb': 0.9952403406640717, 'w_cat': 0.12580433612170128}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  16%|█▌        | 16/100 [00:14<01:15,  1.11it/s]

[I 2025-07-15 03:15:13,522] Trial 15 finished with value: 0.35145764705882354 and parameters: {'w_xgb': 0.9864076485172112, 'w_cat': 0.3400126290806353}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  17%|█▋        | 17/100 [00:15<01:14,  1.11it/s]

[I 2025-07-15 03:15:14,423] Trial 16 finished with value: 0.3514360784313726 and parameters: {'w_xgb': 0.9984954519693338, 'w_cat': 0.3697606292674546}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  18%|█▊        | 18/100 [00:16<01:13,  1.11it/s]

[I 2025-07-15 03:15:15,319] Trial 17 finished with value: 0.35104137254901957 and parameters: {'w_xgb': 0.7160969482130087, 'w_cat': 0.4188571006938244}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 5. Best value: 0.351458:  19%|█▉        | 19/100 [00:17<01:13,  1.11it/s]

[I 2025-07-15 03:15:16,231] Trial 18 finished with value: 0.3507588235294118 and parameters: {'w_xgb': 0.8928997287245313, 'w_cat': 0.6077056923943625}. Best is trial 5 with value: 0.3514582352941176.


Best trial: 19. Best value: 0.351512:  20%|██        | 20/100 [00:18<01:11,  1.12it/s]

[I 2025-07-15 03:15:17,095] Trial 19 finished with value: 0.35151215686274506 and parameters: {'w_xgb': 0.7927834773208822, 'w_cat': 0.14540600809401671}. Best is trial 19 with value: 0.35151215686274506.


Best trial: 19. Best value: 0.351512:  21%|██        | 21/100 [00:19<01:09,  1.14it/s]

[I 2025-07-15 03:15:17,949] Trial 20 finished with value: 0.35149980392156865 and parameters: {'w_xgb': 0.6702307260304405, 'w_cat': 0.12560381315895716}. Best is trial 19 with value: 0.35151215686274506.


Best trial: 21. Best value: 0.35158:  22%|██▏       | 22/100 [00:19<01:08,  1.13it/s] 

[I 2025-07-15 03:15:18,844] Trial 21 finished with value: 0.3515798039215686 and parameters: {'w_xgb': 0.6575136510962942, 'w_cat': 0.15921032364662116}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  23%|██▎       | 23/100 [00:20<01:10,  1.09it/s]

[I 2025-07-15 03:15:19,838] Trial 22 finished with value: 0.3515719607843137 and parameters: {'w_xgb': 0.6364711006690532, 'w_cat': 0.17228678202767855}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  24%|██▍       | 24/100 [00:21<01:08,  1.11it/s]

[I 2025-07-15 03:15:20,705] Trial 23 finished with value: 0.3515623529411765 and parameters: {'w_xgb': 0.8287380093995199, 'w_cat': 0.22667664222459694}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  25%|██▌       | 25/100 [00:22<01:07,  1.11it/s]

[I 2025-07-15 03:15:21,597] Trial 24 finished with value: 0.3514172549019608 and parameters: {'w_xgb': 0.5963898070653783, 'w_cat': 0.24099824611626816}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  26%|██▌       | 26/100 [00:23<01:06,  1.11it/s]

[I 2025-07-15 03:15:22,496] Trial 25 finished with value: 0.3500513725490196 and parameters: {'w_xgb': 0.4705236398854307, 'w_cat': 0.44297483531888593}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  27%|██▋       | 27/100 [00:24<01:05,  1.12it/s]

[I 2025-07-15 03:15:23,367] Trial 26 finished with value: 0.3514629411764706 and parameters: {'w_xgb': 0.8318940093509765, 'w_cat': 0.2845323162457213}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  28%|██▊       | 28/100 [00:25<01:03,  1.13it/s]

[I 2025-07-15 03:15:24,247] Trial 27 finished with value: 0.35107803921568626 and parameters: {'w_xgb': 0.3287328746586952, 'w_cat': 0.18918822646213107}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  29%|██▉       | 29/100 [00:26<01:02,  1.13it/s]

[I 2025-07-15 03:15:25,124] Trial 28 finished with value: 0.35031862745098036 and parameters: {'w_xgb': 0.6367417546691818, 'w_cat': 0.5355065489485406}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  30%|███       | 30/100 [00:27<01:01,  1.14it/s]

[I 2025-07-15 03:15:25,998] Trial 29 finished with value: 0.3513643137254902 and parameters: {'w_xgb': 0.7854250535750207, 'w_cat': 0.0716835360567564}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  31%|███       | 31/100 [00:28<01:00,  1.14it/s]

[I 2025-07-15 03:15:26,871] Trial 30 finished with value: 0.3499913725490196 and parameters: {'w_xgb': 0.41009860352092464, 'w_cat': 0.3941605742382369}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  32%|███▏      | 32/100 [00:28<00:59,  1.14it/s]

[I 2025-07-15 03:15:27,742] Trial 31 finished with value: 0.3515145098039216 and parameters: {'w_xgb': 0.7822077918975985, 'w_cat': 0.1570744244127619}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  33%|███▎      | 33/100 [00:29<00:59,  1.13it/s]

[I 2025-07-15 03:15:28,636] Trial 32 finished with value: 0.35156176470588235 and parameters: {'w_xgb': 0.6833827870996845, 'w_cat': 0.17985175787389462}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  34%|███▍      | 34/100 [00:30<00:58,  1.13it/s]

[I 2025-07-15 03:15:29,523] Trial 33 finished with value: 0.35142823529411765 and parameters: {'w_xgb': 0.6857182153168276, 'w_cat': 0.2649311706594295}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  35%|███▌      | 35/100 [00:31<00:59,  1.09it/s]

[I 2025-07-15 03:15:30,514] Trial 34 finished with value: 0.35147843137254897 and parameters: {'w_xgb': 0.5364716609987404, 'w_cat': 0.1944987014822848}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  36%|███▌      | 36/100 [00:32<00:58,  1.10it/s]

[I 2025-07-15 03:15:31,404] Trial 35 finished with value: 0.35111372549019604 and parameters: {'w_xgb': 0.6267218390186434, 'w_cat': 0.3431043100720404}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  37%|███▋      | 37/100 [00:33<00:57,  1.10it/s]

[I 2025-07-15 03:15:32,311] Trial 36 finished with value: 0.3513558823529412 and parameters: {'w_xgb': 0.7360238654669776, 'w_cat': 0.0695181091677493}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  38%|███▊      | 38/100 [00:34<00:55,  1.11it/s]

[I 2025-07-15 03:15:33,193] Trial 37 finished with value: 0.3502133333333333 and parameters: {'w_xgb': 0.8338297378390224, 'w_cat': 0.7437454372857651}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  39%|███▉      | 39/100 [00:35<00:54,  1.11it/s]

[I 2025-07-15 03:15:34,090] Trial 38 finished with value: 0.3514721568627451 and parameters: {'w_xgb': 0.9120044209854077, 'w_cat': 0.30119930148478746}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  40%|████      | 40/100 [00:36<00:53,  1.12it/s]

[I 2025-07-15 03:15:34,963] Trial 39 finished with value: 0.3515741176470588 and parameters: {'w_xgb': 0.7420425008178597, 'w_cat': 0.2131282387478805}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  41%|████      | 41/100 [00:37<00:52,  1.12it/s]

[I 2025-07-15 03:15:35,861] Trial 40 finished with value: 0.35102372549019606 and parameters: {'w_xgb': 0.7662840329614344, 'w_cat': 0.4557860520924818}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  42%|████▏     | 42/100 [00:37<00:51,  1.12it/s]

[I 2025-07-15 03:15:36,764] Trial 41 finished with value: 0.35146529411764704 and parameters: {'w_xgb': 0.6820980745289724, 'w_cat': 0.23288119393268844}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  43%|████▎     | 43/100 [00:38<00:50,  1.12it/s]

[I 2025-07-15 03:15:37,639] Trial 42 finished with value: 0.351501568627451 and parameters: {'w_xgb': 0.5844246741226272, 'w_cat': 0.09787816244634624}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  44%|████▍     | 44/100 [00:39<00:50,  1.12it/s]

[I 2025-07-15 03:15:38,548] Trial 43 finished with value: 0.3368631372549019 and parameters: {'w_xgb': 0.013439060959454796, 'w_cat': 0.17467289772326822}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  45%|████▌     | 45/100 [00:40<00:49,  1.11it/s]

[I 2025-07-15 03:15:39,451] Trial 44 finished with value: 0.35147392156862745 and parameters: {'w_xgb': 0.7139253412754603, 'w_cat': 0.23537315753812474}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  46%|████▌     | 46/100 [00:41<00:49,  1.09it/s]

[I 2025-07-15 03:15:40,405] Trial 45 finished with value: 0.3508596078431372 and parameters: {'w_xgb': 0.4951933302421079, 'w_cat': 0.3193850702361163}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  47%|████▋     | 47/100 [00:42<00:49,  1.08it/s]

[I 2025-07-15 03:15:41,355] Trial 46 finished with value: 0.35136156862745105 and parameters: {'w_xgb': 0.6502327898127575, 'w_cat': 0.05704359979519785}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  48%|████▊     | 48/100 [00:43<00:47,  1.09it/s]

[I 2025-07-15 03:15:42,265] Trial 47 finished with value: 0.35145039215686275 and parameters: {'w_xgb': 0.9223940703343162, 'w_cat': 0.10787593457983623}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  49%|████▉     | 49/100 [00:44<00:46,  1.10it/s]

[I 2025-07-15 03:15:43,140] Trial 48 finished with value: 0.3515623529411765 and parameters: {'w_xgb': 0.8297374860128427, 'w_cat': 0.22694690239771248}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  50%|█████     | 50/100 [00:45<00:45,  1.10it/s]

[I 2025-07-15 03:15:44,045] Trial 49 finished with value: 0.3515429411764706 and parameters: {'w_xgb': 0.840366548631301, 'w_cat': 0.26088889966589235}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 21. Best value: 0.35158:  51%|█████     | 51/100 [00:46<00:44,  1.09it/s]

[I 2025-07-15 03:15:44,984] Trial 50 finished with value: 0.3513250980392157 and parameters: {'w_xgb': 0.749287639218053, 'w_cat': 0.02421688008770226}. Best is trial 21 with value: 0.3515798039215686.


Best trial: 51. Best value: 0.351588:  52%|█████▏    | 52/100 [00:47<00:43,  1.10it/s]

[I 2025-07-15 03:15:45,888] Trial 51 finished with value: 0.35158784313725494 and parameters: {'w_xgb': 0.8072313536991402, 'w_cat': 0.2099567860458908}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  53%|█████▎    | 53/100 [00:47<00:42,  1.11it/s]

[I 2025-07-15 03:15:46,760] Trial 52 finished with value: 0.35155980392156866 and parameters: {'w_xgb': 0.8182412452709266, 'w_cat': 0.21843068435967}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  54%|█████▍    | 54/100 [00:48<00:41,  1.11it/s]

[I 2025-07-15 03:15:47,669] Trial 53 finished with value: 0.35150509803921565 and parameters: {'w_xgb': 0.8802574260931266, 'w_cat': 0.16381628600702297}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  55%|█████▌    | 55/100 [00:49<00:41,  1.09it/s]

[I 2025-07-15 03:15:48,613] Trial 54 finished with value: 0.35147568627450987 and parameters: {'w_xgb': 0.9344293654402007, 'w_cat': 0.13393459900044766}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  56%|█████▌    | 56/100 [00:50<00:39,  1.11it/s]

[I 2025-07-15 03:15:49,473] Trial 55 finished with value: 0.3514078431372549 and parameters: {'w_xgb': 0.9466818189742219, 'w_cat': 0.3564498922767132}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  57%|█████▋    | 57/100 [00:51<00:38,  1.11it/s]

[I 2025-07-15 03:15:50,374] Trial 56 finished with value: 0.3494890196078432 and parameters: {'w_xgb': 0.8706142345901653, 'w_cat': 0.9945452840310327}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  58%|█████▊    | 58/100 [00:52<00:38,  1.09it/s]

[I 2025-07-15 03:15:51,342] Trial 57 finished with value: 0.3514582352941176 and parameters: {'w_xgb': 0.7984302451466699, 'w_cat': 0.27519493697753583}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  59%|█████▉    | 59/100 [00:53<00:37,  1.09it/s]

[I 2025-07-15 03:15:52,265] Trial 58 finished with value: 0.35142098039215686 and parameters: {'w_xgb': 0.7389212153567608, 'w_cat': 0.31026835673193537}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  60%|██████    | 60/100 [00:54<00:36,  1.10it/s]

[I 2025-07-15 03:15:53,149] Trial 59 finished with value: 0.35141725490196074 and parameters: {'w_xgb': 0.5367699346056347, 'w_cat': 0.21686157923385596}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  61%|██████    | 61/100 [00:55<00:35,  1.10it/s]

[I 2025-07-15 03:15:54,046] Trial 60 finished with value: 0.3508335294117647 and parameters: {'w_xgb': 0.6073311447994716, 'w_cat': 0.3955052853616995}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  62%|██████▏   | 62/100 [00:56<00:34,  1.11it/s]

[I 2025-07-15 03:15:54,938] Trial 61 finished with value: 0.35157450980392163 and parameters: {'w_xgb': 0.6911477518276313, 'w_cat': 0.18744926457059238}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  63%|██████▎   | 63/100 [00:56<00:33,  1.11it/s]

[I 2025-07-15 03:15:55,831] Trial 62 finished with value: 0.35150117647058826 and parameters: {'w_xgb': 0.714954403359083, 'w_cat': 0.1354082255650406}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  64%|██████▍   | 64/100 [00:57<00:32,  1.11it/s]

[I 2025-07-15 03:15:56,729] Trial 63 finished with value: 0.3514494117647059 and parameters: {'w_xgb': 0.7556887661742581, 'w_cat': 0.0955390104605377}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  65%|██████▌   | 65/100 [00:58<00:31,  1.11it/s]

[I 2025-07-15 03:15:57,638] Trial 64 finished with value: 0.3515819607843137 and parameters: {'w_xgb': 0.8518758105732565, 'w_cat': 0.18918360700001738}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  66%|██████▌   | 66/100 [00:59<00:30,  1.11it/s]

[I 2025-07-15 03:15:58,531] Trial 65 finished with value: 0.35155980392156866 and parameters: {'w_xgb': 0.6491565263671163, 'w_cat': 0.1558376501861433}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  67%|██████▋   | 67/100 [01:00<00:29,  1.11it/s]

[I 2025-07-15 03:15:59,424] Trial 66 finished with value: 0.35127372549019603 and parameters: {'w_xgb': 0.9592644916163197, 'w_cat': 0.0020989851934734016}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  68%|██████▊   | 68/100 [01:01<00:28,  1.11it/s]

[I 2025-07-15 03:16:00,322] Trial 67 finished with value: 0.35157058823529413 and parameters: {'w_xgb': 0.8002816484953955, 'w_cat': 0.19632569187185334}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  69%|██████▉   | 69/100 [01:02<00:27,  1.11it/s]

[I 2025-07-15 03:16:01,219] Trial 68 finished with value: 0.350203137254902 and parameters: {'w_xgb': 0.7993995519882101, 'w_cat': 0.7135913822777189}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  70%|███████   | 70/100 [01:03<00:27,  1.09it/s]

[I 2025-07-15 03:16:02,183] Trial 69 finished with value: 0.3515101960784313 and parameters: {'w_xgb': 0.5793576083814166, 'w_cat': 0.18412255810741365}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  71%|███████   | 71/100 [01:04<00:26,  1.10it/s]

[I 2025-07-15 03:16:03,078] Trial 70 finished with value: 0.3513560784313725 and parameters: {'w_xgb': 0.6772432427225696, 'w_cat': 0.06484860318162956}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  72%|███████▏  | 72/100 [01:05<00:25,  1.10it/s]

[I 2025-07-15 03:16:03,972] Trial 71 finished with value: 0.35157450980392163 and parameters: {'w_xgb': 0.850963666683566, 'w_cat': 0.2474661111994568}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  73%|███████▎  | 73/100 [01:06<00:24,  1.10it/s]

[I 2025-07-15 03:16:04,879] Trial 72 finished with value: 0.3515839215686275 and parameters: {'w_xgb': 0.8643279917048129, 'w_cat': 0.253674645999066}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  74%|███████▍  | 74/100 [01:06<00:23,  1.11it/s]

[I 2025-07-15 03:16:05,776] Trial 73 finished with value: 0.3515125490196078 and parameters: {'w_xgb': 0.8577553978983189, 'w_cat': 0.27353995122119373}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  75%|███████▌  | 75/100 [01:07<00:22,  1.10it/s]

[I 2025-07-15 03:16:06,690] Trial 74 finished with value: 0.35155803921568624 and parameters: {'w_xgb': 0.9008790465551844, 'w_cat': 0.2537790437552813}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  76%|███████▌  | 76/100 [01:08<00:21,  1.11it/s]

[I 2025-07-15 03:16:07,587] Trial 75 finished with value: 0.35149294117647056 and parameters: {'w_xgb': 0.7715344828534477, 'w_cat': 0.12491465151706199}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  77%|███████▋  | 77/100 [01:09<00:20,  1.11it/s]

[I 2025-07-15 03:16:08,477] Trial 76 finished with value: 0.3514656862745098 and parameters: {'w_xgb': 0.9758022302451017, 'w_cat': 0.33314570512422287}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 51. Best value: 0.351588:  78%|███████▊  | 78/100 [01:10<00:19,  1.11it/s]

[I 2025-07-15 03:16:09,369] Trial 77 finished with value: 0.3514154901960784 and parameters: {'w_xgb': 0.7287005445924828, 'w_cat': 0.2910340427529536}. Best is trial 51 with value: 0.35158784313725494.


Best trial: 78. Best value: 0.351593:  79%|███████▉  | 79/100 [01:11<00:18,  1.11it/s]

[I 2025-07-15 03:16:10,265] Trial 78 finished with value: 0.351593137254902 and parameters: {'w_xgb': 0.8664966498370847, 'w_cat': 0.19963464587757696}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  80%|████████  | 80/100 [01:12<00:17,  1.12it/s]

[I 2025-07-15 03:16:11,149] Trial 79 finished with value: 0.35157 and parameters: {'w_xgb': 0.855530006337347, 'w_cat': 0.20907415786393602}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  81%|████████  | 81/100 [01:13<00:17,  1.12it/s]

[I 2025-07-15 03:16:12,052] Trial 80 finished with value: 0.35141823529411764 and parameters: {'w_xgb': 0.8885855863193339, 'w_cat': 0.3742549678956349}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  82%|████████▏ | 82/100 [01:14<00:16,  1.08it/s]

[I 2025-07-15 03:16:13,055] Trial 81 finished with value: 0.35072215686274516 and parameters: {'w_xgb': 0.2317629251189236, 'w_cat': 0.16640525918519924}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  83%|████████▎ | 83/100 [01:15<00:15,  1.09it/s]

[I 2025-07-15 03:16:13,951] Trial 82 finished with value: 0.3514562745098039 and parameters: {'w_xgb': 0.6930090586778133, 'w_cat': 0.25413080184637976}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  84%|████████▍ | 84/100 [01:15<00:14,  1.10it/s]

[I 2025-07-15 03:16:14,843] Trial 83 finished with value: 0.35156313725490196 and parameters: {'w_xgb': 0.8108489250821509, 'w_cat': 0.20012618636352242}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  85%|████████▌ | 85/100 [01:16<00:13,  1.10it/s]

[I 2025-07-15 03:16:15,760] Trial 84 finished with value: 0.35151666666666664 and parameters: {'w_xgb': 0.6181287672035932, 'w_cat': 0.10540078158239717}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  86%|████████▌ | 86/100 [01:17<00:12,  1.10it/s]

[I 2025-07-15 03:16:16,668] Trial 85 finished with value: 0.3514990196078431 and parameters: {'w_xgb': 0.8513741930784456, 'w_cat': 0.15750454606162662}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  87%|████████▋ | 87/100 [01:18<00:11,  1.09it/s]

[I 2025-07-15 03:16:17,586] Trial 86 finished with value: 0.3515260784313725 and parameters: {'w_xgb': 0.7696507916336041, 'w_cat': 0.24310973539010344}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  88%|████████▊ | 88/100 [01:19<00:10,  1.10it/s]

[I 2025-07-15 03:16:18,481] Trial 87 finished with value: 0.3514725490196078 and parameters: {'w_xgb': 0.9140941473321667, 'w_cat': 0.12778368647668997}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  89%|████████▉ | 89/100 [01:20<00:09,  1.10it/s]

[I 2025-07-15 03:16:19,382] Trial 88 finished with value: 0.3485023529411765 and parameters: {'w_xgb': 0.6567627820035284, 'w_cat': 0.9517218373901916}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  90%|█████████ | 90/100 [01:21<00:09,  1.11it/s]

[I 2025-07-15 03:16:20,274] Trial 89 finished with value: 0.35131745098039213 and parameters: {'w_xgb': 0.8754209069193082, 'w_cat': 0.04801304468283328}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  91%|█████████ | 91/100 [01:22<00:08,  1.11it/s]

[I 2025-07-15 03:16:21,172] Trial 90 finished with value: 0.35043431372549017 and parameters: {'w_xgb': 0.7187130891933684, 'w_cat': 0.5734050591863493}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  92%|█████████▏| 92/100 [01:23<00:07,  1.11it/s]

[I 2025-07-15 03:16:22,068] Trial 91 finished with value: 0.3515641176470588 and parameters: {'w_xgb': 0.8121161568221801, 'w_cat': 0.19386810582956346}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  93%|█████████▎| 93/100 [01:24<00:06,  1.08it/s]

[I 2025-07-15 03:16:23,066] Trial 92 finished with value: 0.3515917647058823 and parameters: {'w_xgb': 0.7749374486709466, 'w_cat': 0.1813819245931895}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  94%|█████████▍| 94/100 [01:25<00:05,  1.07it/s]

[I 2025-07-15 03:16:24,010] Trial 93 finished with value: 0.35156529411764703 and parameters: {'w_xgb': 0.7525674199506497, 'w_cat': 0.2143800245057965}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  95%|█████████▌| 95/100 [01:26<00:04,  1.09it/s]

[I 2025-07-15 03:16:24,901] Trial 94 finished with value: 0.351401568627451 and parameters: {'w_xgb': 0.7814315988362457, 'w_cat': 0.08361923578850372}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  96%|█████████▌| 96/100 [01:26<00:03,  1.09it/s]

[I 2025-07-15 03:16:25,806] Trial 95 finished with value: 0.35155549019607846 and parameters: {'w_xgb': 0.7029105892336474, 'w_cat': 0.17468942922746347}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  97%|█████████▋| 97/100 [01:27<00:02,  1.09it/s]

[I 2025-07-15 03:16:26,719] Trial 96 finished with value: 0.35151078431372546 and parameters: {'w_xgb': 0.8433823841989553, 'w_cat': 0.14265427259207042}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  98%|█████████▊| 98/100 [01:28<00:01,  1.11it/s]

[I 2025-07-15 03:16:27,582] Trial 97 finished with value: 0.35146372549019606 and parameters: {'w_xgb': 0.8209230755336969, 'w_cat': 0.29913614110837594}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593:  99%|█████████▉| 99/100 [01:29<00:00,  1.11it/s]

[I 2025-07-15 03:16:28,490] Trial 98 finished with value: 0.3510780392156863 and parameters: {'w_xgb': 0.41191062511379295, 'w_cat': 0.2340776212978869}. Best is trial 78 with value: 0.351593137254902.


Best trial: 78. Best value: 0.351593: 100%|██████████| 100/100 [01:30<00:00,  1.10it/s]

[I 2025-07-15 03:16:29,412] Trial 99 finished with value: 0.35144509803921575 and parameters: {'w_xgb': 0.9381372581642947, 'w_cat': 0.11929888896597418}. Best is trial 78 with value: 0.351593137254902.

Best weights found: {'w_xgb': 0.8664966498370847, 'w_cat': 0.19963464587757696}





In [22]:
# Cell 5: Create Final Submission with Optimized Weights
# =============================================================================
print("\n--- Step 5: Creating final submission with optimal weights ---")

# Blend the test set predictions using the best weights found by Optuna
w_xgb = best_weights['w_xgb']
w_cat = best_weights['w_cat']
# Normalize the weights so they sum to 1
total_weight = w_xgb + w_cat
final_ensemble_proba = ((w_xgb / total_weight) * test_preds_xgb) + ((w_cat / total_weight) * test_preds_cat)

# --- Create the submission file ---
top_3_indices = np.argsort(final_ensemble_proba, axis=1)[:, -3:][:, ::-1]
predicted_names_list = [' '.join(le.inverse_transform(indices)) for indices in top_3_indices]
submission_df = pd.DataFrame({'id': test_ids, 'Fertilizer Name': predicted_names_list})
submission_df.to_csv('submission_optimized_ensemble.csv', index=False)

print("\nSUCCESS! Final optimized ensemble submission file created.")
print(submission_df.head())


--- Step 5: Creating final submission with optimal weights ---

SUCCESS! Final optimized ensemble submission file created.
       id      Fertilizer Name
0  750000      DAP 28-28 20-20
1  750001  17-17-17 20-20 Urea
2  750002      20-20 28-28 DAP
3  750003    14-35-14 DAP Urea
4  750004  20-20 10-26-26 Urea
