<!-- Google Fonts -->
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap" rel="stylesheet">

<div style="
    border-radius: 15px; 
    border: 2px solid #8B4513; 
    padding: 20px; 
    background: linear-gradient(125deg, #d73027 7%, #fdae61 47%, #a6d96a 85%, #1a9850 100%);
    text-align: center; 
    box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.5);
">
    <h1 style="
        color: #fff; 
        text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.7); 
        font-weight: bold; 
        margin-bottom: 10px; 
        font-size: 36px; 
        font-family: 'Roboto', sans-serif;
        letter-spacing: 1px;
    ">
        🌾 Smart Fertilizer Ranker | XGBoost + MAP@3 🔢
    </h1>
</div>

In [1]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings("ignore")

# Start timer
start_time = time.time()


In [2]:
# Load datasets
train_data = pd.read_csv('/kaggle/input/playground-series-s5e6/train.csv')
test_data = pd.read_csv('/kaggle/input/playground-series-s5e6/test.csv')
sample_data = pd.read_csv('/kaggle/input/playground-series-s5e6/sample_submission.csv')
original_data = pd.read_csv('/kaggle/input/fertilizer-prediction/Fertilizer Prediction.csv')

print("📂 Datasets loaded successfully!")


📂 Datasets loaded successfully!


In [3]:
# Fix column name typo
for df in [train_data, test_data, original_data]:
    df.rename(columns={'Temparature': 'Temperature'}, inplace=True)
print("✅ Fixed column name: 'Temparature' ➝ 'Temperature'")


✅ Fixed column name: 'Temparature' ➝ 'Temperature'


In [4]:
# Augment original dataset
original_data = pd.concat([original_data]*7, ignore_index=True)
print(f"🧪 Original dataset replicated to match training size (shape: {original_data.shape})")


🧪 Original dataset replicated to match training size (shape: (700000, 9))


In [5]:
# Feature engineering: bin numerical features
def feature_eng(df):
    for col in df.select_dtypes(include=['int64', 'float64']).columns:
        if col != 'id':
            df[f'{col}_Binned'] = df[col].astype(str).astype('category')
    return df

train_data = feature_eng(train_data)
test_data = feature_eng(test_data)
original_data = feature_eng(original_data)
print("📊 Feature engineering complete.")


📊 Feature engineering complete.


In [6]:
# Encode categorical variables
cat_cols = [col for col in train_data.select_dtypes(include=['object', 'category']) if col != 'Fertilizer Name']
le_dict = {}

for col in cat_cols:
    le = LabelEncoder()
    train_data[col] = le.fit_transform(train_data[col])
    test_data[col] = le.transform(test_data[col])
    original_data[col] = le.fit_transform(original_data[col])
    le_dict[col] = le
    

In [7]:
# Encode target variable
target_le = LabelEncoder()
train_data["Fertilizer Name"] = target_le.fit_transform(train_data["Fertilizer Name"])
original_data["Fertilizer Name"] = target_le.fit_transform(original_data["Fertilizer Name"])


In [8]:
# Cast to category type
for col in cat_cols:
    train_data[col] = train_data[col].astype("category")
    test_data[col] = test_data[col].astype("category")
    original_data[col] = original_data[col].astype("category")
    

In [9]:
# Split data
X = train_data.drop(columns=["id", "Fertilizer Name"])
y = train_data["Fertilizer Name"]
X_test = test_data.drop(columns=["id"])
X_original = original_data.drop(columns=["Fertilizer Name"])
y_original = original_data["Fertilizer Name"]


In [10]:
# XGBoost hyperparameters
params = {
    'objective': 'multi:softprob',
    'num_class': y.nunique(),
    'max_depth': 7,
    'learning_rate': 0.03,
    'subsample': 0.8,
    'colsample_bytree': 0.2,
    'tree_method': 'gpu_hist',
    'random_state': 42,
    'eval_metric': 'mlogloss',
    'enable_categorical': True,
    'n_estimators': 10000,
    'early_stopping_rounds': 50
}


In [11]:
# MAP@3 evaluation metric
def mapk(actual, predicted, k=3):
    def apk(a, p, k):
        p = p[:k]
        score, hits = 0.0, 0
        seen = set()
        for i, pred in enumerate(p):
            if pred in a and pred not in seen:
                hits += 1
                score += hits / (i + 1.0)
                seen.add(pred)
        return score / min(len(a), k)
    return np.mean([apk(a, p, k) for a, p in zip(actual, predicted)])
    

In [12]:
# 10-Fold Stratified CV
FOLDS = 10
skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=42)
oof = np.zeros((len(train_data), y.nunique()))
pred_prob = np.zeros((len(test_data), y.nunique()))
map3_scores = []

print(f"🚀 Starting {FOLDS}-Fold Stratified Cross-Validation...")

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):
    print(f"\n🔄 Fold {fold}/{FOLDS} in progress...")

    x_train, x_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Combine with original data
    x_train = pd.concat([x_train, X_original], ignore_index=True)
    y_train = pd.concat([y_train, y_original], ignore_index=True)

    model = XGBClassifier(**params)
    model.fit(x_train, y_train, eval_set=[(x_val, y_val)], verbose=100)

    oof[val_idx] = model.predict_proba(x_val)
    pred_prob += model.predict_proba(X_test)

    top3 = np.argsort(oof[val_idx], axis=1)[:, -3:][:, ::-1]
    actual = [[label] for label in y_val]
    score = mapk(actual, top3)
    map3_scores.append(score)

    print(f"✅ Fold {fold} complete — MAP@3: {score:.5f}")

# Final evaluation
avg_map3 = np.mean(map3_scores)
print(f"\n🎯 Average MAP@3 across {FOLDS} folds: {avg_map3:.5f}")


🚀 Starting 10-Fold Stratified Cross-Validation...

🔄 Fold 1/10 in progress...
[0]	validation_0-mlogloss:1.94575
[100]	validation_0-mlogloss:1.93373
[200]	validation_0-mlogloss:1.92688
[300]	validation_0-mlogloss:1.92173
[400]	validation_0-mlogloss:1.91763
[500]	validation_0-mlogloss:1.91421
[600]	validation_0-mlogloss:1.91103
[700]	validation_0-mlogloss:1.90814
[800]	validation_0-mlogloss:1.90578
[900]	validation_0-mlogloss:1.90365
[1000]	validation_0-mlogloss:1.90168
[1100]	validation_0-mlogloss:1.89980
[1200]	validation_0-mlogloss:1.89834
[1300]	validation_0-mlogloss:1.89695
[1400]	validation_0-mlogloss:1.89561
[1500]	validation_0-mlogloss:1.89438
[1600]	validation_0-mlogloss:1.89333
[1700]	validation_0-mlogloss:1.89244
[1800]	validation_0-mlogloss:1.89153
[1900]	validation_0-mlogloss:1.89076
[2000]	validation_0-mlogloss:1.89009
[2100]	validation_0-mlogloss:1.88950
[2200]	validation_0-mlogloss:1.88898
[2300]	validation_0-mlogloss:1.88853
[2400]	validation_0-mlogloss:1.88819
[2500]	va

In [13]:
# Generate submission file
top_3_preds = np.argsort(pred_prob, axis=1)[:, -3:][:, ::-1]
top_3_labels = target_le.inverse_transform(top_3_preds.ravel()).reshape(top_3_preds.shape)
submission = pd.DataFrame({
    'id': test_data['id'],
    'Fertilizer Name': [' '.join(row) for row in top_3_labels]
})
submission.to_csv("submission.csv", index=False)
print("📁 Submission file saved!")


📁 Submission file saved!


In [14]:
# End timing
elapsed_time = time.time() - start_time
print(f"\n⏱️ Total Training Time: {elapsed_time / 60:.2f} minutes")



⏱️ Total Training Time: 62.08 minutes


In [15]:
# Print Submission preview
print(submission.head(10))


       id             Fertilizer Name
0  750000          10-26-26 20-20 DAP
1  750001      17-17-17 10-26-26 Urea
2  750002              20-20 Urea DAP
3  750003      14-35-14 Urea 10-26-26
4  750004         20-20 Urea 17-17-17
5  750005        28-28 20-20 14-35-14
6  750006     28-28 10-26-26 14-35-14
7  750007          28-28 DAP 10-26-26
8  750008  10-26-26 17-17-17 14-35-14
9  750009        28-28 14-35-14 20-20


<!-- Include Google Fonts for a modern font -->
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap" rel="stylesheet">

<div style="
    border-radius: 15px; 
    border: 2px solid #a63603; /* Darker warm brown to match palette */
    padding: 20px; 
    background: linear-gradient(135deg, #d73027, #fdae61, #a6d96a, #1a9850); 
    text-align: center; 
    box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.5);
">
    <h1 style="
        color: #ffffff; 
        text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.7); 
        font-weight: bold; 
        margin-bottom: 10px; 
        font-size: 28px; 
        font-family: 'Roboto', sans-serif;
    ">
        🙏 Thanks for Reading! 🚀
    </h1>
    <p style="color: #ffffff; font-size: 18px; text-align: center;">
        Happy Coding! 🙌😊
    </p>
</div>
