# 📘 XGBoost Step-by-Step Example (Manual Calculation)

This notebook walks through one full iteration of XGBoost for binary classification on a small dataset, showing all internal calculations (log-odds, gradients, hessians, gain, leaf weights, and prediction updates).

In [None]:
# 📊 Step 1: Prepare Dataset
import pandas as pd
import numpy as np
import math

# Toy dataset laid out one row per sample; `Y` is the binary target and the
# remaining columns (other than `ID`) are candidate features.
data = pd.DataFrame(
    [
        # ID, Age, Salary, EduLevel, Gender, CreditScore, Y
        (1, 25, 50, 1, 0, 700, 0),
        (2, 45, 80, 2, 1, 780, 1),
        (3, 35, 60, 1, 0, 720, 1),
        (4, 22, 45, 0, 1, 680, 0),
    ],
    columns=['ID', 'Age', 'Salary', 'EduLevel', 'Gender', 'CreditScore', 'Y'],
)
data

In [None]:
# 🧮 Step 2: Initial Log-Odds and Prediction
# Every row starts from the same constant raw score f0 = log(p / (1 - p)),
# where p is the share of positive labels, and the same probability
# y_hat0 = sigmoid(f0).
y_mean = data['Y'].mean()

# The log-odds only exist for 0 < p < 1. Without this guard an all-zero target
# dies with "math domain error" and an all-one target silently yields f0 = inf.
# (A NaN mean, e.g. from an empty frame, also fails this check.)
if not 0 < y_mean < 1:
    raise ValueError(
        "Y must contain both classes (0 and 1) to compute the initial log-odds; "
        f"got mean(Y) = {y_mean}"
    )

f0 = math.log(y_mean / (1 - y_mean))
data['f0'] = f0
data['y_hat0'] = 1 / (1 + np.exp(-f0))  # sigmoid of the scalar f0, broadcast to all rows
data

In [None]:
# 📉 Step 3: Gradients and Hessians
# First and second derivatives of the logistic loss with respect to the raw
# score, evaluated at the current prediction p:
#   grad = p - y
#   hess = p * (1 - p)
p0 = data['y_hat0']
data['grad'] = p0 - data['Y']
data['hess'] = p0 * (1 - p0)
data

In [None]:
# 🌳 Step 4: Try Split on Age < 30
# Partition the rows with the candidate rule, then sum the gradients (G) and
# hessians (H) for the left child, the right child and the undivided node.
left = data[data['Age'] < 30]
right = data[data['Age'] >= 30]

gL, hL = left['grad'].sum(), left['hess'].sum()
gR, hR = right['grad'].sum(), right['hess'].sum()
gT, hT = data['grad'].sum(), data['hess'].sum()

lambda_ = 1  # L2 penalty on leaf weights
gamma = 0    # complexity cost charged for adding a split; 0 leaves the gain unpenalised

# Split gain:
#   gain = 1/2 * [ G_L^2 / (H_L + lambda) + G_R^2 / (H_R + lambda) - G^2 / (H + lambda) ] - gamma
# A split is only worth making when this value is positive.
children_score = gL**2 / (hL + lambda_) + gR**2 / (hR + lambda_)
parent_score = gT**2 / (hT + lambda_)
gain = 0.5 * (children_score - parent_score) - gamma
gain

In [None]:
# 🍃 Step 5: Compute Leaf Weights
# Each leaf's weight is w = -G / (H + lambda), using that leaf's own gradient
# and hessian sums; the same expression is evaluated for the left and right leaf.
wL, wR = (-g / (h + lambda_) for g, h in ((gL, hL), (gR, hR)))
wL, wR

In [None]:
# ➕ Step 6: Update Raw Predictions (f1)
# Each row receives the weight of the leaf it fell into, shrunk by the learning
# rate: f1 = f0 + eta * w. The updated probability is y_hat1 = sigmoid(f1).
eta = 0.1  # learning rate (shrinkage applied to the new tree's output)

# Reuse the partition built in Step 4 instead of re-typing the split rule, so a
# change to the candidate split cannot leave the weights paired with a stale rule.
in_left = data.index.isin(left.index)
data['leaf'] = np.where(in_left, 'L', 'R')
data['weight'] = np.where(in_left, wL, wR)
data['f1'] = data['f0'] + eta * data['weight']
data['y_hat1'] = 1 / (1 + np.exp(-data['f1']))
data[['ID', 'f0', 'y_hat0', 'grad', 'hess', 'f1', 'y_hat1']]