In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error

from xgboost import XGBClassifier, XGBRegressor

In [2]:
# Toy dataset: three numeric features and a binary label, ten rows in total.
data = dict(
    age=[22, 25, 30, 35, 40, 45, 50, 55, 60, 65],
    salary=[30000, 35000, 50000, 60000, 65000, 70000, 90000, 120000, 130000, 150000],
    experience=[1, 2, 5, 7, 10, 12, 18, 25, 30, 35],
    target=[0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
)
df = pd.DataFrame(data)

# Label vector first, then the feature matrix (every column except the label).
y = df.loc[:, "target"]
X = df.drop(columns=["target"])

# Split into train/test partitions. test_size=.25 requests a quarter of the rows
# for testing, stratify=y splits in a class-stratified fashion, and the fixed
# random_state makes the shuffle repeatable.
split_parts = train_test_split(X, y, test_size=.25, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [4]:
# Exercise 1 — XGBoost Classification ===========================
#   Train classifier
#   Evaluate accuracy

# Hyperparameters gathered in one mapping so they are easy to scan and tweak.
clf_params = {
    "n_estimators": 100,      # 100 boosted trees
    "learning_rate": .1,      # small step size => slow, stable learning
    "max_depth": 3,           # shallow (weak) trees to limit overfitting
    "subsample": .8,          # fraction of rows sampled for each tree
    "colsample_bytree": .8,   # fraction of columns sampled for each tree
    "eval_metric": "logloss",
    "random_state": 42,       # fixed seed for the row/column sampling
}

xgb = XGBClassifier(**clf_params)
xgb.fit(X_train, y_train)

# Score the fitted model on the held-out test split.
y_pred = xgb.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

print("XGBoost Accuracy: ", test_accuracy)

XGBoost Accuracy:  0.6666666666666666


In [5]:
# Exercise 2 — Regularization Effect ===========================
#   Compare alpha/lambda
#
# Two models are fitted that share every hyperparameter except reg_alpha and
# reg_lambda, so any difference in accuracy can be attributed to the
# regularization strength alone.

shared_params = dict(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=4,
    eval_metric="logloss",
    random_state=42,
)

# Baseline: XGBoost's default penalties (alpha = 0, lambda = 1), spelled out
# explicitly so the contrast with the strong setting is visible.
xgb_reg_default = XGBClassifier(
    reg_alpha=0.0,
    reg_lambda=1.0,
    **shared_params
)
xgb_reg_default.fit(X_train, y_train)
y_pred_default = xgb_reg_default.predict(X_test)

# Strong regularization. Both terms act on the leaf weights: they shrink leaf
# outputs and make the model more conservative. (Penalizing the number of
# leaves / tree size is the job of `gamma`, not of alpha or lambda.)
xgb_reg_strong = XGBClassifier(
    reg_alpha=1.0,     # L1 penalty on leaf weights
    reg_lambda=10.0,   # L2 penalty on leaf weights
    **shared_params
)
xgb_reg_strong.fit(X_train, y_train)

# y_pred keeps holding the strongly regularized model's predictions.
y_pred = xgb_reg_strong.predict(X_test)

print("Accuracy with default regularization:", accuracy_score(y_test, y_pred_default))
print("Accuracy with strong regularization:", accuracy_score(y_test, y_pred))

Accuracy with strong regularization: 0.6666666666666666


In [None]:
# Exercise 3 — Feature Importance    ===========================
    # Print & interpret

