# XGB Step 3 — Train XGBoost model

In [None]:
# Make sure xgboost is importable before the imports further down; if it is
# missing, install it into the environment of the running interpreter.
try:
    import xgboost
except ImportError:
    # Only a missing package should trigger an install. Any other failure
    # while importing is left to propagate instead of being masked by pip.
    import importlib
    import subprocess
    import sys

    # Invoke pip through this interpreter so the package lands in the same
    # environment the script/notebook kernel is running in.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "xgboost"])
    # Reset the import system's finder caches so the freshly installed
    # package can be found by the later `from xgboost import ...`.
    importlib.invalidate_caches()

import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib, os

# --- Load the dataset and separate the features from the "label" column ---
df = pd.read_csv("data/high_salary.csv")
X = df.drop(columns=["label"])
y = df["label"]

# --- Hold out 20% of the rows for validation, stratified on the target ---
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# --- Configure and fit the classifier ---
# Hyperparameters are gathered in one mapping so they are easy to scan/tweak.
xgb_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 5,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
    "n_jobs": -1,
    "eval_metric": "logloss",
}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# --- Evaluate on the held-out split ---
pred = model.predict(X_valid)
accuracy = accuracy_score(y_valid, pred)
print("Accuracy:", accuracy)
report = classification_report(y_valid, pred)
print(report)

# --- Persist the fitted model ---
model_dir = "XGB/model"
model_path = f"{model_dir}/modelXGB.joblib"
os.makedirs(model_dir, exist_ok=True)  # create the output directory if absent
joblib.dump(model, model_path)
print(f"Saved model to {model_path}")
