In [3]:
import pathlib
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import xgboost as xgb

In [4]:
# --- Configuration ------------------------------------------------------------
# Location of the raw CSV, relative to the notebook's working directory.
DATA_PATH = pathlib.Path("UCI_Credit_Card.csv")
# Label column: 1 = yes (default), 0 = no.
TARGET_COL = "default.payment.next.month"
# Row-identifier column; it is dropped from the feature matrix further down.
ID_COL = "ID"

# --- Load ---------------------------------------------------------------------
# Fail fast with an actionable message; the path is interpolated so the message
# stays accurate if DATA_PATH is edited.
if not DATA_PATH.exists():
    raise FileNotFoundError(
        f"Arquivo '{DATA_PATH}' não encontrado. Coloque-o na mesma pasta "
        "ou ajuste DATA_PATH."
    )

print("📊  Carregando dados…")
df = pd.read_csv(DATA_PATH)
print(f"✔️  Linhas: {len(df):,}; Colunas: {len(df.columns)}")

# Check the schema here so a wrong/renamed CSV is reported next to the load
# step rather than as a KeyError in a later cell.
missing_cols = [col for col in (ID_COL, TARGET_COL) if col not in df.columns]
if missing_cols:
    raise KeyError(f"Colunas esperadas ausentes em '{DATA_PATH}': {missing_cols}")

📊  Carregando dados…
✔️  Linhas: 30,000; Colunas: 25


In [5]:
# Separate the label from the predictors. The identifier and the label itself
# are both excluded from the feature matrix.
y = df[TARGET_COL]
non_feature_cols = [ID_COL, TARGET_COL]
X = df.drop(columns=non_feature_cols)

# Column names of the feature matrix, in order, as a plain Python list.
FEATURE_NAMES = list(X.columns)

In [6]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class ratio
# equal in both partitions, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Hyperparameters unpacked as keyword arguments into xgb.XGBClassifier.
xgb_params = {
    # Ensemble size, step size and per-tree depth limit.
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 6,
    # Row / column sampling fractions applied per tree.
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    # Binary classification objective with log-loss as the evaluation metric.
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    # Fixed seed so the sampling above is reproducible.
    "random_state": 42,
}

In [11]:
# Fit the classifier on the training partition using the hyperparameters
# collected in xgb_params. (Progress message repaired from mis-encoded text.)
print("🚀  Treinando XGBoost…")
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

🚀  Treinando XGBoost…


In [12]:
# Score the held-out rows. The second column of predict_proba is kept as the
# positive-class (label 1) probability and fed to ROC AUC.
# (Result message repaired from mis-encoded text.)
y_pred_proba = model.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_pred_proba)
print(f"🎯  AUC no teste: {auc:.4f}")

🎯  AUC no teste: 0.7748


In [9]:
# Persist the trained model. Run this cell only after the training cell: it
# serializes whatever `model` currently holds in the kernel.

# 1) Native XGBoost format: the underlying booster written as JSON.
print("💾  Salvando booster em JSON…")
booster = model.get_booster()
booster.save_model("credit_default_xgb.json")

# 2) Optional pickle of the estimator object. The message says "Pipeline", but
#    the object dumped here is the XGBClassifier held in `model`.
#    The .pkl is a pickle: only load it back from a trusted location.
print("💾  Salvando Pipeline completa (opcional)…")
try:
    import joblib
except ImportError:
    # Best-effort step: a missing joblib skips the .pkl instead of failing.
    print("⚠️  joblib não instalado; pulei .pkl. Use 'pip install joblib' se quiser.")
else:
    # Kept outside the try body so an ImportError raised while dumping is not
    # misreported as "joblib not installed".
    joblib.dump(model, "credit_default_xgb.pkl")

print("✅  Concluído!")

💾  Salvando booster em JSON…
💾  Salvando Pipeline completa (opcional)…
✅  Concluído!
