In [1]:
import pandas as pd
import numpy as np
import time
import psutil
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from xgboost import XGBClassifier
import os

In [2]:
results = []

def split_features(X):
    cat_cols = X.select_dtypes(include=["object", "category"]).columns.tolist()
    num_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
    return num_cols, cat_cols

def build_preprocessor(num_cols, cat_cols):
    numeric_transformer = StandardScaler()
    categorical_transformer = OrdinalEncoder(
        handle_unknown="use_encoded_value",
        unknown_value=-1
    )

    preprocessor = ColumnTransformer([
        ("num", numeric_transformer, num_cols),
        ("cat", categorical_transformer, cat_cols),
    ])
    
    return preprocessor

def evaluate_model(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=999, stratify=y)

    start_time = time.time()
    process = psutil.Process()
    process.cpu_percent(interval=None)
    cpu_before = psutil.cpu_percent(interval=None)

    model = XGBClassifier(
        n_estimators=100,
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=999,
        n_jobs=-1
    )
    
    model.fit(X_train, y_train)

    end_time = time.time()
    elapsed_time = end_time - start_time
    cpu_after = psutil.cpu_percent(interval=None)
    cpu_occupied = cpu_after - cpu_before
    memory_used = process.memory_info().rss / (1024 ** 2)

    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)

    print(f"Accuracy: {acc:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"AUC: {auc:.4f}")
    print(f"time: {elapsed_time:.2f} seconds")
    print(f"cpu_occupied: {cpu_occupied}%")
    print(f"Memory Used: {memory_used:.2f} MB")
    
    results.append({
        'Accuracy': acc,
        'F1-score': f1,
        'AUC': auc,
        'time': elapsed_time,
        'cpu_occupied(%)': cpu_occupied,
        'Memory_Used (MB)': memory_used
    })

In [3]:
file_path = r"C:\Users\张凤智\Downloads\credit\creditcard.csv"
save_path = r"D:\DSS5104\XGBoost\xgboost_result\999\CreditCard\CreditCard.csv"

df = pd.read_csv(file_path)

y = df['Class']
X = df.drop(columns=['Class'])

num_cols, cat_cols = split_features(X)
preprocessor = build_preprocessor(num_cols, cat_cols)

X_processed = preprocessor.fit_transform(X)

evaluate_model(X_processed, y)

os.makedirs(os.path.dirname(save_path), exist_ok=True)
results_df = pd.DataFrame(results)
results_df.to_csv(save_path, index=False)
print(f"\nSaved all: {save_path}")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.9992
F1-score: 0.7582
AUC: 0.8773
time: 2.64 seconds
cpu_occupied: 63.599999999999994%
Memory Used: 450.00 MB

Saved all: D:\DSS5104\XGBoost\xgboost_result\999\CreditCard\CreditCard.csv
