In [1]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the raw dataset from disk.
df1 = pd.read_csv("data1.csv")

# Columns 'X1' through 'X24' are assumed to be the features and
# 'Y(1=default, 0=non-default)' is assumed to be the binary label.
label_column = 'Y(1=default, 0=non-default)'
y = df1[label_column]
X = df1.loc[:, 'X1':'X24']  # label-based slice: both endpoints are included

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Wrap each partition in a DMatrix, XGBoost's dedicated data format.
dtrain, dtest = (
    xgb.DMatrix(features, label=labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

# Hyper-parameters handed to the XGBoost training routine.
params = dict(
    max_depth=3,                  # depth of each tree
    eta=0.3,                      # learning rate
    objective='binary:logistic',  # this is a binary classification problem
    eval_metric='logloss',        # evaluation metric: logarithmic loss
)

# Fit the booster on the training DMatrix.
num_round = 100  # number of boosting rounds
bst = xgb.train(params, dtrain, num_boost_round=num_round)

# Score the held-out set with the trained booster.
y_pred = bst.predict(dtest)
# The booster outputs probabilities, so threshold them to get hard labels:
# strictly greater than 0.5 becomes class 1, everything else class 0.
y_pred_class = (y_pred > 0.5).astype(int).tolist()

# Report accuracy on the test set.
accuracy = accuracy_score(y_test, y_pred_class)
print(f"Accuracy: {accuracy:.2%}")

# Persist the trained booster so it can be reused later.
# XGBoost chooses the on-disk format from the file extension, so an explicit
# '.json' extension is used here: it selects the documented, portable JSON
# format instead of relying on the version-dependent fallback that an
# unrecognised extension triggers.
bst.save_model('xgb_model.json')

# To reload the model later and predict on new data:
# loaded_model = xgb.Booster()
# loaded_model.load_model('xgb_model.json')
# predictions_on_new_data = loaded_model.predict(new_data_dmatrix)

Accuracy: 81.00%


