In [3]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings("ignore")

# Load the train and test tables from their CSV files.
dataTrain = pd.read_csv("allAtt_onehot_large_train_new8.csv")
dataTest = pd.read_csv("allAtt_onehot_large_test_new8.csv")

# Split each table by column position: columns 4..37 become the feature
# matrix, and every column from 38 onward becomes the label block.
# The data is assumed to be already prepared at this point.
x_train = dataTrain.iloc[:, 4:38].values
y_train = dataTrain.iloc[:, 38:].values
x_test = dataTest.iloc[:, 4:38].values
y_test = dataTest.iloc[:, 38:].values

# Collapse each label row to the index of its largest entry, yielding integer
# class labels (assumes the label columns are one-hot encoded — TODO confirm).
y_train_int = y_train.argmax(axis=1)
y_test_int = y_test.argmax(axis=1)

# Configure the CatBoost classifier.
# NOTE(review): the original note described this as a binary classifier, yet
# the configured loss is 'MultiClass' — confirm which objective is intended.
model = CatBoostClassifier(
    # Objective and the metric tracked during training.
    loss_function='MultiClass',
    eval_metric='Accuracy',
    # Boosting budget and tree shape.
    iterations=500,
    learning_rate=0.1,
    depth=6,
    # Fixed seed so repeated runs use the same random state.
    random_seed=42,
    # Keep the training loop silent.
    verbose=0,
)

# Fit on the training features against the integer class labels.
model.fit(x_train, y_train_int)

# Predict labels for the test features and flatten the result so that it is
# guaranteed to be a 1-D vector before it is handed to the metric functions.
y_pred = model.predict(x_test).flatten()

# Score the predictions against the integer test labels.
acc = accuracy_score(y_test_int, y_pred)
print(f"✅ Accuracy: {acc:.4f}")

# Header line followed by the per-class precision / recall / f1 table.
print(
    "📊 Classification Report:",
    classification_report(y_test_int, y_pred),
    sep="\n",
)

✅ Accuracy: 0.6868
📊 Classification Report:
              precision    recall  f1-score   support

           0       0.66      0.66      0.66       175
           1       0.71      0.71      0.71       205

    accuracy                           0.69       380
   macro avg       0.68      0.69      0.69       380
weighted avg       0.69      0.69      0.69       380


In [5]:
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# 'models' folder exists first; without this the cell raises FileNotFoundError
# on a fresh checkout where the folder has not been created by hand.
os.makedirs('models', exist_ok=True)

# Serialize the fitted model to disk. As the last expression of the cell, the
# list of written file names returned by joblib.dump is shown as the output.
joblib.dump(model, 'models/catboost_model.pkl')

['models/catboost_model.pkl']

In [6]:
# Reload the serialized model from disk and rebind `model` to the loaded object.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
# NOTE(review): this cell relies on `joblib` having been imported by an
# earlier cell; it fails on a fresh kernel if run on its own.
model = joblib.load('models/catboost_model.pkl')