In [None]:
import numpy as np
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic dataset: 1000 samples with 20 features, of which 15 are
# informative and the remaining 5 are redundant.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# Hold out 20% of the samples as the test set (seeded for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base learner shared by the ensembles: a decision tree capped at depth 3.
base_estimator = DecisionTreeClassifier(max_depth=3)

# Bagging ensemble of 10 base learners. max_samples / max_features are the
# fractions (80% each) of training samples and of features drawn for every
# base learner. The ensemble is fitted right away (fit returns the estimator).
bagging = BaggingClassifier(
    estimator=base_estimator,
    n_estimators=10,
    max_samples=0.8,
    max_features=0.8,
    random_state=42,
).fit(X_train, y_train)

# Accuracy on the training split first, then on the held-out split.
train_score, test_score = (
    bagging.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

# Report both accuracies, one per line.
print(
    f"训练集准确率: {train_score:.3f}",
    f"测试集准确率: {test_score:.3f}",
    sep="\n",
)


训练集准确率: 0.855
测试集准确率: 0.845


In [3]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost ensemble built on the same base_estimator and the same number of
# base learners (10) as the bagging model, so the two results are comparable.
# The ensemble is fitted right away (fit returns the estimator).
adaboost = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=10,
    random_state=42,
).fit(X_train, y_train)

# Accuracy on the training split and on the held-out test split.
ada_train_score, ada_test_score = (
    adaboost.score(X_train, y_train),
    adaboost.score(X_test, y_test),
)

# AdaBoost results on their own.
print(
    "\nAdaBoost模型评估结果:",
    f"训练集准确率: {ada_train_score:.3f}",
    f"测试集准确率: {ada_test_score:.3f}",
    sep="\n",
)

# Side-by-side comparison; train_score / test_score are the bagging
# accuracies computed in the earlier cell.
print(
    "\n两种方法对比:",
    "Bagging vs AdaBoost",
    f"训练集准确率: {train_score:.3f} vs {ada_train_score:.3f}",
    f"测试集准确率: {test_score:.3f} vs {ada_test_score:.3f}",
    sep="\n",
)



AdaBoost模型评估结果:
训练集准确率: 0.955
测试集准确率: 0.870

两种方法对比:
Bagging vs AdaBoost
训练集准确率: 0.855 vs 0.955
测试集准确率: 0.845 vs 0.870
