In [4]:
# 导入库
from RandomForest import RandomForest
from sklearn.datasets import fetch_california_housing, load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor


### 葡萄酒数据集（分类任务）

In [5]:
# Load the Wine dataset and separate the feature matrix from the class labels.
data = load_wine()
X, y = data.data, data.target

# Split into training and test sets: 30% is held out, stratified on the labels,
# with a fixed random_state for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)

# Custom random forest configured for the classification task.
# NOTE(review): no seed is passed here, so this model's score may differ
# between runs unless RandomForest seeds itself internally -- confirm.
rf_classification = RandomForest(
    task='classification',
    n_estimators=10,
    max_depth=10,
    min_samples_split=2,
    max_features="sqrt",
)

# Train the custom forest, then predict labels for the test split.
rf_classification.fit(X_train, y_train)
y_pred_class = rf_classification.predict(X_test)

# Report the custom forest's test accuracy as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_class)
print(f"自定义随机森林（分类）准确率: {accuracy * 100:.2f}%")

# scikit-learn forest built with the same hyperparameters, plus a fixed seed.
sklearn_rf_classification = RandomForestClassifier(
    random_state=42,
    n_estimators=10,
    max_depth=10,
    min_samples_split=2,
    max_features="sqrt",
)

# Train the scikit-learn forest and predict on the same test split.
sklearn_rf_classification.fit(X_train, y_train)
y_pred_sklearn_class = sklearn_rf_classification.predict(X_test)

# Report the scikit-learn forest's test accuracy as a percentage.
accuracy_sklearn = accuracy_score(y_true=y_test, y_pred=y_pred_sklearn_class)
print(f"sklearn 随机森林（分类）准确率: {accuracy_sklearn * 100:.2f}%")


自定义随机森林（分类）准确率: 100.00%
sklearn 随机森林（分类）准确率: 96.30%


### 加利福尼亚房价数据集（回归任务）

In [6]:
# Load the California housing dataset and separate features from the target.
california = fetch_california_housing()
X, y = california.data, california.target

# Split into training and test sets: 30% is held out, with a fixed
# random_state for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# Custom random forest configured for the regression task.
# NOTE(review): no seed is passed here, so this model's metrics may differ
# between runs unless RandomForest seeds itself internally -- confirm.
rf_regression = RandomForest(
    task='regression',
    n_estimators=10,  # number of trees; adjust as needed
    max_depth=10,
    min_samples_split=2,
    max_features="sqrt",
)

# Train the custom forest, then predict targets for the test split.
rf_regression.fit(X_train, y_train)
y_pred_reg = rf_regression.predict(X_test)

# Report mean squared error and R^2 for the custom forest.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred_reg)
r2 = r2_score(y_true=y_test, y_pred=y_pred_reg)
print(f"自定义随机森林（回归）MSE: {mse:.4f}")
print(f"自定义随机森林（回归）R2 Score: {r2:.4f}")

# scikit-learn forest built with the same hyperparameters, plus a fixed seed.
sklearn_rf_regression = RandomForestRegressor(
    random_state=42,
    n_estimators=10,
    max_depth=10,
    min_samples_split=2,
    max_features="sqrt",
)

# Train the scikit-learn forest and predict on the same test split.
sklearn_rf_regression.fit(X_train, y_train)
y_pred_sklearn_reg = sklearn_rf_regression.predict(X_test)

# Report mean squared error and R^2 for the scikit-learn forest.
mse_sklearn = mean_squared_error(y_true=y_test, y_pred=y_pred_sklearn_reg)
r2_sklearn = r2_score(y_true=y_test, y_pred=y_pred_sklearn_reg)
print(f"sklearn 随机森林（回归）MSE: {mse_sklearn:.4f}")
print(f"sklearn 随机森林（回归）R2 Score: {r2_sklearn:.4f}")

自定义随机森林（回归）MSE: 0.3205
自定义随机森林（回归）R2 Score: 0.7558
sklearn 随机森林（回归）MSE: 0.3316
sklearn 随机森林（回归）R2 Score: 0.7473
