# 分位数随机森林

In [12]:
# 分位数随机森林
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm

In [5]:
# Load the merged sampling-point table and replace the long source column
# names with the short feature names used by the rest of the notebook.
data = pd.read_csv('../../datafile/SY_POINT_MERGES.csv').rename(
    columns={
        "pre_2022_mean_Band_1": "pre",
        "tmp_2022_mean_Band_1": "tmp",
        "SY_ASP_5_Band_1": "asp",
        "SY_SLOP_5_Band_1": "slope",
        "SY_DEM_5_Band_1": "dem",
        "DLMC": "dl",
    }
)
# Show one random row as a quick sanity check of the renamed columns.
data.sample(1)

Unnamed: 0.1,Unnamed: 0,X,Y,pre,tmp,asp,slope,dem,dl,PH
3593,3595,107.120082,28.358279,840.25,157.416672,233.249374,43.860817,922.0,旱地,5.1


In [9]:
# Drop every row that has at least one missing value, then count the
# remaining NaNs per column to confirm the table is complete (all zeros).
data.dropna(axis=0, how="any", inplace=True)
missing_values = data.isna().sum()
missing_values

Unnamed: 0    0
X             0
Y             0
pre           0
tmp           0
asp           0
slope         0
dem           0
dl            0
PH            0
dtype: int64

In [18]:
# Replace the string labels in the `dl` column with integer codes so the
# column can be used as a numeric model feature. The fitted encoder `le`
# is kept so the codes can be mapped back to labels via `le.classes_`.
le = LabelEncoder()
le.fit(data['dl'])
data['dl'] = le.transform(data['dl'])





In [26]:
def get_result(data, n_tree, max_depth, random_state, min_samples_leaf, quantile=0.5):
    """Fit a quantile gradient-boosting model for PH and score it on a hold-out set.

    The model is a ``GradientBoostingRegressor`` trained with the quantile
    (pinball) loss. Note that this is gradient boosting, not a true quantile
    random forest. The predictions of this tree model are what gets scored,
    so every hyperparameter passed in actually influences the returned metrics.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the feature columns ``pre``, ``tmp``, ``asp``,
        ``slope``, ``dem``, ``dl`` and the target column ``PH``.
        ``dl`` must already be numeric (label-encoded).
    n_tree : int
        Number of boosting stages (``n_estimators``).
    max_depth : int
        Maximum depth of each individual tree.
    random_state : int
        Seed passed to the gradient-boosting model.
    min_samples_leaf : int
        Minimum number of samples required in a leaf.
    quantile : float, default 0.5
        Quantile to predict (``alpha`` of the quantile loss). The default
        0.5 yields the conditional median, which is used as the point
        prediction for the error metrics.

    Returns
    -------
    tuple of (float, float)
        ``(mse, r2)`` of the predictions on the 30 % hold-out set.
    """
    feature_columns = ['pre', 'tmp', 'asp', 'slope', 'dem', 'dl']
    X = data[feature_columns]
    y = data['PH']

    # The split seed is fixed (independent of `random_state`) so that every
    # hyperparameter combination is evaluated on the same train/test partition.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    model = GradientBoostingRegressor(
        loss='quantile',
        alpha=quantile,
        n_estimators=n_tree,
        max_depth=max_depth,
        random_state=random_state,
        min_samples_leaf=min_samples_leaf,
    )
    model.fit(X_train, y_train)

    # Score the tree model itself on the hold-out set.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return mse, r2

In [27]:
# Search over the number of trees and keep the hold-out metrics of each run.
# NOTE(review): the recorded "TypeError: 'list' object is not callable" suggests
# that `range` or `get_result` was rebound to a list earlier in this kernel
# session -- restart the kernel and run all cells before trusting the results.
# NOTE(review): this fits 800 models with up to 899 trees each; consider a
# coarser step if the run time is a problem.
tree_values = list(range(100, 900))
mse_scores = []
r2_scores = []
for tree_value in tree_values:
    # get_result returns (mse, r2); unpack so each list holds plain numbers.
    mse, r2 = get_result(data, tree_value, 5, 42, 5)
    mse_scores.append(mse)
    r2_scores.append(r2)

# Pick the tree count with the highest hold-out R^2.
best_tree_value, best_r2 = max(zip(tree_values, r2_scores), key=lambda pair: pair[1])
best_tree_value, best_r2

TypeError: 'list' object is not callable