In [1]:
import random
import datetime

import shap
import numpy as np
import pandas as pd
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
import category_encoders as ce

from xgboost import plot_importance
from xgboost.sklearn import XGBRegressor
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn import neighbors
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR

from joblib import dump, load
from sklearn import preprocessing
from sklearn import linear_model
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import SelectKBest
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import learning_curve
from sklearn.feature_selection import f_regression, mutual_info_regression
from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score

from sklearn.model_selection import train_test_split



def read_data(path):
    '''Load a UTF-8 CSV file into a pandas DataFrame.

    Parameters:
        path: location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or None when the file is missing or cannot be
        parsed (a short message is printed instead of raising).
    '''
    data = None
    try:
        data = pd.read_csv(path, engine='python', encoding='utf8')
    except FileNotFoundError:
        print('File not found. Please check the path and filename.')
    except Exception as e:
        # Best effort by design: report the problem and hand back None.
        print('An error occurred:', e)
    return data

def assess_metrics(truth,pred):
    '''Print MAE, MSE, RMSE and R2 for a set of predictions.

    Parameters:
        truth: observed target values.
        pred: predicted target values, aligned with ``truth``.

    Returns:
        (RMSE, R2) as a tuple.
    '''
    print('----')
    mae = mean_absolute_error(truth,pred)
    mse = mean_squared_error(truth,pred)
    # RMSE is derived from the MSE instead of being requested with
    # `squared=False`: that keyword was deprecated in scikit-learn 1.4 and
    # removed in later releases. For the single-target case the two are identical.
    rmse = np.sqrt(mse)
    r2 = r2_score(truth,pred)
    print('MAE: %.4f'%mae)
    print('MSE: %.4f'%mse)
    print('RMSE: %.4f'%rmse)
    print('R2: %.4f'%r2)
    return rmse,r2

def model_result(est,X_train_val, X_test, y_train_val, y_test):
    '''Report the metrics of an already-fitted estimator on train and test data.

    The estimator is only used for prediction here; it is not (re)fitted.
    Metrics are printed by ``assess_metrics`` for both splits.

    Returns:
        (RMSE, R2) measured on the test split.
    '''
    # training split
    train_pred = est.predict(X_train_val)
    print('train metrics')
    assess_metrics(y_train_val, train_pred)
    # test split
    print('test metrics')
    test_scores = assess_metrics(y_test, est.predict(X_test))
    return test_scores
def model_lineresult(est,model,year,X_train_val, X_test, y_train_val, y_test):
    '''Evaluate an already-fitted estimator and print its scores on one line.

    The estimator is only used for prediction; it is not (re)fitted here.

    Parameters:
        est: fitted estimator exposing ``predict``.
        model: label printed to identify the model.
        year: label printed to identify the test year(s).
        X_train_val, y_train_val: training features and targets.
        X_test, y_test: test features and targets.

    Returns:
        (RMSE, R2) measured on the test split.
    '''
    date = datetime.datetime.now().strftime('%Y-%m-%d')
    y_train_pred = est.predict(X_train_val)
    y_test_pred = est.predict(X_test)

    # Each metric is computed once and reused for both the printout and the
    # return value. RMSE is sqrt(MSE) rather than `squared=False`, a keyword
    # scikit-learn deprecated in 1.4 and removed in later releases.
    train_rmse = np.sqrt(mean_squared_error(y_train_val, y_train_pred))
    train_r2 = r2_score(y_train_val, y_train_pred)
    test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
    test_r2 = r2_score(y_test, y_test_pred)

    print('{} {} {} 【Train_RMSE: {:.2f}, Train_R2: {:.2f}, Test_RMSE: {:.4f}, Test_R2: {:.2f}】'
          .format(date,year,model,train_rmse,train_r2,test_rmse,test_r2))
    return test_rmse,test_r2


In [31]:
def yield_predictor(data, test_year=[2020], N=2, experiment = 'GDT', croptype = 'soybean', model = ['XGBoost'], verbose = 0,
                    tune_model = False, model_parameter=None, result = False, result_dir = '', feature_importance = False,
                    fea_eng = False,seed = 99,save_model=False, save_model_dir='', trend_n = 30,
                    importance_dir = '', feature_list = None, n_features = 200, 
                    metric_list = None,feature_name = [],feature_type = 'one_feature',col = None):
    '''Train an XGBoost yield model on the N years before a test year and score it.

    Workflow: select the requested feature columns, split by year, remove a
    linear time trend from the yield ("GDT"), standardise the features, fit an
    XGBRegressor, print the scores, and optionally write per-row predictions
    (with the trend added back) to a CSV file.

    Parameters:
        data: DataFrame holding the columns 'sta_con', 'State', 'yield(t/ha)',
            'Year', 'Value', 'GDT_trend_yield' plus the feature columns.
        test_year: list of test years; training uses
            [test_year[0]-N, test_year[0]) and labels use test_year[0].
        N: number of years in the training window.
        experiment: only 'GDT' is implemented.
        croptype: label used in printouts, metric rows and the output path.
        model: list of model names; must contain 'XGBoost'. model[0] is used
            as a label.
        verbose: 0 prints a one-line summary (model_lineresult); anything else
            prints the multi-line report (model_result).
        result: when truthy, predictions are written to
            result_dir + croptype + '/'.
        result_dir: prefix of the output directory (string concatenation, so
            it should end with a path separator).
        seed: random seed passed to XGBRegressor.
        trend_n: number of years before test_year[0] used to fit the trend.
        metric_list: list to which one metrics row is appended; a new list is
            created when omitted.
        feature_name: a single column name when feature_type is 'one_feature',
            otherwise a list of column names.
        feature_type: 'one_feature' or anything else for a list of features.
        col: label for the run in list-of-features mode (used in printouts,
            metric rows and the output file name).
        feature_list, n_features: accepted but do not restrict the columns
            used (see the note in the body).
        tune_model, model_parameter, feature_importance, fea_eng, save_model,
            save_model_dir, importance_dir: accepted for compatibility; unused.

    Returns:
        (None, None, metric_list). The first two slots are placeholders; the
        appended row is
        [label, seed, croptype, test_year[0], model[0], N, RMSE, R2].

    Raises:
        NotImplementedError: for an experiment other than 'GDT' or a model
            list without 'XGBoost'.
    '''
    import os  # local import: only needed to create the output directory

    # A shared mutable default would silently accumulate rows across calls.
    if metric_list is None:
        metric_list = []
    # Fail with a clear message instead of a NameError further down.
    if experiment != 'GDT':
        raise NotImplementedError("Only experiment='GDT' is implemented, got {!r}".format(experiment))
    if 'XGBoost' not in model:
        raise NotImplementedError("Only the 'XGBoost' model is implemented, got {!r}".format(model))

    target = 'yield(t/ha)'
    meta_cols = ['sta_con','State','yield(t/ha)','Year','Value','GDT_trend_yield']
    first_test_year = test_year[0]

    # ---- feature selection -------------------------------------------------
    if feature_type == 'one_feature':
        run_label = feature_name
        selected = [feature_name]
    else:
        run_label = col
        selected = list(feature_name)
    print('feature name:',run_label, 'seed:',seed, 'croptype:',croptype,' ','test_year:',first_test_year,' ','model:',model[0],' ',
          'experiment:',experiment,'n_feature：',n_features)
    print(len(selected))
    data = data[meta_cols + selected]
    print(data.columns.shape[0])
    # NOTE(review): `feature_list` / `n_features` do not limit the columns --
    # every name in `feature_name` is used. Earlier revisions computed a
    # top-`n_features` ranking here but never applied it; if truncation is
    # intended, it has to be applied to `selected` before the subset above.

    # ---- train / test split by year ----------------------------------------
    data_test = data[data['Year'].isin(test_year)]
    data_train_val = data[data['Year'].isin(list(range(first_test_year - N, first_test_year)))]

    # ---- GDT: remove a linear time trend from the yield --------------------
    # The trend is fitted on the trend_n years preceding the test year only,
    # so the test year never contributes to it.
    trend_rows = data[data['Year'].isin(list(range(first_test_year - trend_n, first_test_year)))]
    linear_m = linear_model.LinearRegression()
    linear_m.fit(np.array(trend_rows['Year']).reshape(-1,1),
                 np.array(trend_rows[target]).reshape(-1,1))
    slope = linear_m.coef_[0,0]
    intercept = linear_m.intercept_[0]
    yield_mean = data_train_val[target].mean()
    print('The foluma of de-trend the yield: yield_new = yield - {:.3f}×year + {:.3f} - ({:.3f})'
          .format(slope,yield_mean,intercept))

    def _detrend(frame):
        # yield_new = yield - slope*Year + mean(training yield) - intercept
        out = frame.copy()
        out[target] = frame[target] - slope * frame['Year'] + yield_mean - intercept
        return out

    data_train_val = _detrend(data_train_val)
    data_test = _detrend(data_test)
    y_train_val = data_train_val[target]
    y_test = data_test[target]
    X_train_val = data_train_val.drop(meta_cols, axis=1)
    X_test = data_test.drop(meta_cols, axis=1)

    # ---- standardise features (scaler fitted on the training split only) ---
    # New frames are built so integer columns are not forced to hold floats.
    std_scaler = preprocessing.StandardScaler().fit(X_train_val)
    X_train_val = pd.DataFrame(std_scaler.transform(X_train_val),
                               index=X_train_val.index, columns=X_train_val.columns)
    X_test = pd.DataFrame(std_scaler.transform(X_test),
                          index=X_test.index, columns=X_test.columns)

    # ---- fit and score -----------------------------------------------------
    est = XGBRegressor(max_depth= 7,learning_rate=0.09,n_estimators=700,seed=seed,objective= 'reg:gamma',
                       min_child_weight= 3,colsample_bytree=0.7, subsample= 0.8,reg_alpha= 0.1)
    est.fit(X_train_val,y_train_val)
    if verbose == 0:
        RMSE,R2 = model_lineresult(est,'XGBoost',test_year,X_train_val, X_test, y_train_val, y_test)
    else:
        RMSE,R2 = model_result(est,X_train_val, X_test, y_train_val, y_test)

    # In single-feature mode `col` is normally not supplied; fall back to the
    # feature name so the metrics row still identifies the run.
    metric_label = col if col is not None else run_label
    metric_list.append([metric_label,seed, croptype, first_test_year, model[0], N,  RMSE,R2])

    # ---- optional per-row predictions --------------------------------------
    if result:
        pred = est.predict(X_test)
        # `data` still holds the original (not de-trended) yield.
        result_df = pd.concat([data.loc[y_test.index,['sta_con', target]],
                               pd.DataFrame(pred,index = y_test.index,columns=['pred'])],axis=1)
        # Add the trend back so predictions are comparable with the raw yield.
        result_df['pred'] = result_df['pred'] + slope * data_test['Year'] - yield_mean + intercept
        result_df['residual'] = result_df['pred'] - result_df[target]

        out_dir = result_dir + croptype + '/'
        os.makedirs(out_dir, exist_ok=True)
        result_df.to_csv(out_dir + str(run_label) + str(seed) + '_' + str(first_test_year) + '_' + model[0] + '_'
                         + 'exp' + str(experiment) + '_N' + str(N) + '_预测结果.csv',header=True)

    return None,None,metric_list

### 多因子特征结果-大豆

In [13]:
# Test years 2014-2021; each year is predicted from a 10-year training window.
# read_data prints a message and returns None if a file cannot be read.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
data = read_data('D:/毕业大论文/数据/input_soybean_rename.csv')
featurelist = read_data('D:/毕业大论文/结果/SHAP值结果/soybean/average_feature_importance.csv')

In [26]:
# Feature-group combinations: each group adds one more data source to the previous one.
crop_type = 'soybean'
all_features = data.columns.tolist()

# Name prefixes of the remote-sensing variables.
RS_sublist = [
    'NDVI_', 'EVI_', 'LSWI_', 'GCVI_', 'RVI_', 'SAVI_', 'WDRVI_', 'Fpar_', 'LAI_', 'ET_',
    'LE_', 'LST_Day_', 'LST_Night_', 'Red_', 'Nir_', 'Blue_', 'Green_', 'Nir1_', 'Swir1_', 'Swir2_',
]
# Time-period suffixes appended to every prefix.
time_list = [
    'Mid Mar.', 'Late Mar.',
    'Early Apr.', 'Mid Apr.', 'Late Apr.',
    'Early May.', 'Mid May.', 'Late May.',
    'Early Jun.', 'Mid Jun.', 'Late Jun.',
    'Early Jul.', 'Mid Jul.', 'Late Jul.',
    'Early Aug.', 'Mid Aug.', 'Late Aug.',
    'Early Sep.', 'Mid Sep.', 'Late Sep.',
    'Early Oct.', 'Mid Oct.', 'Late Oct.',
    'Early Nov.', 'Mid Nov.', 'Late Nov.',
    'Early Dec.',
]
# Name prefixes of the climate variables.
WE_sublist = [
    'SPI14d_', 'SPI30d_', 'SPI90d_', 'EDDI14d_', 'EDDI30d_', 'EDDI90d_',
    'SPEI14d_', 'SPEI30d_', 'SPEI90d_', 'PDSI_', 'Palmer Z_', 'Precipitation_', 'Temp_',
    'Humidity_', 'Pressure_', 'Shortwave_', 'Longwave_',
]
# Phenological stage suffixes per crop.
phology_list_soybean = ['Planting', 'Emerging', 'Blooming', 'Podding', 'Dropping leaves', 'Harvest']
phology_list_maize = ['Planting', 'Emerging', 'Silking', 'Dough', 'Dent', 'Maturity', 'Harvest']

# (1) remote sensing: every prefix x every time period
RS = [prefix + period for prefix in RS_sublist for period in time_list]

# (2) climate variables: every prefix x every time period
WE = [prefix + period for prefix in WE_sublist for period in time_list]

# (3) phenology: every prefix (three climate prefixes excluded) x every stage of the crop
stage_list = phology_list_soybean if crop_type == 'soybean' else phology_list_maize
PH = [
    prefix + stage
    for prefix in RS_sublist + WE_sublist
    if prefix not in ('Precipitation_', 'Humidity_', 'Pressure_')
    for stage in stage_list
]

# (4) location encoding
LO = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI', 'Longitude', 'Latitude']

# (5) soil properties
SO = ['AREA_Irrigated', 'AREA', 'PIC', 'CaCO3', 'CEC', 'Drainage', 'EC', 'I_class', 'N_class',
      'Max_OM', 'PAWS', 'pH', 'SAR', 'Texture', 'Sand', 'Silt', 'Clay']

# Cumulative groups: RS, +climate, +phenology, +location, +soil.
more_feature_list = [
    RS,
    RS + WE,
    RS + WE + PH,
    RS + WE + PH + LO,
    RS + WE + PH + LO + SO,
]

In [33]:
# Run the GDT / XGBoost experiment for every feature group and every test year
# (2014-2021), then save one metrics row per run.
result_dir = 'D:/毕业大论文/结果/特征组合结果/多因子结果/'
metric_list = []
# Iterate over the groups themselves so the loop cannot drift out of sync with
# the number of entries in more_feature_list.
for i, feature_group in enumerate(more_feature_list):
    for year in range(2014,2022):
        # The first two returned values are placeholders (always None).
        shap_values_train,shap_values_test,metric_list = yield_predictor(
            data, test_year=[year], N=10, experiment = 'GDT', croptype = crop_type, model = ['XGBoost'], verbose = 0,
            result = True, seed = 99, feature_list = featurelist, n_features = 500, metric_list = metric_list,
            result_dir = result_dir, feature_name = feature_group, feature_type = 'more_feature', col = i)

name = ['feature','seed', 'croptype', 'test_year', 'model', 'Train_years', 'RMSE', 'R2']
print(metric_list)
metric_df = pd.DataFrame(columns = name, data=metric_list)
metric_df.to_csv(result_dir + crop_type +'_预测指标.csv',header=True)

feature name: 0 seed: 99 croptype: soybean   test_year: 2014   model: XGBoost   experiment: GDT n_feature： 500
540
546
The foluma of de-trend the yield: yield_new = yield - 0.030×year + 2.915 - (-58.235)
2023-03-14 [2014] XGBoost 【Train_RMSE: 0.02, Train_R2: 1.00, Test_RMSE: 0.2875, Test_R2: 0.72】
feature name: 0 seed: 99 croptype: soybean   test_year: 2015   model: XGBoost   experiment: GDT n_feature： 500
540
546
The foluma of de-trend the yield: yield_new = yield - 0.030×year + 2.962 - (-57.658)
2023-03-14 [2015] XGBoost 【Train_RMSE: 0.02, Train_R2: 1.00, Test_RMSE: 0.3095, Test_R2: 0.70】
feature name: 0 seed: 99 croptype: soybean   test_year: 2016   model: XGBoost   experiment: GDT n_feature： 500
540
546
The foluma of de-trend the yield: yield_new = yield - 0.033×year + 3.003 - (-63.403)
2023-03-14 [2016] XGBoost 【Train_RMSE: 0.02, Train_R2: 1.00, Test_RMSE: 0.3933, Test_R2: 0.37】
feature name: 0 seed: 99 croptype: soybean   test_year: 2017   model: XGBoost   experiment: GDT n_featu

### 多因子特征结果-玉米

In [34]:
# Test years 2014-2021; each year is predicted from a 10-year training window.
# read_data prints a message and returns None if a file cannot be read.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
data = read_data('D:/毕业大论文/数据/input_maize_rename.csv')
featurelist = read_data('D:/毕业大论文/结果/SHAP值结果/maize/average_feature_importance.csv')

In [35]:
# Feature-group combinations: each group adds one more data source to the previous one.
crop_type = 'maize'
all_features = data.columns.tolist()

# Name prefixes of the remote-sensing variables.
RS_sublist = [
    'NDVI_', 'EVI_', 'LSWI_', 'GCVI_', 'RVI_', 'SAVI_', 'WDRVI_', 'Fpar_', 'LAI_', 'ET_',
    'LE_', 'LST_Day_', 'LST_Night_', 'Red_', 'Nir_', 'Blue_', 'Green_', 'Nir1_', 'Swir1_', 'Swir2_',
]
# Time-period suffixes appended to every prefix.
time_list = [
    'Mid Mar.', 'Late Mar.',
    'Early Apr.', 'Mid Apr.', 'Late Apr.',
    'Early May.', 'Mid May.', 'Late May.',
    'Early Jun.', 'Mid Jun.', 'Late Jun.',
    'Early Jul.', 'Mid Jul.', 'Late Jul.',
    'Early Aug.', 'Mid Aug.', 'Late Aug.',
    'Early Sep.', 'Mid Sep.', 'Late Sep.',
    'Early Oct.', 'Mid Oct.', 'Late Oct.',
    'Early Nov.', 'Mid Nov.', 'Late Nov.',
    'Early Dec.',
]
# Name prefixes of the climate variables.
WE_sublist = [
    'SPI14d_', 'SPI30d_', 'SPI90d_', 'EDDI14d_', 'EDDI30d_', 'EDDI90d_',
    'SPEI14d_', 'SPEI30d_', 'SPEI90d_', 'PDSI_', 'Palmer Z_', 'Precipitation_', 'Temp_',
    'Humidity_', 'Pressure_', 'Shortwave_', 'Longwave_',
]
# Phenological stage suffixes per crop.
phology_list_soybean = ['Planting', 'Emerging', 'Blooming', 'Podding', 'Dropping leaves', 'Harvest']
phology_list_maize = ['Planting', 'Emerging', 'Silking', 'Dough', 'Dent', 'Maturity', 'Harvest']

# (1) remote sensing: every prefix x every time period
RS = [prefix + period for prefix in RS_sublist for period in time_list]

# (2) climate variables: every prefix x every time period
WE = [prefix + period for prefix in WE_sublist for period in time_list]

# (3) phenology: every prefix (three climate prefixes excluded) x every stage of the crop
stage_list = phology_list_soybean if crop_type == 'soybean' else phology_list_maize
PH = [
    prefix + stage
    for prefix in RS_sublist + WE_sublist
    if prefix not in ('Precipitation_', 'Humidity_', 'Pressure_')
    for stage in stage_list
]

# (4) location encoding
LO = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI', 'Longitude', 'Latitude']

# (5) soil properties
SO = ['AREA_Irrigated', 'AREA', 'PIC', 'CaCO3', 'CEC', 'Drainage', 'EC', 'I_class', 'N_class',
      'Max_OM', 'PAWS', 'pH', 'SAR', 'Texture', 'Sand', 'Silt', 'Clay']

# Cumulative groups: RS, +climate, +phenology, +location, +soil.
more_feature_list = [
    RS,
    RS + WE,
    RS + WE + PH,
    RS + WE + PH + LO,
    RS + WE + PH + LO + SO,
]

In [36]:
# Run the GDT / XGBoost experiment for every feature group and every test year
# (2014-2021), then save one metrics row per run.
result_dir = 'D:/毕业大论文/结果/特征组合结果/多因子结果/'
metric_list = []
# Iterate over the groups themselves so the loop cannot drift out of sync with
# the number of entries in more_feature_list.
for i, feature_group in enumerate(more_feature_list):
    for year in range(2014,2022):
        # The first two returned values are placeholders (always None).
        shap_values_train,shap_values_test,metric_list = yield_predictor(
            data, test_year=[year], N=10, experiment = 'GDT', croptype = crop_type, model = ['XGBoost'], verbose = 0,
            result = True, seed = 99, feature_list = featurelist, n_features = 500, metric_list = metric_list,
            result_dir = result_dir, feature_name = feature_group, feature_type = 'more_feature', col = i)

name = ['feature','seed', 'croptype', 'test_year', 'model', 'Train_years', 'RMSE', 'R2']
print(metric_list)
metric_df = pd.DataFrame(columns = name, data=metric_list)
metric_df.to_csv(result_dir + crop_type +'_预测指标.csv',header=True)

feature name: 0 seed: 99 croptype: maize   test_year: 2014   model: XGBoost   experiment: GDT n_feature： 500
540
546
The foluma of de-trend the yield: yield_new = yield - 0.117×year + 9.097 - (-226.754)
2023-03-15 [2014] XGBoost 【Train_RMSE: 0.06, Train_R2: 1.00, Test_RMSE: 0.8829, Test_R2: 0.73】
feature name: 0 seed: 99 croptype: maize   test_year: 2015   model: XGBoost   experiment: GDT n_feature： 500
540
546
The foluma of de-trend the yield: yield_new = yield - 0.124×year + 9.205 - (-240.281)
2023-03-15 [2015] XGBoost 【Train_RMSE: 0.05, Train_R2: 1.00, Test_RMSE: 0.9955, Test_R2: 0.66】
feature name: 0 seed: 99 croptype: maize   test_year: 2016   model: XGBoost   experiment: GDT n_feature： 500
540
546
The foluma of de-trend the yield: yield_new = yield - 0.133×year + 9.360 - (-257.905)
2023-03-15 [2016] XGBoost 【Train_RMSE: 0.05, Train_R2: 1.00, Test_RMSE: 0.9180, Test_R2: 0.69】
feature name: 0 seed: 99 croptype: maize   test_year: 2017   model: XGBoost   experiment: GDT n_feature： 5

### 单因子特征结果-大豆

In [3]:
# Test years 2014-2021; each year is predicted from a 10-year training window.
# read_data prints a message and returns None if a file cannot be read.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
data = read_data('D:/毕业大论文/数据/input_soybean_rename.csv')
featurelist = read_data('D:/毕业大论文/结果/SHAP值结果/soybean/average_feature_importance.csv')


In [4]:
# Single features to evaluate one at a time (soybean).
all_features = data.columns.tolist()
one_feature_list = ['EVI_Podding', 'Latitude', 'PAWS', 'SPI30d_Early Sep.', 'PIC']
one_feature_list

['EVI_Podding', 'Latitude', 'PAWS', 'SPI30d_Early Sep.', 'PIC']

In [None]:
# Single-factor prediction, soybean: one model per feature and per test year (2014-2021).
crop_type = 'soybean'
result_dir = 'D:/毕业大论文/结果/特征组合结果/单因子结果/'
metric_list = []
# Iterate over the features themselves instead of a hard-coded count.
for feature in one_feature_list:
    for year in range(2014,2022):
        # `col` is what yield_predictor stores in the metrics row; without it the
        # 'feature' column would be None for every single-factor run.
        # The first two returned values are placeholders (always None).
        shap_values_train,shap_values_test,metric_list = yield_predictor(
            data, test_year=[year], N=10, experiment = 'GDT', croptype = crop_type, model = ['XGBoost'], verbose = 0,
            result = True, seed = 99, feature_list = featurelist, n_features = 500, metric_list = metric_list,
            result_dir = result_dir, feature_name = feature, col = feature)

name = ['feature','seed', 'croptype', 'test_year', 'model', 'Train_years', 'RMSE', 'R2']
print(metric_list)
metric_df = pd.DataFrame(columns = name, data=metric_list)
metric_df.to_csv(result_dir + crop_type +'_预测指标.csv',header=True)

### 单因子特征结果-玉米

In [10]:
# Test years 2014-2021; each year is predicted from a 10-year training window.
# read_data prints a message and returns None if a file cannot be read.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
data = read_data('D:/毕业大论文/数据/input_maize_rename.csv')
featurelist = read_data('D:/毕业大论文/结果/SHAP值结果/maize/average_feature_importance.csv')

In [11]:
# Single features to evaluate one at a time (maize).
all_features = data.columns.tolist()
one_feature_list = ['LSWI_Dough', 'EVI_Dough', 'Latitude', 'pH', 'EDDI14d_Late Oct.', 'AREA_Irrigated']
one_feature_list

['LSWI_Dough',
 'EVI_Dough',
 'Latitude',
 'pH',
 'EDDI14d_Late Oct.',
 'AREA_Irrigated']

In [12]:
# Single-factor prediction, maize: one model per feature and per test year (2014-2021).
crop_type = 'maize'
result_dir = 'D:/毕业大论文/结果/特征组合结果/单因子结果/'
metric_list = []
# Iterate over the features themselves instead of a hard-coded count.
for feature in one_feature_list:
    for year in range(2014,2022):
        # `col` is what yield_predictor stores in the metrics row; without it the
        # 'feature' column would be None for every single-factor run.
        # The first two returned values are placeholders (always None).
        shap_values_train,shap_values_test,metric_list = yield_predictor(
            data, test_year=[year], N=10, experiment = 'GDT', croptype = crop_type, model = ['XGBoost'], verbose = 0,
            result = True, seed = 99, feature_list = featurelist, n_features = 500, metric_list = metric_list,
            result_dir = result_dir, feature_name = feature, col = feature)

name = ['feature','seed', 'croptype', 'test_year', 'model', 'Train_years', 'RMSE', 'R2']
print(metric_list)
metric_df = pd.DataFrame(columns = name, data=metric_list)
metric_df.to_csv(result_dir + crop_type +'_预测指标.csv',header=True)

feature name: LSWI_Dough seed: 99 croptype: maize   test_year: 2014   model: XGBoost   experiment: GDT n_feature： 500
1
7
The foluma of de-trend the yield: yield_new = yield - 0.117×year + 9.097 - (-226.754)
2023-03-12 [2014] XGBoost 【Train_RMSE: 1.13, Train_R2: 0.69, Test_RMSE: 1.3936, Test_R2: 0.34】
feature name: LSWI_Dough seed: 99 croptype: maize   test_year: 2015   model: XGBoost   experiment: GDT n_feature： 500
1
7
The foluma of de-trend the yield: yield_new = yield - 0.124×year + 9.205 - (-240.281)
2023-03-12 [2015] XGBoost 【Train_RMSE: 1.09, Train_R2: 0.70, Test_RMSE: 1.2390, Test_R2: 0.47】
feature name: LSWI_Dough seed: 99 croptype: maize   test_year: 2016   model: XGBoost   experiment: GDT n_feature： 500
1
7
The foluma of de-trend the yield: yield_new = yield - 0.133×year + 9.360 - (-257.905)
2023-03-12 [2016] XGBoost 【Train_RMSE: 1.07, Train_R2: 0.71, Test_RMSE: 1.2245, Test_R2: 0.45】
feature name: LSWI_Dough seed: 99 croptype: maize   test_year: 2017   model: XGBoost   expe