In [None]:
import datetime
import os

import numpy as np
import pandas as pd
import tensorflow as tf

from joblib import dump, load
from numpy.random import seed
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score

seed_value= 0
# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting it here presumably
# only affects child processes, not the already-running kernel — confirm this is the intent.
os.environ['PYTHONHASHSEED']=str(seed_value)
# 2. Seed NumPy's global RNG (`seed` is `numpy.random.seed`, so the next two calls do the same thing)
seed(seed_value)
np.random.seed(seed_value)
# 3. Seed TensorFlow's global RNG
tf.random.set_seed(seed_value)

In [None]:
# Mount Google Drive at /content/drive; every data/model path used below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def read_data(path):
    '''Load a UTF-8 CSV file into a DataFrame.

    Parameters:
        path: location of the CSV file

    Returns:
        the parsed DataFrame, or None when the file is missing or cannot be
        read (a message is printed in that case instead of raising)
    '''
    frame = None
    try:
        frame = pd.read_csv(path, engine='python', encoding='utf8')
    except FileNotFoundError:
        print('File not found. Please check the path and filename.')
    except Exception as err:
        print('An error occurred:', err)
    return frame

def assess_metrics(truth,pred):
    '''Print MAE, MSE, RMSE and R2 for a set of predictions and return (RMSE, R2).

    Parameters:
        truth: observed target values (anything scikit-learn's regression metrics accept)
        pred: predicted values with the same number of samples as `truth`

    Returns:
        tuple (rmse, r2)
    '''
    print('----')
    mae = mean_absolute_error(truth,pred)
    mse = mean_squared_error(truth,pred)
    # `mean_squared_error(..., squared=False)` was deprecated in scikit-learn 1.4 and removed in 1.6.
    # Taking the square root per output and then averaging reproduces what it returned.
    rmse = np.sqrt(mean_squared_error(truth,pred,multioutput='raw_values')).mean()
    r2 = r2_score(truth,pred)
    print('MAE: %.4f'%mae)
    print('MSE: %.4f'%mse)
    print('RMSE: %.4f'%rmse)
    print('R2: %.4f'%r2)
    return rmse,r2

def model_result(est,X_train_val, X_test, y_train_val, y_test):
    '''Report metrics of an already-fitted estimator on the training and test sets.

    No fitting happens here: `est.predict` is called on both feature sets and
    the metrics are printed through `assess_metrics`.

    Returns:
        tuple (RMSE, R2) computed on the test set
    '''
    # training-set metrics (printed only)
    train_pred = est.predict(X_train_val)
    print('train metrics')
    assess_metrics(y_train_val, train_pred)
    # test-set metrics (printed and returned)
    print('test metrics')
    test_pred = est.predict(X_test)
    return assess_metrics(y_test, test_pred)
def model_lineresult(est,model,year,X_train_val, X_test, y_train_val, y_test):
    '''Print train/test RMSE and R2 of an already-fitted estimator on one line.

    Parameters:
        est: fitted estimator exposing `predict`
        model: model label included in the printed line
        year: year label included in the printed line
        X_train_val, y_train_val: training features and targets
        X_test, y_test: test features and targets

    Returns:
        tuple (test RMSE, test R2)
    '''
    # requires the `datetime` module to be imported at the top of the notebook
    date = datetime.datetime.now().strftime('%Y-%m-%d')

    def _rmse(truth, pred):
        # Replacement for mean_squared_error(..., squared=False), which was removed in scikit-learn 1.6.
        return np.sqrt(mean_squared_error(truth, pred, multioutput='raw_values')).mean()

    y_train_pred = est.predict(X_train_val)
    y_test_pred = est.predict(X_test)
    # compute every metric once and reuse it for both the printout and the return value
    train_rmse = _rmse(y_train_val, y_train_pred)
    train_r2 = r2_score(y_train_val, y_train_pred)
    test_rmse = _rmse(y_test, y_test_pred)
    test_r2 = r2_score(y_test, y_test_pred)
    print('{} {} {} 【Train_RMSE: {:.2f}, Train_R2: {:.2f}, Test_RMSE: {:.4f}, Test_R2: {:.2f}】'
          .format(date,year,model,train_rmse,train_r2,test_rmse,test_r2))
    return test_rmse,test_r2

### 查看已有结果的精度

In [None]:
# Recompute RMSE / R2 from the prediction files written by yield_predictor and collect them in one table.
crop_type = 'maize'
metric_list = []
# NOTE: this rebinds the name `seed` imported from numpy.random in the first cell.
seed = 99

for model in ['LSTM','DNN']:
  for year in range(2014,2022):
    # the file prefix is the seed the predictions were produced with
    result = read_data('/content/drive/MyDrive/csv_data/'+crop_type+'/'+str(seed)+'_'+str(year)+'_'+model+'_expGDT_N10_预测结果.csv')
    if result is None:
      # read_data already printed why the file could not be loaded; skip it instead of crashing on None
      continue
    RMSE, R2 = assess_metrics(result['yield(t/ha)'],result["pred"])
    metric_list.append([seed,crop_type,year,model,RMSE, R2])

name = ['seed', 'croptype', 'test_year', 'model', 'RMSE', 'R2']
metric_df = pd.DataFrame(columns = name, data=metric_list)
metric_df.to_csv('/content/drive/MyDrive/csv_data/' + crop_type +'_预测指标.csv',header=True)
metric_df

----
MAE: 4.8116
MSE: 24.3077
RMSE: 4.9303
R2: -7.2654
----
MAE: 1.0229
MSE: 1.6222
RMSE: 1.2737
R2: 0.4414
----
MAE: 0.7303
MSE: 0.8409
RMSE: 0.9170
R2: 0.6911
----
MAE: 0.7803
MSE: 0.9508
RMSE: 0.9751
R2: 0.7107
----
MAE: 0.7271
MSE: 0.9601
RMSE: 0.9798
R2: 0.7175
----
MAE: 0.7482
MSE: 0.8874
RMSE: 0.9420
R2: 0.6120
----
MAE: 0.8880
MSE: 1.2282
RMSE: 1.1082
R2: 0.5573
----
MAE: 1.0087
MSE: 1.6412
RMSE: 1.2811
R2: 0.7290
----
MAE: 0.8459
MSE: 1.1833
RMSE: 1.0878
R2: 0.5976
----
MAE: 0.6816
MSE: 0.7543
RMSE: 0.8685
R2: 0.7403
----
MAE: 0.7569
MSE: 0.9163
RMSE: 0.9572
R2: 0.6634
----
MAE: 0.7373
MSE: 0.9230
RMSE: 0.9607
R2: 0.7192
----
MAE: 0.8408
MSE: 1.2116
RMSE: 1.1007
R2: 0.6435
----
MAE: 0.7758
MSE: 1.0392
RMSE: 1.0194
R2: 0.5456
----
MAE: 0.9215
MSE: 1.4246
RMSE: 1.1936
R2: 0.4865
----
MAE: 0.7614
MSE: 1.0795
RMSE: 1.0390
R2: 0.8218


Unnamed: 0,seed,croptype,test_year,model,RMSE,R2
0,99,maize,2014,LSTM,4.93028,-7.265363
1,99,maize,2015,LSTM,1.273672,0.441413
2,99,maize,2016,LSTM,0.916993,0.691136
3,99,maize,2017,LSTM,0.975103,0.710709
4,99,maize,2018,LSTM,0.979822,0.717499
5,99,maize,2019,LSTM,0.942022,0.611968
6,99,maize,2020,LSTM,1.108237,0.557328
7,99,maize,2021,LSTM,1.281104,0.729045
8,99,maize,2014,DNN,1.087809,0.597631
9,99,maize,2015,DNN,0.868483,0.740284


### 大豆预测


In [None]:
# 大豆预测
# read data
# 2014-2021，每10年预测一年
# Soybean input table; test years 2014-2021, each predicted from the preceding 10 years.
# Rows containing missing values are dropped right after loading.
data = read_data('/content/drive/MyDrive/csv_data/input_soybean_rename.csv').dropna()

# Table dimensions
print(data.shape)

# Column list, used to tell the time-series, phenology and static features apart
print(data.columns.tolist())

(12119, 1240)
['Year', 'State', 'Value', 'sta_con', 'yield(t/ha)', 'NDVI_Mid Mar.', 'EVI_Mid Mar.', 'LSWI_Mid Mar.', 'GCVI_Mid Mar.', 'RVI_Mid Mar.', 'SAVI_Mid Mar.', 'WDRVI_Mid Mar.', 'Fpar_Mid Mar.', 'LAI_Mid Mar.', 'ET_Mid Mar.', 'LE_Mid Mar.', 'LST_Day_Mid Mar.', 'LST_Night_Mid Mar.', 'SPI14d_Mid Mar.', 'SPI30d_Mid Mar.', 'SPI90d_Mid Mar.', 'EDDI14d_Mid Mar.', 'EDDI30d_Mid Mar.', 'EDDI90d_Mid Mar.', 'SPEI14d_Mid Mar.', 'SPEI30d_Mid Mar.', 'SPEI90d_Mid Mar.', 'PDSI_Mid Mar.', 'Palmer Z_Mid Mar.', 'Red_Mid Mar.', 'Nir_Mid Mar.', 'Blue_Mid Mar.', 'Green_Mid Mar.', 'Nir1_Mid Mar.', 'Swir1_Mid Mar.', 'Swir2_Mid Mar.', 'Precipitation_Mid Mar.', 'Temp_Mid Mar.', 'Humidity_Mid Mar.', 'Pressure_Mid Mar.', 'Shortwave_Mid Mar.', 'Longwave_Mid Mar.', 'NDVI_Late Mar.', 'EVI_Late Mar.', 'LSWI_Late Mar.', 'GCVI_Late Mar.', 'RVI_Late Mar.', 'SAVI_Late Mar.', 'WDRVI_Late Mar.', 'Fpar_Late Mar.', 'LAI_Late Mar.', 'ET_Late Mar.', 'LE_Late Mar.', 'LST_Day_Late Mar.', 'LST_Night_Late Mar.', 'SPI14d_Lat

In [None]:
# NOTE(review): scratch arithmetic; presumably 6 phenology stages x 27 time steps (the sizes used in yield_predictor) — confirm or delete.
6*27

162

In [None]:
# Number of rows currently held in `data`
num_samples = data.shape[0]
num_samples

12119

                        +-----------------------+
                        |       time_inputs      |
                        +-----------+-----------+
                                    |
                                    |
                            +-------v-------+
                            |   Time Layer  |
                            +---------------+
                                    |
                                    |
                            +-------v-------+
                            |  Batch Normal |
                            +---------------+
                                    |
                                    |
                        +-------v-------+  +-----------------------+
                        |   phology_inputs |  |     static_inputs      |
                        +-----------+----+  +-----------+-----------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |  Phology Layer |  | Concatenation |
                            +---------------+  +---------------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |  Batch Normal |  |    Dense 1    |
                            +---------------+  +---------------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |    Dense 2    |  |  Batch Normal |
                            +---------------+  +---------------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |  Output Layer |  |    Dense 3    |
                            +---------------+  +---------------+
                                                      |
                                                      |
                                              +-------v-------+
                                              |  Output Layer |
                                              +---------------+



In [None]:
                        +-----------------------+
                        |       time_inputs      |
                        +-----------+-----------+
                                    |
                                    |
                            +-------v-------+
                            |   Time Layer  |
                            +---------------+
                                    |
                                    |
                            +-------v-------+
                            |  Batch Normal |
                            +---------------+
                                    |
                                    |
                        +-------v-------+  +-----------------------+
                        |   phology_inputs |  |     static_inputs      |
                        +-----------+----+  +-----------+-----------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |  Phology Layer |  | Concatenation |
                            +---------------+  +---------------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |  Batch Normal |  |    Dense 1    |
                            +---------------+  +---------------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |    Dense 2    |  |  Batch Normal |
                            +---------------+  +---------------+
                                    |                  |
                                    |                  |
                            +-------v-------+  +-------v-------+
                            |  Output Layer |  |    Dense 3    |
                            +---------------+  +---------------+
                                                      |
                                                      |
                                              +-------v-------+
                                              |  Output Layer |
                                              +---------------+



TypeError: ignored

In [None]:
class LSTMModel(tf.keras.Model):
    '''Three-input regression network.

    Two LSTM branches (time series and phenology) are each batch-normalised,
    concatenated with the static inputs, and passed through
    Dense -> BatchNorm -> Dense -> linear single-unit output.
    '''
    def __init__(self, time_shape, phology_shape, static_shape, time_lstm_units=32, phology_lstm_units=32, dense_units=128, learning_rate=0.001):
        '''Create the layers plus the optimizer and loss the caller passes to compile().

        Parameters:
            time_shape: forwarded as `input_shape` to the time-series LSTM
            phology_shape: forwarded as `input_shape` to the phenology LSTM
            static_shape: accepted but not used by this constructor
            time_lstm_units: units of the time-series LSTM
            phology_lstm_units: units of the phenology LSTM
            dense_units: units of each of the two hidden Dense layers
            learning_rate: learning rate of the Adam optimizer stored on the model
        '''
        super(LSTMModel, self).__init__()

        self.time_lstm_units = time_lstm_units
        self.phology_lstm_units = phology_lstm_units
        self.dense_units = dense_units

        # return_sequences=False: each LSTM emits only its final output vector
        self.time_layer = tf.keras.layers.LSTM(units=time_lstm_units, return_sequences=False, input_shape=time_shape)
        self.phology_layer = tf.keras.layers.LSTM(units=phology_lstm_units, return_sequences=False, input_shape=phology_shape)
        self.batch_norm1 = tf.keras.layers.BatchNormalization()
        self.batch_norm2 = tf.keras.layers.BatchNormalization()
        self.concat_layer = tf.keras.layers.Concatenate(axis=-1)
        self.dense_layer1 = tf.keras.layers.Dense(units=dense_units, activation='relu')
        self.batch_norm3 = tf.keras.layers.BatchNormalization()
        self.dense_layer2 = tf.keras.layers.Dense(units=dense_units, activation='relu')
        self.output_layer = tf.keras.layers.Dense(units=1, activation='linear')

        # Stored here so that the training code can call compile(optimizer=est.optimizer, loss=est.loss)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self.loss = tf.keras.losses.MeanSquaredError()

    def call(self, inputs):
        '''Forward pass.

        Parameters:
            inputs: sequence of three tensors (time_inputs, phology_inputs, static_inputs)

        Returns:
            output of the final single-unit linear layer
        '''
        time_inputs, phology_inputs, static_inputs = inputs
        x1 = self.time_layer(time_inputs)
        x2 = self.phology_layer(phology_inputs)
        x1 = self.batch_norm1(x1)
        x2 = self.batch_norm2(x2)
        # static inputs bypass the recurrent layers and join the two LSTM summaries here
        x = self.concat_layer([x1, x2, static_inputs])
        x = self.dense_layer1(x)
        x = self.batch_norm3(x)
        x = self.dense_layer2(x)
        output = self.output_layer(x)
        return output



class DNNModel(tf.keras.Model):
    '''Fully connected regression network: Dense 128-128-64-32-16, Dropout(0.2), then a linear single-unit output.'''
    def __init__(self, func_act='relu'):
        '''Create the layers.

        Parameters:
            func_act: activation used by all five hidden Dense layers
        '''
        super(DNNModel, self).__init__()
        
        self.dense0 = tf.keras.layers.Dense(units=128, activation=func_act)
        self.dense1 = tf.keras.layers.Dense(units=128, activation=func_act)
        self.dense2 = tf.keras.layers.Dense(units=64, activation=func_act)
        self.dense3 = tf.keras.layers.Dense(units=32, activation=func_act)
        self.dense4 = tf.keras.layers.Dense(units=16, activation=func_act)
        self.drop0 = tf.keras.layers.Dropout(0.2)
        self.outp = tf.keras.layers.Dense(units=1, activation='linear')

    def call(self, inputs):
        '''Forward pass: the five Dense layers in order, dropout, then the output layer.'''
        x = self.dense0(inputs)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        # dropout is applied only once, right before the output layer
        x = self.drop0(x)
        outp = self.outp(x)
        return outp

In [None]:
def yield_predictor(data, test_year=[2020], N=2, experiment = 'GDT', croptype = 'soybean', model = ['XGBoost'], verbose = 0, 
                    tune_model = False, model_parameter=None, result = False, result_dir = '', feature_importance = False,
                    fea_eng = False,seed = 99,save_model=False, save_model_dir='', trend_n = 30,
                    importance_dir = '', feature_list = None, n_features = 200, metric_list = None):
    '''Train a DNN or LSTM yield model on the N years before a test year and evaluate it on that year.

    The yield is linearly de-trended before training (experiment 'GDT'): a linear
    regression of yield on year is fitted on the `trend_n` years before the test
    year, and the trend is removed from both the training and the test targets.
    Saved predictions have the trend added back.

    Parameters:
        data: DataFrame containing 'Year', 'State', 'Value', 'sta_con', 'yield(t/ha)',
            'GDT_trend_yield' and the feature columns
        test_year: list of test years; the training window is the N years before test_year[0]
        N: number of years in the training/validation window
        experiment: only 'GDT' is implemented
        croptype: crop label used in output paths and in the metric row
        model: ['DNN'] or ['LSTM']
        result: when truthy, write the per-sample predictions to a CSV under result_dir
        result_dir: base directory of the prediction CSV
        seed: label written into the result file name and the metric row
            (the train/validation split itself uses a fixed random_state of 99)
        save_model: when truthy, dump the fitted model with joblib under save_model_dir
        save_model_dir: base directory of the dumped model
        trend_n: number of years before test_year[0] used to fit the linear trend
        feature_list: DataFrame with a 'feature' column; its first n_features rows
            select the DNN inputs (required for ['DNN'], ignored for ['LSTM'])
        n_features: number of features taken from feature_list
        metric_list: list that receives one row [seed, croptype, test_year[0], model[0], RMSE, R2];
            a new list is created when None
        verbose, tune_model, model_parameter, feature_importance, fea_eng, importance_dir:
            accepted for call compatibility, not used

    Returns:
        metric_list with the row for this run appended

    Raises:
        ValueError: unsupported `experiment` or `model`, or ['DNN'] without `feature_list`
    '''
    # Fail early with a clear message instead of an unbound-variable error further down.
    if experiment != 'GDT':
        raise ValueError("Unsupported experiment {!r}: only 'GDT' is implemented".format(experiment))
    is_dnn = model == ['DNN']
    is_lstm = model == ['LSTM']
    if not (is_dnn or is_lstm):
        raise ValueError("Unsupported model {!r}: expected ['DNN'] or ['LSTM']".format(model))
    if is_dnn and feature_list is None:
        raise ValueError("feature_list is required when model == ['DNN']")
    # A fresh list per call avoids sharing one default list between calls.
    if metric_list is None:
        metric_list = []

    # Columns that identify a sample or hold the target; never used as model inputs.
    id_columns = ['sta_con','State','yield(t/ha)','Year','Value','GDT_trend_yield']

    if is_dnn:
        # keep only the top-ranked features for the DNN
        feature_name = feature_list.iloc[0:n_features]['feature'].tolist()
        data = data[id_columns + feature_name]

    # split train-validation and test data
    first_test_year = test_year[0]
    data_test = data[data['Year'].isin(test_year)]
    data_train_val = data[data['Year'].isin(list(range(first_test_year - N, first_test_year)))]

    # ---- de-trend the yield using a linear model fitted on the trend window
    trend_window = data[data['Year'].isin(list(range(first_test_year - trend_n, first_test_year)))]
    linear_m = linear_model.LinearRegression()
    linear_m.fit(np.array(trend_window['Year']).reshape(-1,1),
                 np.array(trend_window['yield(t/ha)']).reshape(-1,1))
    slope = linear_m.coef_[0,0]
    intercept = linear_m.intercept_[0]
    # Offset added back after removing the trend: the training mean, or the training maximum
    # when the fitted trend is decreasing.
    if slope < 0:
        yield_mean = data_train_val['yield(t/ha)'].max()
    else:
        yield_mean = data_train_val['yield(t/ha)'].mean()
    print('The foluma of de-trend the yield: yield_new = yield - {:.3f}×year + {:.3f} - ({:.3f})'
          .format(slope,yield_mean,intercept))

    def _detrended_yield(frame):
        # yield with the fitted linear trend removed and the offset added
        return frame['yield(t/ha)'] - slope * frame['Year'] + yield_mean - intercept

    y_train_val = _detrended_yield(data_train_val)
    y_test = _detrended_yield(data_test)

    X_train_val = data_train_val.drop(id_columns, axis=1)
    X_test = data_test.drop(id_columns, axis=1)

    # ---- scale features with statistics of the training window only
    # New frames are built instead of assigning through .loc so that non-float columns
    # cannot trigger dtype-incompatibility problems.
    std_scaler = preprocessing.StandardScaler().fit(X_train_val)
    X_train_val = pd.DataFrame(std_scaler.transform(X_train_val), index=X_train_val.index, columns=X_train_val.columns)
    X_test = pd.DataFrame(std_scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    if is_dnn:
        X_test_arr = np.array(X_test)
        y_test = np.array(y_test)
        X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.15, random_state= 99)
        est = DNNModel()
        est.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.01), loss=tf.keras.losses.MeanSquaredError())
        est.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val),verbose=0)

        # Evaluate the model on the test set
        y_test_pred = est.predict(X_test_arr)
    else:
        # ---- LSTM: split the columns into time-series, phenology and static groups
        columns = X_train_val.columns.tolist()

        # time-series variables: one group of columns per period label
        time_labels = [
            'Mid Mar.','Late Mar.','Early Apr.','Mid Apr.','Late Apr.','Early May.','Mid May.','Late May.','Early Jun.','Mid Jun.','Late Jun.',
            'Early Jul.','Mid Jul.','Late Jul.','Early Aug.','Mid Aug.','Late Aug.','Early Sep.','Mid Sep.','Late Sep.','Early Oct.','Mid Oct.',
            'Late Oct.','Early Nov.','Mid Nov.','Late Nov.','Early Dec.']
        n_steps = len(time_labels)
        time_order = [c for c in columns if any(label in c for label in time_labels)]

        # phenology variables, ordered stage by stage
        stage_labels = ['_Planting', '_Emerging', '_Blooming', '_Podding', '_Dropping leaves', '_Harvest']
        n_stages = len(stage_labels)
        phology_order = [c for stage in stage_labels for c in columns if stage in c]

        static_order = [
            'AREA_Irrigated', 'AREA', 'PIC','CaCO3','CEC','Drainage','EC','I_class','N_class','Max_OM','PAWS','pH','SAR','Texture','Sand','Silt','Clay',
            'Longitude', 'Latitude','IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI'
        ]

        print('时间输入维度（样本量，时间步长，变量）：',('...',n_steps,len(time_order)/n_steps))
        print('物候输入维度：',('...',n_stages,len(phology_order)/n_stages))
        print('静态输入维度：',('...',len(static_order)))
        print(X_train_val[time_order].shape)
        print(X_train_val[phology_order].shape)

        def _to_sequence(frame, order, steps):
            # (samples, steps * vars) -> (samples, steps, vars)
            return np.array(frame[order]).reshape(frame.shape[0], steps, len(order) // steps)

        X_train_time = _to_sequence(X_train_val, time_order, n_steps)
        X_train_phology = _to_sequence(X_train_val, phology_order, n_stages)
        X_train_static = np.array(X_train_val[static_order])

        # Train on the DE-TRENDED yield: the test target and the back-transform of the saved
        # predictions both assume the model predicts de-trended values.
        y_train = np.array(y_train_val).reshape(X_train_val.shape[0],1)

        # train and val
        (X_train_time, X_val_time, X_train_phology, X_val_phology,
         X_train_static, X_val_static, y_train, y_val) = train_test_split(
            X_train_time, X_train_phology, X_train_static, y_train, test_size=0.15, random_state= 99)
        print(X_train_time.shape, X_val_time.shape, X_train_phology.shape, X_val_phology.shape, X_train_static.shape, X_val_static.shape, y_train.shape, y_val.shape)

        X_test_time = _to_sequence(X_test, time_order, n_steps)
        X_test_phology = _to_sequence(X_test, phology_order, n_stages)
        X_test_static = np.array(X_test[static_order])
        y_test = np.array(y_test).reshape(X_test.shape[0],1)

        # Create, compile and train the model
        est = LSTMModel(X_train_time.shape[1:], X_train_phology.shape[1:], X_train_static.shape[1:],
                        time_lstm_units=64, phology_lstm_units=64, dense_units=64,)
        est.compile(optimizer=est.optimizer, loss=est.loss)
        est.fit([X_train_time, X_train_phology, X_train_static], y_train,
                validation_data=([X_val_time, X_val_phology, X_val_static], y_val), epochs=100, verbose = 0)

        # Evaluate the model on the test set
        y_test_pred = est.predict([X_test_time, X_test_phology, X_test_static])

    RMSE, R2 = assess_metrics(y_test, y_test_pred)
    print('RMSE:',RMSE,'R2:',R2)
    # same row layout as the metric table built from the saved prediction files
    metric_list.append([seed, croptype, first_test_year, model[0], RMSE, R2])

    if save_model:
        # NOTE(review): joblib pickles the Keras model object here; confirm this round-trips with the installed TF version.
        dump(est, save_model_dir + croptype+'/'+ str(test_year[0]) + '_' + model[0] + '_' + 'exp'+str(experiment)+'_N'+str(N))
    if result:
        result_df = pd.concat([data.loc[data_test.index,['sta_con', 'yield(t/ha)']],
                               pd.DataFrame(y_test_pred,index = data_test.index,columns=['pred'])],axis=1)
        # add the trend back so predictions are comparable with the raw yield column
        result_df.loc[:,'pred'] = result_df.loc[:,'pred'] + slope * data_test['Year'] - yield_mean + intercept
        result_df.loc[:,'residual'] =  result_df.loc[:,'pred'] - result_df.loc[:,'yield(t/ha)']
        result_df.to_csv(result_dir +croptype+'/'+str(seed) + '_' + str(test_year[0]) + '_' + model[0] + '_' + 'exp'+str(experiment)+'_N'+str(N)+'_预测结果.csv',header=True)

    return metric_list

In [None]:
# Ranked feature list used to select the DNN inputs
featurelist = read_data('/content/drive/MyDrive/csv_data/average_feature_importance.csv')

# Train and evaluate one DNN per test year on soybean
crop_type = 'soybean'
metric_list = []
for year in range(2014,2022):# range(2014,2021):# 2014 2015,2019, 2020
  for n in [50]:
    # pass the accumulator in and keep the returned list instead of handing over a throw-away []
    metric_list = yield_predictor(data, test_year=[year], N=10, experiment = 'GDT', croptype = crop_type, model = ['DNN'], verbose = 0,
                    tune_model = False, model_parameter=None, result = True, result_dir = '/content/drive/MyDrive/csv_data/', feature_importance = False,
                    seed = 99, save_model=True, save_model_dir='/content/drive/MyDrive/model/', trend_n = 30,
                    feature_list = featurelist, n_features = n, metric_list = metric_list)

In [None]:
# Train and evaluate one LSTM per test year on soybean
crop_type = 'soybean'
metric_list = []
for year in range(2014,2022):# range(2014,2021):# 2014 2015,2019, 2020
  for n in [50]:
    # pass the accumulator in and keep the returned list instead of handing over a throw-away []
    metric_list = yield_predictor(data, test_year=[year], N=10, experiment = 'GDT', croptype = crop_type, model = ['LSTM'], verbose = 0,
                    tune_model = False, model_parameter=None, result = True, result_dir = '/content/drive/MyDrive/csv_data/', feature_importance = False,
                    seed = 99, save_model=True, save_model_dir='/content/drive/MyDrive/model/', trend_n = 30,
                    feature_list = featurelist, n_features = n, metric_list = metric_list)

In [None]:
# Random 70/30 split; features are every column from position 3 onwards.
# NOTE(review): the tables loaded earlier in this notebook name the target 'yield(t/ha)', so `data['yield']`
# presumably belongs to an older input table (the shapes noted below do not match the current data) — confirm before re-running.
x_train,x_test, y_train,y_test = train_test_split(data.iloc[0:,3:],data['yield'],test_size=0.3, random_state=0)
# x_train: 1991 rows × 717 columns
# y_train: 1991 rows × 1 columns
# x_test: 854 rows × 717 columns
# y_test: 854 rows × 1 columns

In [None]:
# Convert the training split to NumPy arrays
x_train, y_train = np.array(x_train), np.array(y_train)

In [None]:
# Sanity check of the training feature matrix shape
x_train.shape#(1991, 717)

(1991, 717)

In [None]:
# variables for window dataset
window_size = 18
batch_size = 64
shuffle_buffer_size = 1000

In [None]:
# x_train = tf.expand_dims(x_train, axis=-1)
# y_train = tf.expand_dims(y_train, axis=-1)

In [None]:
def window_dataset(series, window_size, batch_size, shuffle_buffer_size):
    '''
    windowing the time series data to be a windowed dataset

    Each window holds `window_size + 1` consecutive points: the first
    `window_size` are the network input, the last one is the label.

        Parameters:
            series: time series data 
            window_size: desired number of data points as inputs of the network
            batch_size: batching the input of the network
            shuffle_buffer_size: buffer size for shuffle method
            
        Returns:
            windowed dataset of shuffled, batched (input, label) pairs
    '''
    series = tf.expand_dims(series, axis=-1)
    # `Dataset` was never imported in this notebook; use the fully qualified tf.data.Dataset.
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    # each window is itself a dataset; batch it into a single tensor
    ds = ds.flat_map(lambda x: x.batch(window_size + 1))
    ds = ds.map(lambda x: (x[:-1], x[-1:]))
    ds = ds.shuffle(shuffle_buffer_size)
    return ds.batch(batch_size).prefetch(1)
    
def forecast(model, series, window_size):
    '''
    forecasting the time series data

    Note: batching uses the notebook-level variable `batch_size`, which must
    be defined before this function is called.
    
        Parameters:
            model: trained network model
            series: time series data to be forecasted
            window_size: desired number of data points as inputs of the network
            
        Returns:
            forecast: forecasted time series data
    '''
    series = tf.expand_dims(series, axis=-1)
    # `Dataset` was never imported in this notebook; use the fully qualified tf.data.Dataset.
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda x: x.batch(window_size))
    ds = ds.batch(batch_size).prefetch(1)
    # local name differs from the function name so the function is not shadowed inside its own body
    predictions = model.predict(ds)
    return predictions

In [None]:
# Reset Keras global state and re-seed the TensorFlow and NumPy RNGs (42 here, unlike seed_value=0 in the first cell)
tf.keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
# Keras names used by the baseline-LSTM cells below
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Lambda, Bidirectional, LSTM, Dense, Conv1D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import Huber
from tensorflow.keras.callbacks import LearningRateScheduler

In [None]:
def create_model(input_shape=(1991, 717), output_scale=400.0):
    '''Build the (uncompiled) baseline LSTM regressor.

    Parameters:
        input_shape: (timesteps, features) of ONE sample fed to the LSTM layer.
            The default reproduces the value that was hard-coded here.
            NOTE(review): 1991 is the number of training rows, so the default
            treats the whole training set as a single sequence — confirm the
            intended per-sample shape and pass it explicitly.
        output_scale: constant the network output is multiplied by in the
            final Lambda layer

    Returns:
        Sequential model: LSTM(32) -> Dense(30) -> Dense(10) -> Dense(1) -> scaling
    '''
    model = Sequential([
        LSTM(32, return_sequences=False, input_shape=list(input_shape)),
        Dense(30, activation='relu'),
        Dense(10, activation='relu'),
        Dense(1),
        Lambda(lambda x: x * output_scale)
    ])
    return model

In [None]:
# Build the baseline network and print its layer / parameter summary
model = create_model()
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 32)                96000     
_________________________________________________________________
dense_6 (Dense)              (None, 30)                990       
_________________________________________________________________
dense_7 (Dense)              (None, 10)                310       
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 11        
_________________________________________________________________
lambda_2 (Lambda)            (None, 1)                 0         
Total params: 97,311
Trainable params: 97,311
Non-trainable params: 0
_________________________________________________________________


In [None]:
# 寻找最优学习率:1e-5
model = create_model()
model.compile(loss=Huber(), optimizer=SGD(learning_rate=1e-8, momentum=0.9), metrics=["mae"])
lr_scheduler = LearningRateScheduler(lambda epoch: 1e-8 * 10**(epoch / 20))
# fit
hist = model.fit(x_train,y_train, epochs=100, callbacks=[lr_scheduler])

In [None]:
import matplotlib.pyplot as plt

In [None]:
# check best lr value
plt.semilogx(hist.history["lr"], hist.history["loss"])
plt.axis([1e-8, 1e-4, 0, 60])
plt.xlabel("Learning Rate")
plt.ylabel("Loss")
plt.show()

In [None]:
# NOTE(review): `reshape` returns a new array and the result is discarded here, so x_train keeps its
# (1991, 717) shape (see the shape printed further down) — decide on the intended shape and assign it.
x_train.reshape(1,1991,717)
x_train=tf.convert_to_tensor(x_train)

In [None]:
# Train the baseline LSTM from a clean, re-seeded session with the learning rate picked above.
# NOTE(review): the recorded output of this cell is a ValueError; presumably the 2-D x_train does not match
# the 3-D input the LSTM layer expects — confirm.
tf.keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

n_epochs = 5

model = create_model()
optimizer = SGD(learning_rate=1e-5, momentum=0.9)
model.compile(loss=Huber(), optimizer=optimizer, metrics=["mae"])
history = model.fit(x_train,y_train, epochs=n_epochs)

Epoch 1/5


ValueError: ignored

In [None]:
# Shape check of the training features
x_train.shape

(1991, 717)

In [None]:
# Convert the test split to arrays and append a trailing axis of size 1 to both
x_test, y_test = np.array(x_test), np.array(y_test)
x_test = tf.expand_dims(x_test, axis=-1)
y_test = tf.expand_dims(y_test, axis=-1)

In [None]:
# Shape check of the test features after expand_dims
x_test.shape

TensorShape([854, 717, 1])

In [None]:
# Import the Keras metric under an alias: importing it as `mean_absolute_error` would rebind the
# scikit-learn function of the same name that assess_metrics relies on.
from tensorflow.keras.metrics import mean_absolute_error as keras_mean_absolute_error
x_test, y_test = np.array(x_test), np.array(y_test)
# NOTE(review): the previous cell already appended a trailing axis to x_test / y_test; running this
# cell after it appends a second one — confirm which of the two cells should do it.
x_test = tf.expand_dims(x_test, axis=-1)
y_test = tf.expand_dims(y_test, axis=-1)
# forecasting on test set
lstm_forecast = model(x_test)
# lstm_forecast_test = lstm_forecast[np.numpy(x_train) - window_size:-1, -1, 0]
keras_mean_absolute_error(y_test, lstm_forecast)

<tf.Tensor: shape=(854, 717), dtype=float32, numpy=
array([[ 4.2188225 ,  4.763893  ,  3.731903  , ...,  6.3800964 ,
         6.8982544 ,  4.9664764 ],
       [ 0.09924698,  0.99913025,  0.05299377, ...,  2.480339  ,
         2.7898598 ,  2.0228958 ],
       [ 4.50037   ,  4.9837112 ,  4.9867897 , ...,  1.8967934 ,
         1.8967934 ,  2.8836021 ],
       ...,
       [15.100924  , 14.942965  , 13.7045765 , ..., 17.467756  ,
        18.336233  , 17.552923  ],
       [ 6.797676  ,  6.257553  ,  7.3501244 , ...,  4.9861565 ,
         3.6148758 ,  4.7575455 ],
       [12.697609  , 12.199997  , 13.500484  , ..., 10.096794  ,
        10.069801  , 10.096794  ]], dtype=float32)>

In [None]:
# Persist the test targets to Drive as a .npy file
np.save('/content/drive/MyDrive/csv_data/y_test',y_test.numpy())

In [None]:
# Persist the model output for the test set to Drive as a .npy file
np.save('/content/drive/MyDrive/csv_data/lstm_forecast',lstm_forecast.numpy())

In [None]:
# save baseline network
# save baseline network in HDF5 format
model.save("/content/drive/MyDrive/model/forecast_lstm.h5", save_format="h5")

In [None]:
# Load three CSV files that share the same columns and stack them into one training table.
feature_columns = ['date','x1','x2','x3','x4']
# `sep` must be passed by keyword: pandas 2.0 made every read_csv argument after the path keyword-only.
data5 = pd.read_csv('data27.csv', sep=",")
data6 = pd.read_csv('data33.csv', sep=",")
data7 = pd.read_csv('data40.csv', sep=",")
# Keep the same columns from each file. Previously data6 / data7 were overwritten with copies of
# data5, so the second and third file never reached the training data.
data5 = pd.DataFrame(data5, columns=feature_columns)
data6 = pd.DataFrame(data6, columns=feature_columns)
data7 = pd.DataFrame(data7, columns=feature_columns)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
data8 = pd.concat([data5, data6, data7])

data8 = data8.set_index('date')

data8 = data8.values

# MinMaxScaler was never imported by name; reach it through the already-imported sklearn.preprocessing.
sc = preprocessing.MinMaxScaler(feature_range=(0, 1))
train_data = sc.fit_transform(data8)

# Sliding windows over the first column: 60 past values are the input, the next value is the label.
x_train = []
y_train = []
for i in range(60,len(train_data)):
   x_train.append(train_data[i-60:i,0])
   y_train.append(train_data[i,0])
x_train, y_train = np.array(x_train), np.array(y_train)
# (samples, 60) -> (samples, 60, 1) as expected by the LSTM layer
x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1))


model = Sequential()
model.add(LSTM(units=10, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(LSTM(units=10))
model.add(Dense(units=1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=10, batch_size=32)

### 玉米预测

In [None]:
# 大豆预测
# read data
# 2014-2021，每10年预测一年
# Maize input table; test years 2014-2021, each predicted from the preceding 10 years.
# Rows containing missing values are dropped right after loading.
data = read_data('/content/drive/MyDrive/csv_data/input_maize_rename.csv').dropna()

# Table dimensions
print(data.shape)

# Column list, used to tell the time-series, phenology and static features apart
print(data.columns.tolist())

(12445, 1274)
['Year', 'State', 'Value', 'sta_con', 'yield(t/ha)', 'NDVI_Mid Mar.', 'EVI_Mid Mar.', 'LSWI_Mid Mar.', 'GCVI_Mid Mar.', 'RVI_Mid Mar.', 'SAVI_Mid Mar.', 'WDRVI_Mid Mar.', 'Fpar_Mid Mar.', 'LAI_Mid Mar.', 'ET_Mid Mar.', 'LE_Mid Mar.', 'LST_Day_Mid Mar.', 'LST_Night_Mid Mar.', 'SPI14d_Mid Mar.', 'SPI30d_Mid Mar.', 'SPI90d_Mid Mar.', 'EDDI14d_Mid Mar.', 'EDDI30d_Mid Mar.', 'EDDI90d_Mid Mar.', 'SPEI14d_Mid Mar.', 'SPEI30d_Mid Mar.', 'SPEI90d_Mid Mar.', 'PDSI_Mid Mar.', 'Palmer Z_Mid Mar.', 'Red_Mid Mar.', 'Nir_Mid Mar.', 'Blue_Mid Mar.', 'Green_Mid Mar.', 'Nir1_Mid Mar.', 'Swir1_Mid Mar.', 'Swir2_Mid Mar.', 'Precipitation_Mid Mar.', 'Temp_Mid Mar.', 'Humidity_Mid Mar.', 'Pressure_Mid Mar.', 'Shortwave_Mid Mar.', 'Longwave_Mid Mar.', 'NDVI_Late Mar.', 'EVI_Late Mar.', 'LSWI_Late Mar.', 'GCVI_Late Mar.', 'RVI_Late Mar.', 'SAVI_Late Mar.', 'WDRVI_Late Mar.', 'Fpar_Late Mar.', 'LAI_Late Mar.', 'ET_Late Mar.', 'LE_Late Mar.', 'LST_Day_Late Mar.', 'LST_Night_Late Mar.', 'SPI14d_Lat

In [None]:
# Number of rows currently held in `data`
num_samples = data.shape[0]
num_samples

12445

In [None]:
class LSTMModel(tf.keras.Model):
    '''Two-branch LSTM regressor with an extra static-feature input.

    One LSTM reads the time-series input and another reads the phenology input
    (both with ``return_sequences=False``). Each LSTM output is batch-normalised,
    the two are concatenated with the static features on the last axis, and the
    result goes through Dense -> BatchNorm -> Dense -> 1-unit linear output.

    Parameters:
        time_shape: forwarded as ``input_shape`` to the time LSTM.
        phology_shape: forwarded as ``input_shape`` to the phenology LSTM.
        static_shape: accepted for a symmetric signature; not used by this class.
        time_lstm_units / phology_lstm_units: units of the respective LSTM layers.
        dense_units: units of each of the two hidden Dense layers.
        learning_rate: learning rate of the Adam optimizer stored on ``self.optimizer``.

    The instance also exposes ``self.optimizer`` (Adam) and ``self.loss``
    (mean squared error) so a caller can pass them to ``compile``.
    '''

    def __init__(self, time_shape, phology_shape, static_shape, time_lstm_units=32, phology_lstm_units=32, dense_units=128, learning_rate=0.001):
        super().__init__()
        layers = tf.keras.layers

        # Keep the hyper-parameters on the instance.
        self.time_lstm_units = time_lstm_units
        self.phology_lstm_units = phology_lstm_units
        self.dense_units = dense_units

        # Sequence encoders, one per sequential input.
        self.time_layer = layers.LSTM(units=time_lstm_units, return_sequences=False, input_shape=time_shape)
        self.phology_layer = layers.LSTM(units=phology_lstm_units, return_sequences=False, input_shape=phology_shape)
        self.batch_norm1 = layers.BatchNormalization()
        self.batch_norm2 = layers.BatchNormalization()

        # Fusion of the encoded sequences with the static features, then the regression head.
        self.concat_layer = layers.Concatenate(axis=-1)
        self.dense_layer1 = layers.Dense(units=dense_units, activation='relu')
        self.batch_norm3 = layers.BatchNormalization()
        self.dense_layer2 = layers.Dense(units=dense_units, activation='relu')
        self.output_layer = layers.Dense(units=1, activation='linear')

        # Training configuration, handed to compile() by the caller.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self.loss = tf.keras.losses.MeanSquaredError()

    def call(self, inputs):
        '''Run the forward pass on ``inputs`` = (time, phenology, static) and return the 1-unit output.'''
        time_seq, phology_seq, static_feats = inputs

        # Encode both sequences first, then normalise each encoding.
        time_code = self.time_layer(time_seq)
        phology_code = self.phology_layer(phology_seq)
        time_code = self.batch_norm1(time_code)
        phology_code = self.batch_norm2(phology_code)

        merged = self.concat_layer([time_code, phology_code, static_feats])
        hidden = self.batch_norm3(self.dense_layer1(merged))
        return self.output_layer(self.dense_layer2(hidden))



class DNNModel(tf.keras.Model):
    '''Fully connected regressor: 128-128-64-32-16 hidden units, dropout, linear output.

    Parameters:
        func_act: activation used by all five hidden Dense layers (default 'relu').
    '''

    def __init__(self, func_act='relu'):
        super().__init__()
        Dense = tf.keras.layers.Dense

        # Hidden stack, widest first.
        self.dense0 = Dense(128, activation=func_act)
        self.dense1 = Dense(128, activation=func_act)
        self.dense2 = Dense(64, activation=func_act)
        self.dense3 = Dense(32, activation=func_act)
        self.dense4 = Dense(16, activation=func_act)
        # Dropout (rate 0.2) sits between the last hidden layer and the output.
        self.drop0 = tf.keras.layers.Dropout(0.2)
        self.outp = Dense(1, activation='linear')

    def call(self, inputs):
        '''Pass ``inputs`` through the hidden stack, dropout and the 1-unit linear output.'''
        hidden = inputs
        for dense in (self.dense0, self.dense1, self.dense2, self.dense3, self.dense4):
            hidden = dense(hidden)
        return self.outp(self.drop0(hidden))

In [None]:
def _split_feature_groups(columns):
    '''Partition feature column names into the three LSTM input groups.

    Returns ``(time_order, phology_order, static_order, n_periods, n_stages)``.
    Time columns keep the order they have in ``columns``; phenology columns are
    ordered stage by stage; static columns are a fixed list.
    '''
    # Period-of-month labels that mark a time-series column (27 steps).
    period_labels = [
        'Mid Mar.', 'Late Mar.', 'Early Apr.', 'Mid Apr.', 'Late Apr.', 'Early May.', 'Mid May.', 'Late May.',
        'Early Jun.', 'Mid Jun.', 'Late Jun.', 'Early Jul.', 'Mid Jul.', 'Late Jul.', 'Early Aug.', 'Mid Aug.',
        'Late Aug.', 'Early Sep.', 'Mid Sep.', 'Late Sep.', 'Early Oct.', 'Mid Oct.', 'Late Oct.', 'Early Nov.',
        'Mid Nov.', 'Late Nov.', 'Early Dec.']
    # Growth-stage suffixes that mark a phenology column (7 steps), in stage order.
    # NOTE(review): these are the maize stages; `croptype` does not switch them.
    stage_labels = ['_Planting', '_Emerging', '_Silking', '_Dough', '_Dent', '_Maturity', '_Harvest']
    static_order = [
        'AREA_Irrigated', 'AREA', 'PIC', 'CaCO3', 'CEC', 'Drainage', 'EC', 'I_class', 'N_class', 'Max_OM', 'PAWS',
        'pH', 'SAR', 'Texture', 'Sand', 'Silt', 'Clay', 'Longitude', 'Latitude',
        'IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']

    time_order = [col for col in columns if any(label in col for label in period_labels)]
    phology_order = [col for stage in stage_labels for col in columns if stage in col]
    return time_order, phology_order, static_order, len(period_labels), len(stage_labels)


def _fit_predict_dnn(X_train_val, y_train_val, X_test, seed, verbose):
    '''Train a DNNModel on an 85/15 train/validation split and predict the test rows.'''
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val.to_numpy(), np.asarray(y_train_val), test_size=0.15, random_state=seed)
    est = DNNModel()
    est.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=tf.keras.losses.MeanSquaredError())
    est.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), verbose=verbose)
    return est, est.predict(X_test.to_numpy())


def _fit_predict_lstm(X_train_val, y_train_val, X_test, seed, verbose):
    '''Train an LSTMModel on an 85/15 train/validation split and predict the test rows.'''
    time_order, phology_order, static_order, n_periods, n_stages = _split_feature_groups(
        X_train_val.columns.tolist())

    print('时间输入维度（样本量，时间步长，变量）：', ('...', n_periods, len(time_order) / n_periods))
    print('物候输入维度：', ('...', n_stages, len(phology_order) / n_stages))
    print('静态输入维度：', ('...', len(static_order)))
    print(X_train_val[time_order].shape)
    print(X_train_val[phology_order].shape)

    def as_inputs(frame):
        # (samples, steps, variables) for the two sequences, (samples, variables) for static.
        n_rows = len(frame)
        return (
            frame[time_order].to_numpy().reshape(n_rows, n_periods, len(time_order) // n_periods),
            frame[phology_order].to_numpy().reshape(n_rows, n_stages, len(phology_order) // n_stages),
            frame[static_order].to_numpy(),
        )

    time_all, phology_all, static_all = as_inputs(X_train_val)
    # Train on the same (de-trended) target the test set is scored against.
    target_all = np.asarray(y_train_val).reshape(-1, 1)

    (X_train_time, X_val_time, X_train_phology, X_val_phology,
     X_train_static, X_val_static, y_train, y_val) = train_test_split(
        time_all, phology_all, static_all, target_all, test_size=0.15, random_state=seed)
    print(X_train_time.shape, X_val_time.shape, X_train_phology.shape, X_val_phology.shape,
          X_train_static.shape, X_val_static.shape, y_train.shape, y_val.shape)

    X_test_time, X_test_phology, X_test_static = as_inputs(X_test)

    est = LSTMModel(X_train_time.shape[1:], X_train_phology.shape[1:], X_train_static.shape[1:],
                    time_lstm_units=64, phology_lstm_units=64, dense_units=64)
    est.compile(optimizer=est.optimizer, loss=est.loss)
    est.fit([X_train_time, X_train_phology, X_train_static], y_train,
            validation_data=([X_val_time, X_val_phology, X_val_static], y_val),
            epochs=100, verbose=verbose)
    return est, est.predict([X_test_time, X_test_phology, X_test_static])


def yield_predictor(data, test_year=[2020], N=2, experiment = 'GDT', croptype = 'soybean', model = ['XGBoost'], verbose = 0, 
                    tune_model = False, model_parameter=None, result = False, result_dir = '', feature_importance = False,
                    fea_eng = False,seed = 99,save_model=False, save_model_dir='', trend_n = 30,
                    importance_dir = '', feature_list = None, n_features = 200, metric_list = []):
    '''Train a DNN or LSTM on the N years before a test year and score it on that year.

    The yield is linearly de-trended before training; predictions are put back on
    the original scale before being written to the result CSV.

    Parameters:
        data: DataFrame with the columns 'sta_con', 'State', 'yield(t/ha)', 'Year',
            'Value', 'GDT_trend_yield' plus the feature columns.
        test_year: list of years used as the test set; ``test_year[0]`` anchors the
            training window and the trend window.
        N: number of years before ``test_year[0]`` used for training/validation.
        experiment: only 'GDT' (linear de-trending) is implemented.
        croptype: sub-directory name used in the model and result paths.
        model: ['DNN'] or ['LSTM'].
        verbose: Keras verbosity passed to ``fit``.
        result / result_dir: when ``result`` is true, write per-row predictions to
            ``result_dir + croptype + '/...'``.
        seed: random state of the train/validation split; also part of the result file name.
        save_model / save_model_dir: when ``save_model`` is true, dump the fitted
            model under ``save_model_dir + croptype + '/...'``.
        trend_n: number of years before ``test_year[0]`` used to fit the yield trend.
        feature_list: DataFrame with a 'feature' column, ranked best first (DNN only).
        n_features: how many top rows of ``feature_list`` the DNN uses.
        metric_list: previously collected metric records; never mutated.
        tune_model, model_parameter, feature_importance, fea_eng, importance_dir:
            accepted for signature compatibility; not used here.

    Returns:
        A new list: the entries of ``metric_list`` followed by a dict with the keys
        'year', 'model', 'N', 'RMSE' and 'R2' for this run (metrics are on the
        de-trended scale).

    Raises:
        ValueError: unsupported ``model`` or ``experiment``, missing ``feature_list``
            for the DNN, or an empty training/test selection.
    '''
    model_name = model if isinstance(model, str) else model[0]
    if model_name not in ('DNN', 'LSTM'):
        raise ValueError("Unsupported model {!r}; expected ['DNN'] or ['LSTM'].".format(model))
    if experiment != 'GDT':
        raise ValueError("Unsupported experiment {!r}; only 'GDT' is implemented.".format(experiment))

    # Identifier/target columns that must never be fed to the model as features.
    meta_columns = ['sta_con', 'State', 'yield(t/ha)', 'Year', 'Value', 'GDT_trend_yield']

    if model_name == 'DNN':
        if feature_list is None:
            raise ValueError("feature_list is required when model is ['DNN'].")
        top_features = feature_list.iloc[0:n_features]['feature'].tolist()
        data = data[meta_columns + top_features]

    # Split into the test year(s) and the N-year training/validation window before them.
    first_test_year = test_year[0]
    data_test = data[data['Year'].isin(test_year)]
    data_train_val = data[data['Year'].isin(list(range(first_test_year - N, first_test_year)))]
    if data_test.empty or data_train_val.empty:
        raise ValueError('No rows found for test_year={} with a {}-year training window.'.format(test_year, N))

    # Fit the linear yield trend on the (usually longer) trend_n-year window.
    trend_window = data[data['Year'].isin(list(range(first_test_year - trend_n, first_test_year)))]
    linear_m = linear_model.LinearRegression()
    linear_m.fit(np.array(trend_window['Year']).reshape(-1, 1),
                 np.array(trend_window['yield(t/ha)']).reshape(-1, 1))
    slope = float(linear_m.coef_[0, 0])
    intercept = float(linear_m.intercept_[0])

    # The de-trended yield is re-centred on the training mean, or on the training
    # maximum when the fitted trend is decreasing.
    yield_mean = data_train_val['yield(t/ha)'].mean()
    if slope < 0:
        yield_mean = data_train_val['yield(t/ha)'].max()
    print('The foluma of de-trend the yield: yield_new = yield - {:.3f}×year + {:.3f} - ({:.3f})'
          .format(slope, yield_mean, intercept))

    def detrend(frame):
        return frame['yield(t/ha)'] - slope * frame['Year'] + yield_mean - intercept

    y_train_val = detrend(data_train_val)
    y_test = detrend(data_test)
    X_train_val = data_train_val.drop(meta_columns, axis=1)
    X_test = data_test.drop(meta_columns, axis=1)

    # Standardise features with statistics from the training window only.
    # New float frames are built instead of assigning through .loc, which would
    # otherwise write floats into possibly-integer columns.
    std_scaler = preprocessing.StandardScaler().fit(X_train_val)
    X_train_val = pd.DataFrame(std_scaler.transform(X_train_val),
                               index=X_train_val.index, columns=X_train_val.columns)
    X_test = pd.DataFrame(std_scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    if model_name == 'DNN':
        est, y_test_pred = _fit_predict_dnn(X_train_val, y_train_val, X_test, seed, verbose)
    else:
        est, y_test_pred = _fit_predict_lstm(X_train_val, y_train_val, X_test, seed, verbose)

    RMSE, R2 = assess_metrics(np.asarray(y_test), y_test_pred)
    print('RMSE:', RMSE, 'R2:', R2)

    run_tag = str(first_test_year) + '_' + model_name + '_' + 'exp' + str(experiment) + '_N' + str(N)

    if save_model:
        model_path = save_model_dir + croptype + '/' + run_tag
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        dump(est, model_path)

    if result:
        result_df = pd.concat(
            [data.loc[data_test.index, ['sta_con', 'yield(t/ha)']],
             pd.DataFrame(y_test_pred, index=data_test.index, columns=['pred'])],
            axis=1)
        # Undo the de-trending so predictions are comparable with the raw yield.
        result_df['pred'] = result_df['pred'] + slope * data_test['Year'] - yield_mean + intercept
        result_df['residual'] = result_df['pred'] - result_df['yield(t/ha)']
        result_path = result_dir + croptype + '/' + str(seed) + '_' + run_tag + '_预测结果.csv'
        os.makedirs(os.path.dirname(result_path), exist_ok=True)
        result_df.to_csv(result_path, header=True)

    # Work on a copy so neither the caller's list nor the shared default is mutated.
    metrics = list(metric_list)
    metrics.append({'year': first_test_year, 'model': model_name, 'N': N, 'RMSE': RMSE, 'R2': R2})
    return metrics

In [None]:
# Averaged feature-importance table; yield_predictor takes its top rows as the DNN inputs.
featurelist = read_data('/content/drive/MyDrive/csv_data/average_feature_importance (1).csv')
print(featurelist)

crop_type = 'maize'
metric_list = []

# Options that stay fixed for every DNN run; only the test year and the
# number of selected features change inside the loops.
dnn_options = dict(
    N=10, experiment='GDT', croptype=crop_type, model=['DNN'], verbose=0,
    tune_model=False, model_parameter=None, result=True,
    result_dir='/content/drive/MyDrive/csv_data/', feature_importance=False,
    seed=99, save_model=True, save_model_dir='/content/drive/MyDrive/model/',
    trend_n=30, feature_list=featurelist,
)

# One run per test year 2014..2021, each with the top-50 features.
for year in range(2014, 2022):
    for n in [50]:
        yield_predictor(data, test_year=[year], n_features=n, metric_list=[], **dnn_options)

                feature  weight      gain         cover  total_gain  \
0             EVI_Dough  53.250  3.545882  14585.360352  188.582326   
1         EVI_Late Jul.  56.250  2.162339   6700.992157  122.461014   
2             Nir_Dough  46.625  2.084768   4973.655914   94.935937   
3            LSWI_Dough  61.500  1.890120   7386.067200  116.996874   
4     Humidity_Mid Mar.  21.875  1.452210   3929.587570   38.413425   
...                 ...     ...       ...           ...         ...   
1261    SAVI_Early Sep.  27.250  0.001704    562.779545    0.045984   
1262      SAVI_Maturity  26.125  0.001634    513.262009    0.039726   
1263   WDRVI_Early Oct.   7.375  0.001624    633.493099    0.014717   
1264    WDRVI_Late Sep.   7.375  0.001310    721.730068    0.009058   
1265                 MI   1.000  0.000144    164.434128    0.000144   

        total_cover  
0     774852.531250  
1     375416.640625  
2     235378.677246  
3     476097.298828  
4     101354.221069  
...            

In [None]:
metric_list = []

# Options that stay fixed for every LSTM run; `crop_type` and `featurelist`
# come from the DNN cell above.
lstm_options = dict(
    N=10, experiment='GDT', croptype=crop_type, model=['LSTM'], verbose=0,
    tune_model=False, model_parameter=None, result=True,
    result_dir='/content/drive/MyDrive/csv_data/', feature_importance=False,
    seed=99, save_model=True, save_model_dir='/content/drive/MyDrive/model/',
    trend_n=30, feature_list=featurelist,
)

# One run per test year 2014..2021.
for year in range(2014, 2022):
    for n in [50]:
        yield_predictor(data, test_year=[year], n_features=n, metric_list=[], **lstm_options)

The foluma of de-trend the yield: yield_new = yield - -0.036×year + 13.296 - (80.972)
时间输入维度（样本量，时间步长，变量）： ('...', 27, 37.0)
物候输入维度： ('...', 7, 34.0)
静态输入维度： ('...', 31)
(7314, 999)
(7314, 238)
(6216, 27, 37) (1098, 27, 37) (6216, 7, 34) (1098, 7, 34) (6216, 31) (1098, 31) (6216, 1) (1098, 1)
----
MAE: 4.8116
MSE: 24.3077
RMSE: 4.9303
R2: -7.2654
RMSE: 4.930280193509503 R2: -7.26536299504162
Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...batch_norm1
......vars
.........0
.........1
.........2
.........3
...batch_norm2
......vars
.........0
.........1
.........2
.........3
...batch_norm3
......vars
.........0
.........1
.........2
.........3
...concat_layer
......vars
...dense_layer1
......vars
.........0
.........1
...dense_layer2
......vars
.........0
.........1
...layers
......dense_2
.........vars
............0
............1
......lstm
.........cell
............vars
...............0
...............1
...............2
.........vars
......lstm_1
.........cell
....