### 开始

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner
from tensorflow.keras import backend as K

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_visible_devices(devices=physical_devices[0], device_type='GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [3]:
keras.backend.set_image_data_format('channels_last')

In [4]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [5]:
#自定义matries 计算r2
def R2(y_true, y_pred):
    sst = K.sum(K.square(y_true - K.mean(y_true)))
    ssr = K.sum(K.square(y_pred - y_true))
    R2 = 1 - ssr/sst
    return R2

In [6]:
training_mode = 'parameter' # parameter
max_trials = 50

In [2]:
path_data = 'D:/SGYL/SM_Downscaling_data/Train/data_LSTM'
for i in os.listdir(path_data):
    print(i)
    data = pd.read_csv(os.path.join(path_data,i))
    data_train,data_validate = train_test_split(data,test_size=0.3,random_state=42)
    data_validate,data_test = train_test_split(data_validate,test_size=0.3,random_state=42)
    print(i)
    print(len(data_train),len(data_validate),len(data_test))
    data_train.to_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_'+i),index = False)
    data_validate.to_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_'+i),index = False)
    data_test.to_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_'+i),index = False)


LSTM_data_2001.csv
LSTM_data_2001.csv
417715 125314 53707
LSTM_data_2002.csv
LSTM_data_2002.csv
503072 150922 64681
LSTM_data_2003.csv
LSTM_data_2003.csv
505493 151648 64993
LSTM_data_2004.csv
LSTM_data_2004.csv
498220 149466 64057
LSTM_data_2005.csv
LSTM_data_2005.csv
502855 150857 64653
LSTM_data_2006.csv
LSTM_data_2006.csv
510911 153274 65689
LSTM_data_2007.csv
LSTM_data_2007.csv
615807 184742 79176
LSTM_data_2008.csv
LSTM_data_2008.csv
623097 186929 80113
LSTM_data_2009.csv
LSTM_data_2009.csv
611429 183429 78613
LSTM_data_2010.csv
LSTM_data_2010.csv
648450 194535 83373
LSTM_data_2011.csv
LSTM_data_2011.csv
654811 196443 84191
LSTM_data_2012.csv
LSTM_data_2012.csv
658150 197445 84620
LSTM_data_2013.csv
LSTM_data_2013.csv
644698 193410 82890
LSTM_data_2014.csv
LSTM_data_2014.csv
601606 180482 77350
LSTM_data_2015.csv
LSTM_data_2015.csv
603806 181141 77633
LSTM_data_2016.csv
LSTM_data_2016.csv
634299 190290 81553
LSTM_data_2017.csv
LSTM_data_2017.csv
635114 190535 81658
LSTM_data_2018

### Model

In [8]:
# Create the keras tuner model.
class MyHyperModel(keras_tuner.HyperModel):
    
    def build(self, hp):

        drop = hp.Boolean('dropout')
        input_layer = layers.Input(shape = (4,19))  # 这里shape需要修改
        out = input_layer
        for i in range(hp.Int("num_layers",2,3)):
            out = layers.LSTM(units=hp.Choice(f"units_{i}",values = [64,128,256,512]),#hp.Choice(f"units_{i}", min_value=300, max_value=600, step=100),
                                return_sequences=True,
                                kernel_initializer=hp.Choice('initializer',values = ["glorot_uniform",'he_uniform']),
                                name = 'LSTM_'+str(i))(out)
        unit = hp.Choice('units_last',values = [64,128,256,512])
        out = layers.LSTM(units=unit,kernel_initializer=hp.Choice('initializer_last',values = ["glorot_uniform",'he_uniform']),name = 'LSTM_last')(out)
        if drop:
            out = layers.Dropout(0.3)(out)
        
        final_output = layers.Dense(units=1,activation=hp.Choice('activate_dense_last',values = ['linear','sigmoid']),name = 'final_dense')(out)
        model = keras.Model(inputs = input_layer,outputs = final_output)
        learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),loss='mse',metrics=[R2,'mae'])
        return model

In [None]:
    def build():

        unit = 256
        input_layer = layers.Input(shape = (4,19))  # 这里shape需要修改
        out = input_layer
        for i in range(2):
            out = layers.LSTM(units=unit,return_sequences=True,name = 'LSTM_'+str(i))(out)
        out = layers.LSTM(units=unit,name = 'LSTM_last')(out)
        out = layers.Dropout(0.3)(out)
        
        final_output = layers.Dense(units=1,activation='linear',name = 'final_dense')(out)
        model = keras.Model(inputs = input_layer,outputs = final_output)
        return model

In [None]:
model = build()
model.summary()

### 2001

In [6]:
from sklearn.preprocessing import StandardScaler
year = 2001
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Unnamed: 0,NDVI_1,NDVI_2,NDVI_3,NDVI_4,EVI_1,EVI_2,EVI_3,EVI_4,LST_1,LST_2,...,Lon_3,Lon_4,Lat_1,Lat_2,Lat_3,Lat_4,DOY_1,DOY_2,DOY_3,DOY_4
0,0.459257,0.462296,0.463587,0.463064,0.277595,0.283132,0.288187,0.292669,23.425981,23.515612,...,100.624786,100.624786,26.379028,26.379028,26.379028,26.379028,212,213,214,215
1,0.234662,0.231919,0.229744,0.228109,0.139260,0.136887,0.134887,0.133243,39.463562,39.480511,...,78.625046,78.625046,26.379028,26.379028,26.379028,26.379028,264,265,266,267
2,0.412674,0.425673,0.438702,0.451657,0.248028,0.254859,0.261959,0.269290,30.352411,30.329779,...,103.625160,103.625160,27.124630,27.124630,27.124630,27.124630,158,159,160,161
3,0.099966,0.100220,0.100396,0.100480,0.059500,0.059636,0.059715,0.059730,14.457657,14.368966,...,90.123482,90.123482,29.127872,29.127872,29.127872,29.127872,310,311,312,313
4,0.036440,0.036864,0.037294,0.037728,0.029261,0.029421,0.029593,0.029773,43.355152,43.819313,...,81.122360,81.122360,37.374409,37.374409,37.374409,37.374409,183,184,185,186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417710,0.271146,0.279652,0.288058,0.296327,0.196908,0.202782,0.208605,0.214360,30.623158,31.059143,...,75.876198,75.876198,39.377651,39.377651,39.377651,39.377651,110,111,112,113
417711,0.312128,0.317131,0.322058,0.326907,0.203569,0.205784,0.208062,0.210409,24.956324,24.670488,...,92.378250,92.378250,32.370792,32.370792,32.370792,32.370792,191,192,193,194
417712,0.422850,0.396757,0.370154,0.343165,0.289963,0.276435,0.262686,0.248790,22.404798,22.282812,...,101.621918,101.621918,27.879215,27.879215,27.879215,27.879215,226,227,228,229
417713,0.128748,0.128723,0.128653,0.128536,0.129816,0.128594,0.127541,0.126670,30.979700,31.189383,...,104.622292,104.622292,31.373661,31.373661,31.373661,31.373661,124,125,126,127


In [None]:
X_train = X_train.to_numpy().reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

In [4]:
X_train.shape

(417715, 4, 19)

In [5]:
X_train[0,:,:]

array([[ 4.59256798e-01,  2.77595252e-01,  2.34259815e+01,
         8.63553810e+00,  0.00000000e+00,  2.74091840e-01,
         9.02078390e-01, -1.88046440e-01,  5.25000000e+00,
         1.63610535e+01,  1.66900000e+03,  1.56376276e+01,
         1.56376276e+01,  3.62000000e+02,  2.40000000e+02,
         3.99000000e+02,  1.00624786e+02,  2.63790283e+01,
         2.12000000e+02],
       [ 4.62295741e-01,  2.83131868e-01,  2.35156116e+01,
         8.75780010e+00,  0.00000000e+00,  2.52540410e-01,
         1.03535557e+00, -1.61548957e-01,  3.68000007e+00,
         1.63610535e+01,  1.66900000e+03,  1.56376276e+01,
         1.56376276e+01,  3.62000000e+02,  2.40000000e+02,
         3.99000000e+02,  1.00624786e+02,  2.63790283e+01,
         2.13000000e+02],
       [ 4.63587016e-01,  2.88186520e-01,  2.35981598e+01,
         8.88237190e+00,  0.00000000e+00,  3.02049965e-01,
         9.89692569e-01, -2.82274246e-01,  4.67000008e+00,
         1.63610535e+01,  1.66900000e+03,  1.56376276e+01,
    

In [10]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 10s]
val_R2: 0.8423041701316833

Best val_R2 So Far: 0.8751782178878784
Total elapsed time: 02h 28m 29s
INFO:tensorflow:Oracle triggered exit


In [11]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00160, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2001_best.hdf5
102/102 - 3s - loss: 0.0036 - R2: 0.5090 - mae: 0.0411 - val_loss: 0.0016 - val_R2: 0.7848 - val_mae: 0.0305
Epoch 2/300

Epoch 00002: val_loss improved from 0.00160 to 0.00149, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2001_best.hdf5
102/102 - 2s - loss: 0.0015 - R2: 0.7945 - mae: 0.0297 - val_loss: 0.0015 - val_R2: 0.7996 - val_mae: 0.0295
Epoch 3/300

Epoch 00003: val_loss improved from 0.00149 to 0.00144, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2001_best.hdf5
102/102 - 2s - loss: 0.0015 - R2: 0.8048 - mae: 0.0289 - val_loss: 0.0014 - val_R2: 0.8069 - val_mae: 0.0287
Epoch 4/300

Epoch 00004: val_loss improved from 0.00144 to 0.00138, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2001_best.hdf5
102/102 - 2s - loss: 0.0014 - R2: 0.8128 - mae: 0.0282 - val_loss: 0.0014 - val_R2: 0.814

<tensorflow.python.keras.callbacks.History at 0x193f1edb948>

In [12]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2001
{'dropout': False, 'num_layers': 3, 'units_0': 256, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 64, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'linear', 'lr': 0.006776427061943408, 'units_2': 128}


[0.000946618674788624, 0.8614243268966675, 0.022261783480644226]

### 2002

In [13]:
from sklearn.preprocessing import StandardScaler
year = 2002
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (503072, 1, 76)
X_validate shape: (150922, 1, 76)


In [14]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 38s]
val_R2: 0.8632396459579468

Best val_R2 So Far: 0.8826588988304138
Total elapsed time: 03h 31m 15s
INFO:tensorflow:Oracle triggered exit


In [15]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00149, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2002_best.hdf5
123/123 - 3s - loss: 0.0027 - R2: 0.6089 - mae: 0.0370 - val_loss: 0.0015 - val_R2: 0.7875 - val_mae: 0.0295
Epoch 2/300

Epoch 00002: val_loss improved from 0.00149 to 0.00136, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2002_best.hdf5
123/123 - 2s - loss: 0.0014 - R2: 0.7965 - mae: 0.0284 - val_loss: 0.0014 - val_R2: 0.8055 - val_mae: 0.0275
Epoch 3/300

Epoch 00003: val_loss improved from 0.00136 to 0.00133, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2002_best.hdf5
123/123 - 2s - loss: 0.0013 - R2: 0.8097 - mae: 0.0273 - val_loss: 0.0013 - val_R2: 0.8101 - val_mae: 0.0273
Epoch 4/300

Epoch 00004: val_loss improved from 0.00133 to 0.00125, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2002_best.hdf5
123/123 - 2s - loss: 0.0013 - R2: 0.8201 - mae: 0.0264 - val_loss: 0.0012 - val_R2: 0.822

<tensorflow.python.keras.callbacks.History at 0x193853ec188>

In [16]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2002
{'dropout': False, 'num_layers': 3, 'units_0': 512, 'initializer': 'glorot_uniform', 'units_1': 128, 'units_last': 128, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'linear', 'lr': 0.008779360094579483, 'units_2': 128}


[0.0008561494178138673, 0.8693453669548035, 0.0204787440598011]

### 2003

In [17]:
from sklearn.preprocessing import StandardScaler
year = 2003
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (505493, 1, 76)
X_validate shape: (151648, 1, 76)


In [18]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 44s]
val_R2: 0.8719651103019714

Best val_R2 So Far: 0.9043444395065308
Total elapsed time: 02h 49m 30s
INFO:tensorflow:Oracle triggered exit


In [19]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00133, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2003_best.hdf5
124/124 - 3s - loss: 0.0031 - R2: 0.5507 - mae: 0.0374 - val_loss: 0.0013 - val_R2: 0.8082 - val_mae: 0.0273
Epoch 2/300

Epoch 00002: val_loss improved from 0.00133 to 0.00121, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2003_best.hdf5
124/124 - 2s - loss: 0.0013 - R2: 0.8176 - mae: 0.0267 - val_loss: 0.0012 - val_R2: 0.8263 - val_mae: 0.0262
Epoch 3/300

Epoch 00003: val_loss improved from 0.00121 to 0.00112, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2003_best.hdf5
124/124 - 2s - loss: 0.0012 - R2: 0.8314 - mae: 0.0256 - val_loss: 0.0011 - val_R2: 0.8383 - val_mae: 0.0249
Epoch 4/300

Epoch 00004: val_loss improved from 0.00112 to 0.00110, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2003_best.hdf5
124/124 - 2s - loss: 0.0011 - R2: 0.8413 - mae: 0.0248 - val_loss: 0.0011 - val_R2: 0.842

<tensorflow.python.keras.callbacks.History at 0x1938097ec48>

In [20]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2003
{'dropout': False, 'num_layers': 3, 'units_0': 256, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 64, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'linear', 'lr': 0.006776427061943408, 'units_2': 128}


[0.0006658125785179436, -inf, 0.018138328567147255]

### 2004

In [21]:
from sklearn.preprocessing import StandardScaler
year = 2004
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (498220, 1, 76)
X_validate shape: (149466, 1, 76)


In [22]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 02m 24s]
val_R2: 0.8811090588569641

Best val_R2 So Far: 0.9033181667327881
Total elapsed time: 02h 42m 37s
INFO:tensorflow:Oracle triggered exit


In [23]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00134, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2004_best.hdf5
122/122 - 3s - loss: 0.0037 - R2: 0.4640 - mae: 0.0406 - val_loss: 0.0013 - val_R2: 0.8071 - val_mae: 0.0274
Epoch 2/300

Epoch 00002: val_loss improved from 0.00134 to 0.00119, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2004_best.hdf5
122/122 - 2s - loss: 0.0013 - R2: 0.8090 - mae: 0.0274 - val_loss: 0.0012 - val_R2: 0.8286 - val_mae: 0.0256
Epoch 3/300

Epoch 00003: val_loss improved from 0.00119 to 0.00110, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2004_best.hdf5
122/122 - 2s - loss: 0.0012 - R2: 0.8288 - mae: 0.0258 - val_loss: 0.0011 - val_R2: 0.8412 - val_mae: 0.0248
Epoch 4/300

Epoch 00004: val_loss improved from 0.00110 to 0.00109, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2004_best.hdf5
122/122 - 2s - loss: 0.0011 - R2: 0.8397 - mae: 0.0249 - val_loss: 0.0011 - val_R2: 0.842

<tensorflow.python.keras.callbacks.History at 0x19382541588>

In [24]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2004
{'dropout': True, 'num_layers': 3, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 128, 'units_last': 256, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.008461849980494839, 'units_2': 128}


[0.0006838921108283103, 0.8959334492683411, 0.01827489212155342]

### 2005

In [25]:
from sklearn.preprocessing import StandardScaler
year = 2005
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (502855, 1, 76)
X_validate shape: (150857, 1, 76)


In [26]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 05m 11s]
val_R2: 0.8957223892211914

Best val_R2 So Far: 0.9015474319458008
Total elapsed time: 02h 59m 30s
INFO:tensorflow:Oracle triggered exit


In [27]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00141, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2005_best.hdf5
123/123 - 4s - loss: 0.0040 - R2: 0.4274 - mae: 0.0400 - val_loss: 0.0014 - val_R2: 0.8008 - val_mae: 0.0284
Epoch 2/300

Epoch 00002: val_loss improved from 0.00141 to 0.00127, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2005_best.hdf5
123/123 - 3s - loss: 0.0013 - R2: 0.8102 - mae: 0.0276 - val_loss: 0.0013 - val_R2: 0.8198 - val_mae: 0.0269
Epoch 3/300

Epoch 00003: val_loss improved from 0.00127 to 0.00120, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2005_best.hdf5
123/123 - 3s - loss: 0.0012 - R2: 0.8234 - mae: 0.0266 - val_loss: 0.0012 - val_R2: 0.8302 - val_mae: 0.0261
Epoch 4/300

Epoch 00004: val_loss improved from 0.00120 to 0.00117, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2005_best.hdf5
123/123 - 3s - loss: 0.0012 - R2: 0.8318 - mae: 0.0259 - val_loss: 0.0012 - val_R2: 0.834

<tensorflow.python.keras.callbacks.History at 0x19381b7c5c8>

In [28]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2005
{'dropout': False, 'num_layers': 3, 'units_0': 256, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 512, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'linear', 'lr': 0.007282286591355298, 'units_2': 128}


[0.0006891107186675072, 0.895930290222168, 0.018792232498526573]

### 2006

In [29]:
from sklearn.preprocessing import StandardScaler
year = 2006
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (510911, 1, 76)
X_validate shape: (153274, 1, 76)


In [30]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 02m 21s]
val_R2: 0.8571662902832031

Best val_R2 So Far: 0.8932822346687317
Total elapsed time: 02h 44m 55s
INFO:tensorflow:Oracle triggered exit


In [31]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00138, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2006_best.hdf5
125/125 - 3s - loss: 0.0038 - R2: 0.4383 - mae: 0.0408 - val_loss: 0.0014 - val_R2: 0.7952 - val_mae: 0.0282
Epoch 2/300

Epoch 00002: val_loss improved from 0.00138 to 0.00121, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2006_best.hdf5
125/125 - 2s - loss: 0.0014 - R2: 0.7982 - mae: 0.0278 - val_loss: 0.0012 - val_R2: 0.8202 - val_mae: 0.0260
Epoch 3/300

Epoch 00003: val_loss improved from 0.00121 to 0.00113, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2006_best.hdf5
125/125 - 2s - loss: 0.0012 - R2: 0.8156 - mae: 0.0265 - val_loss: 0.0011 - val_R2: 0.8314 - val_mae: 0.0252
Epoch 4/300

Epoch 00004: val_loss improved from 0.00113 to 0.00108, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2006_best.hdf5
125/125 - 2s - loss: 0.0012 - R2: 0.8263 - mae: 0.0256 - val_loss: 0.0011 - val_R2: 0.839

<tensorflow.python.keras.callbacks.History at 0x193d7bdc748>

In [32]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2006
{'dropout': True, 'num_layers': 2, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 128, 'units_last': 256, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.007522684510012844, 'units_2': 256}


[0.000732546963263303, 0.8852928280830383, 0.019086945801973343]

### 2007

In [33]:
from sklearn.preprocessing import StandardScaler
year = 2007
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (615807, 1, 76)
X_validate shape: (184742, 1, 76)


In [34]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 33s]
val_R2: 0.8786143660545349

Best val_R2 So Far: 0.908476710319519
Total elapsed time: 03h 25m 30s


INFO:tensorflow:Oracle triggered exit


In [35]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00148, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2007_best.hdf5
151/151 - 4s - loss: 0.0031 - R2: 0.5949 - mae: 0.0394 - val_loss: 0.0015 - val_R2: 0.8064 - val_mae: 0.0293
Epoch 2/300

Epoch 00002: val_loss improved from 0.00148 to 0.00135, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2007_best.hdf5
151/151 - 3s - loss: 0.0015 - R2: 0.8049 - mae: 0.0296 - val_loss: 0.0014 - val_R2: 0.8233 - val_mae: 0.0278
Epoch 3/300

Epoch 00003: val_loss improved from 0.00135 to 0.00128, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2007_best.hdf5
151/151 - 3s - loss: 0.0014 - R2: 0.8223 - mae: 0.0281 - val_loss: 0.0013 - val_R2: 0.8322 - val_mae: 0.0273
Epoch 4/300

Epoch 00004: val_loss improved from 0.00128 to 0.00122, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2007_best.hdf5
151/151 - 3s - loss: 0.0013 - R2: 0.8321 - mae: 0.0272 - val_loss: 0.0012 - val_R2: 0.841

<tensorflow.python.keras.callbacks.History at 0x193891bb048>

In [36]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2007
{'dropout': True, 'num_layers': 3, 'units_0': 512, 'initializer': 'glorot_uniform', 'units_1': 128, 'units_last': 64, 'initializer_last': 'he_uniform', 'activate_dense_last': 'linear', 'lr': 0.00747055290241083, 'units_2': 512}


[0.0006955971475690603, 0.9024136066436768, 0.019116200506687164]

### 2008

In [37]:
from sklearn.preprocessing import StandardScaler
year = 2008
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (623097, 1, 76)
X_validate shape: (186929, 1, 76)


In [38]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 04m 04s]
val_R2: 0.8775420188903809

Best val_R2 So Far: 0.9232114553451538
Total elapsed time: 03h 23m 24s
INFO:tensorflow:Oracle triggered exit


In [39]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00125, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2008_best.hdf5
153/153 - 4s - loss: 0.0024 - R2: 0.6927 - mae: 0.0340 - val_loss: 0.0013 - val_R2: 0.8393 - val_mae: 0.0267
Epoch 2/300

Epoch 00002: val_loss improved from 0.00125 to 0.00118, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2008_best.hdf5
153/153 - 3s - loss: 0.0012 - R2: 0.8481 - mae: 0.0259 - val_loss: 0.0012 - val_R2: 0.8479 - val_mae: 0.0260
Epoch 3/300

Epoch 00003: val_loss improved from 0.00118 to 0.00111, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2008_best.hdf5
153/153 - 3s - loss: 0.0011 - R2: 0.8597 - mae: 0.0248 - val_loss: 0.0011 - val_R2: 0.8580 - val_mae: 0.0253
Epoch 4/300

Epoch 00004: val_loss improved from 0.00111 to 0.00102, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2008_best.hdf5
153/153 - 4s - loss: 0.0010 - R2: 0.8674 - mae: 0.0241 - val_loss: 0.0010 - val_R2: 0.869

<tensorflow.python.keras.callbacks.History at 0x19380b2b048>

In [40]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2008
{'dropout': False, 'num_layers': 3, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 128, 'initializer_last': 'he_uniform', 'activate_dense_last': 'linear', 'lr': 0.003872084305659864, 'units_2': 128}


[0.0006119741592556238, 0.9159145355224609, 0.01772398129105568]

### 2009

In [41]:
from sklearn.preprocessing import StandardScaler
year = 2009
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (611429, 1, 76)
X_validate shape: (183429, 1, 76)


In [42]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 04m 15s]
val_R2: 0.8899696469306946

Best val_R2 So Far: 0.9101680517196655
Total elapsed time: 03h 25m 01s
INFO:tensorflow:Oracle triggered exit


In [43]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00133, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2009_best.hdf5
150/150 - 3s - loss: 0.0034 - R2: 0.5489 - mae: 0.0380 - val_loss: 0.0013 - val_R2: 0.8218 - val_mae: 0.0275
Epoch 2/300

Epoch 00002: val_loss improved from 0.00133 to 0.00125, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2009_best.hdf5
150/150 - 2s - loss: 0.0013 - R2: 0.8293 - mae: 0.0268 - val_loss: 0.0012 - val_R2: 0.8335 - val_mae: 0.0269
Epoch 3/300

Epoch 00003: val_loss improved from 0.00125 to 0.00115, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2009_best.hdf5
150/150 - 2s - loss: 0.0012 - R2: 0.8418 - mae: 0.0257 - val_loss: 0.0012 - val_R2: 0.8464 - val_mae: 0.0252
Epoch 4/300

Epoch 00004: val_loss improved from 0.00115 to 0.00110, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2009_best.hdf5
150/150 - 2s - loss: 0.0011 - R2: 0.8485 - mae: 0.0251 - val_loss: 0.0011 - val_R2: 0.852

<tensorflow.python.keras.callbacks.History at 0x193905713c8>

In [44]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2009
{'dropout': False, 'num_layers': 3, 'units_0': 128, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 128, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.00908820412978174, 'units_2': 128}


[0.0006751564214937389, 0.9057345986366272, 0.018407950177788734]

### 2010

In [45]:
from sklearn.preprocessing import StandardScaler
year = 2010
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (648450, 1, 76)
X_validate shape: (194535, 1, 76)


In [46]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 40s]
val_R2: 0.8629266619682312

Best val_R2 So Far: 0.9125432372093201
Total elapsed time: 03h 17m 46s
INFO:tensorflow:Oracle triggered exit


In [47]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00135, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2010_best.hdf5
159/159 - 4s - loss: 0.0028 - R2: 0.6427 - mae: 0.0363 - val_loss: 0.0013 - val_R2: 0.8269 - val_mae: 0.0275
Epoch 2/300

Epoch 00002: val_loss did not improve from 0.00135
159/159 - 3s - loss: 0.0013 - R2: 0.8293 - mae: 0.0275 - val_loss: 0.0014 - val_R2: 0.8225 - val_mae: 0.0280
Epoch 3/300

Epoch 00003: val_loss improved from 0.00135 to 0.00119, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2010_best.hdf5
159/159 - 3s - loss: 0.0012 - R2: 0.8423 - mae: 0.0263 - val_loss: 0.0012 - val_R2: 0.8475 - val_mae: 0.0256
Epoch 4/300

Epoch 00004: val_loss improved from 0.00119 to 0.00112, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2010_best.hdf5
159/159 - 3s - loss: 0.0012 - R2: 0.8500 - mae: 0.0256 - val_loss: 0.0011 - val_R2: 0.8558 - val_mae: 0.0251
Epoch 5/300

Epoch 00005: val_loss improved from 0.00112 to 0.0

<tensorflow.python.keras.callbacks.History at 0x1938a556b88>

In [48]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2010
{'dropout': True, 'num_layers': 2, 'units_0': 256, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 512, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.0070492237192470684, 'units_2': 256}


[0.000678066280670464, 0.9079712629318237, 0.01870129443705082]

### 2011

In [49]:
from sklearn.preprocessing import StandardScaler
year = 2011
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (654811, 1, 76)
X_validate shape: (196443, 1, 76)


In [50]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 02m 54s]
val_R2: 0.8380882143974304

Best val_R2 So Far: 0.9138379693031311
Total elapsed time: 03h 25m 14s
INFO:tensorflow:Oracle triggered exit


In [51]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00124, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2011_best.hdf5
160/160 - 4s - loss: 0.0026 - R2: 0.6403 - mae: 0.0346 - val_loss: 0.0012 - val_R2: 0.8294 - val_mae: 0.0265
Epoch 2/300

Epoch 00002: val_loss improved from 0.00124 to 0.00112, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2011_best.hdf5
160/160 - 3s - loss: 0.0012 - R2: 0.8407 - mae: 0.0255 - val_loss: 0.0011 - val_R2: 0.8457 - val_mae: 0.0250
Epoch 3/300

Epoch 00003: val_loss improved from 0.00112 to 0.00103, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2011_best.hdf5
160/160 - 3s - loss: 0.0011 - R2: 0.8537 - mae: 0.0244 - val_loss: 0.0010 - val_R2: 0.8581 - val_mae: 0.0239
Epoch 4/300

Epoch 00004: val_loss improved from 0.00103 to 0.00100, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2011_best.hdf5
160/160 - 3s - loss: 0.0010 - R2: 0.8621 - mae: 0.0236 - val_loss: 9.9510e-04 - val_R2: 0

<tensorflow.python.keras.callbacks.History at 0x193c0669648>

In [52]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2011
{'dropout': False, 'num_layers': 2, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 256, 'units_last': 512, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.006614038547293072, 'units_2': 128}


[0.0006263594259507954, 0.9079516530036926, 0.017926108092069626]

### 2012 

In [53]:
from sklearn.preprocessing import StandardScaler
year = 2012
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (658150, 1, 76)
X_validate shape: (197445, 1, 76)


In [54]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 02m 31s]
val_R2: 0.9184640645980835

Best val_R2 So Far: 0.9225761294364929
Total elapsed time: 03h 36m 14s
INFO:tensorflow:Oracle triggered exit


In [55]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00126, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2012_best.hdf5
161/161 - 4s - loss: 0.0027 - R2: 0.6738 - mae: 0.0346 - val_loss: 0.0013 - val_R2: 0.8445 - val_mae: 0.0267
Epoch 2/300

Epoch 00002: val_loss improved from 0.00126 to 0.00112, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2012_best.hdf5
161/161 - 4s - loss: 0.0012 - R2: 0.8520 - mae: 0.0260 - val_loss: 0.0011 - val_R2: 0.8616 - val_mae: 0.0251
Epoch 3/300

Epoch 00003: val_loss improved from 0.00112 to 0.00110, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2012_best.hdf5
161/161 - 4s - loss: 0.0011 - R2: 0.8639 - mae: 0.0248 - val_loss: 0.0011 - val_R2: 0.8646 - val_mae: 0.0247
Epoch 4/300

Epoch 00004: val_loss improved from 0.00110 to 0.00101, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2012_best.hdf5
161/161 - 4s - loss: 0.0010 - R2: 0.8736 - mae: 0.0238 - val_loss: 0.0010 - val_R2: 0.876

<tensorflow.python.keras.callbacks.History at 0x19388adcb88>

In [56]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2012
{'dropout': False, 'num_layers': 2, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 256, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.007212977064041875, 'units_2': 128}


[0.0006435877294279635, 0.915126383304596, 0.017990592867136]

### 2013

In [57]:
from sklearn.preprocessing import StandardScaler
year = 2013
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (644698, 1, 76)
X_validate shape: (193410, 1, 76)


In [58]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 43s]
val_R2: 0.8840814232826233

Best val_R2 So Far: 0.9277251362800598
Total elapsed time: 03h 49m 49s
INFO:tensorflow:Oracle triggered exit


In [59]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00127, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2013_best.hdf5
158/158 - 4s - loss: 0.0030 - R2: 0.6131 - mae: 0.0366 - val_loss: 0.0013 - val_R2: 0.8369 - val_mae: 0.0270
Epoch 2/300

Epoch 00002: val_loss improved from 0.00127 to 0.00113, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2013_best.hdf5
158/158 - 4s - loss: 0.0013 - R2: 0.8388 - mae: 0.0267 - val_loss: 0.0011 - val_R2: 0.8546 - val_mae: 0.0256
Epoch 3/300

Epoch 00003: val_loss improved from 0.00113 to 0.00105, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2013_best.hdf5
158/158 - 3s - loss: 0.0011 - R2: 0.8525 - mae: 0.0255 - val_loss: 0.0011 - val_R2: 0.8650 - val_mae: 0.0245
Epoch 4/300

Epoch 00004: val_loss improved from 0.00105 to 0.00103, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2013_best.hdf5
158/158 - 3s - loss: 0.0011 - R2: 0.8608 - mae: 0.0247 - val_loss: 0.0010 - val_R2: 0.867

<tensorflow.python.keras.callbacks.History at 0x19448401b08>

In [60]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2013
{'dropout': True, 'num_layers': 2, 'units_0': 256, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 512, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.008929992169469712, 'units_2': 512}


[0.0005665220669470727, 0.921843409538269, 0.01709827594459057]

### 2014

In [61]:
from sklearn.preprocessing import StandardScaler
year = 2014
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (601606, 1, 76)
X_validate shape: (180482, 1, 76)


In [62]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 04m 21s]
val_R2: 0.8829584717750549

Best val_R2 So Far: 0.9360734820365906
Total elapsed time: 03h 15m 22s
INFO:tensorflow:Oracle triggered exit


In [63]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00125, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2014_best.hdf5
147/147 - 4s - loss: 0.0037 - R2: 0.5534 - mae: 0.0391 - val_loss: 0.0013 - val_R2: 0.8496 - val_mae: 0.0267
Epoch 2/300

Epoch 00002: val_loss improved from 0.00125 to 0.00112, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2014_best.hdf5
147/147 - 4s - loss: 0.0012 - R2: 0.8534 - mae: 0.0263 - val_loss: 0.0011 - val_R2: 0.8648 - val_mae: 0.0250
Epoch 3/300

Epoch 00003: val_loss improved from 0.00112 to 0.00105, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2014_best.hdf5
147/147 - 4s - loss: 0.0011 - R2: 0.8658 - mae: 0.0251 - val_loss: 0.0010 - val_R2: 0.8743 - val_mae: 0.0240
Epoch 4/300

Epoch 00004: val_loss improved from 0.00105 to 0.00100, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2014_best.hdf5
147/147 - 4s - loss: 0.0010 - R2: 0.8757 - mae: 0.0241 - val_loss: 9.9534e-04 - val_R2: 0

<tensorflow.python.keras.callbacks.History at 0x1938bcf64c8>

In [64]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2014
{'dropout': True, 'num_layers': 2, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 512, 'units_last': 512, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.00947836300226011, 'units_2': 128}


[0.0005298690521158278, 0.9312880635261536, 0.01643565483391285]

### 2015

In [65]:
from sklearn.preprocessing import StandardScaler
year = 2015
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (603806, 1, 76)
X_validate shape: (181141, 1, 76)


In [66]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 04m 03s]
val_R2: 0.8668781518936157

Best val_R2 So Far: 0.9234832525253296
Total elapsed time: 03h 39m 37s


INFO:tensorflow:Oracle triggered exit


In [67]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00149, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2015_best.hdf5
148/148 - 4s - loss: 0.0026 - R2: 0.6743 - mae: 0.0365 - val_loss: 0.0015 - val_R2: 0.8154 - val_mae: 0.0294
Epoch 2/300

Epoch 00002: val_loss improved from 0.00149 to 0.00132, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2015_best.hdf5
148/148 - 2s - loss: 0.0014 - R2: 0.8306 - mae: 0.0283 - val_loss: 0.0013 - val_R2: 0.8360 - val_mae: 0.0280
Epoch 3/300

Epoch 00003: val_loss improved from 0.00132 to 0.00126, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2015_best.hdf5
148/148 - 1s - loss: 0.0013 - R2: 0.8444 - mae: 0.0270 - val_loss: 0.0013 - val_R2: 0.8441 - val_mae: 0.0267
Epoch 4/300

Epoch 00004: val_loss improved from 0.00126 to 0.00116, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2015_best.hdf5
148/148 - 1s - loss: 0.0012 - R2: 0.8525 - mae: 0.0262 - val_loss: 0.0012 - val_R2: 0.855

<tensorflow.python.keras.callbacks.History at 0x1944bc770c8>

In [68]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2015
{'dropout': False, 'num_layers': 2, 'units_0': 256, 'initializer': 'glorot_uniform', 'units_1': 128, 'units_last': 64, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'linear', 'lr': 0.006859377633228802, 'units_2': 64}


[0.000619807280600071, -inf, 0.018151791766285896]

### 2016

In [69]:
from sklearn.preprocessing import StandardScaler
year = 2016
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (634299, 1, 76)
X_validate shape: (190290, 1, 76)


In [70]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 07m 46s]
val_R2: 0.8795233964920044

Best val_R2 So Far: 0.937090277671814
Total elapsed time: 03h 47m 31s
INFO:tensorflow:Oracle triggered exit


In [71]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00133, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2016_best.hdf5
155/155 - 3s - loss: 0.0035 - R2: 0.5937 - mae: 0.0387 - val_loss: 0.0013 - val_R2: 0.8456 - val_mae: 0.0275
Epoch 2/300

Epoch 00002: val_loss improved from 0.00133 to 0.00122, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2016_best.hdf5
155/155 - 2s - loss: 0.0013 - R2: 0.8526 - mae: 0.0268 - val_loss: 0.0012 - val_R2: 0.8578 - val_mae: 0.0262
Epoch 3/300

Epoch 00003: val_loss improved from 0.00122 to 0.00119, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2016_best.hdf5
155/155 - 2s - loss: 0.0012 - R2: 0.8642 - mae: 0.0257 - val_loss: 0.0012 - val_R2: 0.8614 - val_mae: 0.0256
Epoch 4/300

Epoch 00004: val_loss improved from 0.00119 to 0.00107, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2016_best.hdf5
155/155 - 2s - loss: 0.0011 - R2: 0.8718 - mae: 0.0249 - val_loss: 0.0011 - val_R2: 0.875

<tensorflow.python.keras.callbacks.History at 0x194a39ea3c8>

In [72]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2016
{'dropout': False, 'num_layers': 2, 'units_0': 256, 'initializer': 'glorot_uniform', 'units_1': 512, 'units_last': 64, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.006526328879826285, 'units_2': 256}


[0.0005395450862124562, 0.9326825141906738, 0.01681557670235634]

### 2017

In [73]:
from sklearn.preprocessing import StandardScaler
year = 2017
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (635114, 1, 76)
X_validate shape: (190535, 1, 76)


In [74]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 02m 41s]
val_R2: 0.9215372800827026

Best val_R2 So Far: 0.9364216327667236
Total elapsed time: 03h 44m 02s
INFO:tensorflow:Oracle triggered exit


In [75]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00128, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2017_best.hdf5
156/156 - 3s - loss: 0.0023 - R2: 0.7338 - mae: 0.0332 - val_loss: 0.0013 - val_R2: 0.8520 - val_mae: 0.0269
Epoch 2/300

Epoch 00002: val_loss improved from 0.00128 to 0.00124, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2017_best.hdf5
156/156 - 2s - loss: 0.0012 - R2: 0.8649 - mae: 0.0257 - val_loss: 0.0012 - val_R2: 0.8562 - val_mae: 0.0272
Epoch 3/300

Epoch 00003: val_loss improved from 0.00124 to 0.00105, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2017_best.hdf5
156/156 - 3s - loss: 0.0011 - R2: 0.8765 - mae: 0.0246 - val_loss: 0.0010 - val_R2: 0.8789 - val_mae: 0.0243
Epoch 4/300

Epoch 00004: val_loss improved from 0.00105 to 0.00102, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2017_best.hdf5
156/156 - 2s - loss: 9.9525e-04 - R2: 0.8848 - mae: 0.0237 - val_loss: 0.0010 - val_R2: 0

<tensorflow.python.keras.callbacks.History at 0x1944bc95988>

In [76]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2017
{'dropout': False, 'num_layers': 3, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 128, 'units_last': 128, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'linear', 'lr': 0.006290880081587463, 'units_2': 64}


[0.0005584562895819545, 0.9308784604072571, 0.016857683658599854]

### 2018

In [77]:
from sklearn.preprocessing import StandardScaler
year = 2018
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (612157, 1, 76)
X_validate shape: (183647, 1, 76)


In [78]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 08s]
val_R2: 0.8659489750862122

Best val_R2 So Far: 0.9405680894851685
Total elapsed time: 03h 18m 14s
INFO:tensorflow:Oracle triggered exit


In [79]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00122, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2018_best.hdf5
150/150 - 4s - loss: 0.0027 - R2: 0.6984 - mae: 0.0360 - val_loss: 0.0012 - val_R2: 0.8616 - val_mae: 0.0261
Epoch 2/300

Epoch 00002: val_loss improved from 0.00122 to 0.00110, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2018_best.hdf5
150/150 - 3s - loss: 0.0013 - R2: 0.8567 - mae: 0.0270 - val_loss: 0.0011 - val_R2: 0.8752 - val_mae: 0.0247
Epoch 3/300

Epoch 00003: val_loss improved from 0.00110 to 0.00107, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2018_best.hdf5
150/150 - 3s - loss: 0.0011 - R2: 0.8728 - mae: 0.0253 - val_loss: 0.0011 - val_R2: 0.8789 - val_mae: 0.0248
Epoch 4/300

Epoch 00004: val_loss improved from 0.00107 to 0.00095, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2018_best.hdf5
150/150 - 3s - loss: 0.0010 - R2: 0.8827 - mae: 0.0243 - val_loss: 9.4583e-04 - val_R2: 0

<tensorflow.python.keras.callbacks.History at 0x194df17f408>

In [80]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2018
{'dropout': True, 'num_layers': 3, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 256, 'units_last': 64, 'initializer_last': 'glorot_uniform', 'activate_dense_last': 'linear', 'lr': 0.008361465883857158, 'units_2': 64}


[0.0005271413829177618, 0.9358778595924377, 0.016327114775776863]

### 2019

In [81]:
from sklearn.preprocessing import StandardScaler
year = 2019
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (623859, 1, 76)
X_validate shape: (187158, 1, 76)


In [82]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 03m 52s]
val_R2: 0.8888119459152222

Best val_R2 So Far: 0.9403006434440613
Total elapsed time: 03h 19m 00s
INFO:tensorflow:Oracle triggered exit


In [83]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00129, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2019_best.hdf5
153/153 - 4s - loss: 0.0039 - R2: 0.5303 - mae: 0.0410 - val_loss: 0.0013 - val_R2: 0.8455 - val_mae: 0.0271
Epoch 2/300

Epoch 00002: val_loss improved from 0.00129 to 0.00115, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2019_best.hdf5
153/153 - 3s - loss: 0.0013 - R2: 0.8494 - mae: 0.0268 - val_loss: 0.0011 - val_R2: 0.8629 - val_mae: 0.0252
Epoch 3/300

Epoch 00003: val_loss improved from 0.00115 to 0.00111, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2019_best.hdf5
153/153 - 3s - loss: 0.0012 - R2: 0.8623 - mae: 0.0256 - val_loss: 0.0011 - val_R2: 0.8666 - val_mae: 0.0248
Epoch 4/300

Epoch 00004: val_loss improved from 0.00111 to 0.00098, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2019_best.hdf5
153/153 - 3s - loss: 0.0011 - R2: 0.8716 - mae: 0.0247 - val_loss: 9.8392e-04 - val_R2: 0

<tensorflow.python.keras.callbacks.History at 0x194c3879788>

In [84]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2019
{'dropout': True, 'num_layers': 3, 'units_0': 512, 'initializer': 'glorot_uniform', 'units_1': 64, 'units_last': 256, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.007109425844697379, 'units_2': 256}


[0.0005159497959539294, 0.934382438659668, 0.016334176063537598]

### 2020

In [85]:
from sklearn.preprocessing import StandardScaler
year = 2020
data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','train_LSTM_data_'+str(year)+'.csv'))
data_validate = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','validate_LSTM_data_'+str(year)+'.csv'))
data_test = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split_LSTM/','test_LSTM_data_'+str(year)+'.csv'))

X_train = data_train.drop(['SM'],axis = 1)
y_train = data_train['SM'].copy()   
X_validate = data_validate.drop(['SM'],axis = 1)
y_validate = data_validate['SM'].copy()
X_test = data_test.drop(['SM'],axis = 1)
y_test = data_test['SM'].copy()

print(X_train.columns)
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate = standarder.transform(X_validate)
X_test = standarder.transform(X_test)


X_train = X_train.reshape((-1,19,4))
X_train = np.moveaxis(X_train,2,1)

X_validate = X_validate.reshape((-1,19,4))
X_validate = np.moveaxis(X_validate,2,1)

X_test = X_test.reshape((-1,19,4))
X_test = np.moveaxis(X_test,2,1)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

# Standard Scalor
means = []
stds = []
features = X_train.shape[2]
for index in range(0, features):
    mean_i = np.nanmean(X_train[:,:,index])
    std_i = np.nanstd(X_train[:,:,index])
    means.append(mean_i)
    stds.append(std_i)
    X_train[:,:,index] = (X_train[:,:,index] - mean_i)/std_i


features = X_train.shape[2]
for index in range(0, features):
    mean_i = means[index]
    std_i = stds[index]
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i
    X_validate[:,:,index] = (X_validate[:,:,index] - mean_i)/std_i

print('means',means)
print('stds',stds)

Index(['NDVI_1', 'NDVI_2', 'NDVI_3', 'NDVI_4', 'EVI_1', 'EVI_2', 'EVI_3',
       'EVI_4', 'LST_1', 'LST_2', 'LST_3', 'LST_4', 'LST_Diff_1', 'LST_Diff_2',
       'LST_Diff_3', 'LST_Diff_4', 'Pre_1', 'Pre_2', 'Pre_3', 'Pre_4',
       'SWCI_1', 'SWCI_2', 'SWCI_3', 'SWCI_4', 'VSDI_1', 'VSDI_2', 'VSDI_3',
       'VSDI_4', 'SIWSI_1', 'SIWSI_2', 'SIWSI_3', 'SIWSI_4', 'ET_1', 'ET_2',
       'ET_3', 'ET_4', 'TWI_1', 'TWI_2', 'TWI_3', 'TWI_4', 'Dem_1', 'Dem_2',
       'Dem_3', 'Dem_4', 'Aspect_1', 'Aspect_2', 'Aspect_3', 'Aspect_4',
       'Slope_1', 'Slope_2', 'Slope_3', 'Slope_4', 'Clay_1', 'Clay_2',
       'Clay_3', 'Clay_4', 'Sand_1', 'Sand_2', 'Sand_3', 'Sand_4', 'Silt_1',
       'Silt_2', 'Silt_3', 'Silt_4', 'Lon_1', 'Lon_2', 'Lon_3', 'Lon_4',
       'Lat_1', 'Lat_2', 'Lat_3', 'Lat_4', 'DOY_1', 'DOY_2', 'DOY_3', 'DOY_4'],
      dtype='object')
X_train shape: (707492, 1, 76)
X_validate shape: (212248, 1, 76)


In [86]:

def scheduler(epoch,lr):
    # 每隔30个epoch，学习率减小为原来的1/10
    if epoch % 30 == 0 and epoch != 0:
        print("lr changed to {}".format(lr * 0.1))
        return lr * 0.1
    else :
        return lr

callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=20,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
hypermodel = MyHyperModel()
tuner = keras_tuner.BayesianOptimization(
            hypermodel,
            objective=keras_tuner.Objective('val_R2', direction="max"),
            num_initial_points=50,
            max_trials=max_trials,
            overwrite = True,
            directory='D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/',
            project_name=('LSTM_'+str(year)))
tuner.search(X_train,y_train,epochs=100,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = [reduce_lr,callback_early_stopping],verbose = 2)

Trial 50 Complete [00h 05m 32s]
val_R2: 0.8798959255218506

Best val_R2 So Far: 0.9381866455078125
Total elapsed time: 04h 11m 30s
INFO:tensorflow:Oracle triggered exit


In [87]:
filepath="LSTM_"+str(year)+"_best.hdf5"
callback_checkpoints = keras.callbacks.ModelCheckpoint(os.path.join('D:/SGYL/SM_results_data/check_points/LSTM/',filepath),monitor='val_loss',save_best_only=True,verbose=1)
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',patience=50,verbose=1)
reduce_lr = keras.callbacks.LearningRateScheduler(scheduler)
callbacks = [reduce_lr,callback_early_stopping,callback_checkpoints]

if training_mode == 'model':
    model_best = tuner.get_best_models()[0]
else :
    model_best = tuner.hypermodel.build(tuner.get_best_hyperparameters()[0])
model_best.fit(X_train,y_train,epochs=300,batch_size = 4096,validation_data=(X_validate,y_validate),shuffle = True,callbacks = callbacks,verbose = 2)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.00120, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2020_best.hdf5
173/173 - 3s - loss: 0.0031 - R2: 0.6004 - mae: 0.0374 - val_loss: 0.0012 - val_R2: 0.8468 - val_mae: 0.0262
Epoch 2/300

Epoch 00002: val_loss improved from 0.00120 to 0.00107, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2020_best.hdf5
173/173 - 3s - loss: 0.0012 - R2: 0.8476 - mae: 0.0262 - val_loss: 0.0011 - val_R2: 0.8625 - val_mae: 0.0249
Epoch 3/300

Epoch 00003: val_loss improved from 0.00107 to 0.00098, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2020_best.hdf5
173/173 - 3s - loss: 0.0011 - R2: 0.8618 - mae: 0.0249 - val_loss: 9.8004e-04 - val_R2: 0.8745 - val_mae: 0.0235
Epoch 4/300

Epoch 00004: val_loss improved from 0.00098 to 0.00091, saving model to D:/SGYL/SM_results_data/check_points/LSTM\LSTM_2020_best.hdf5
173/173 - 3s - loss: 0.0010 - R2: 0.8710 - mae: 0.0240 - val_loss: 9.1130e-04 - val_R

<tensorflow.python.keras.callbacks.History at 0x194eec94188>

In [88]:
#save model
save_path = os.path.join('D:/SGYL/SM_results_data/model/LSTM/','LSTM_'+str(year)+'.hdf5')
model_best.save(save_path)
if (os.path.exists(save_path)):
    print('save model for year:',year)

#save model config
import json
bestConfig=tuner.get_best_hyperparameters()[0].get_config()
f = open(os.path.join('D:/SGYL/SM_results_data/Bayesian_Opt/LSTM/Best_Config/','LSTM_'+str(year)+'.json'), 'w')
json.dump(bestConfig,f)
f.close()
print(bestConfig['values'])

model_best.evaluate(X_test,y_test)

save model for year: 2020
{'dropout': True, 'num_layers': 3, 'units_0': 512, 'initializer': 'he_uniform', 'units_1': 128, 'units_last': 256, 'initializer_last': 'he_uniform', 'activate_dense_last': 'sigmoid', 'lr': 0.004441749364901241, 'units_2': 256}


[0.0004796572611667216, 0.9326000213623047, 0.016010593622922897]

### aaa