In [None]:
from tifffile import imread
from tifffile import imwrite
from tifffile import imsave
from matplotlib import pyplot
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.utils import resample
import pickle
import os
from os import listdir
import itertools

In [None]:
# Get the index of NoData pixels.
# One PS scene per vineyard (P and W) is used as the footprint template:
# pixels that are NaN or exactly 0 are treated as NoData, and their flat
# indices (boundary_P / boundary_W) are reused by later cells to crop every
# other raster layer.
PS_20201115p = imread('file_name')
PS_20201115w = imread('file_name')
# merge the first two axes into a single pixel axis
PS_20201115p_fl = PS_20201115p.reshape(-1, *PS_20201115p.shape[2:])
PS_20201115w_fl = PS_20201115w.reshape(-1, *PS_20201115w.shape[2:])
# NaN -> 0 so that a single "== 0" test catches both kinds of NoData
PS_20201115p_clean = np.nan_to_num(PS_20201115p_fl, nan=0)
PS_20201115w_clean = np.nan_to_num(PS_20201115w_fl, nan=0)
NoData_P = np.where(PS_20201115p_clean == 0)
NoData_W = np.where(PS_20201115w_clean == 0)
# NoData_X[0] holds the indices along axis 0 of the flattened image
boundary_P = NoData_P[0].tolist()
boundary_W = NoData_W[0].tolist()
# valid pixels = all pixels - NoData pixels; the total is read from the array
# itself instead of being hard-coded, so other image sizes report correctly
print('P pixel number:', PS_20201115p_fl.shape[0]-len(NoData_P[0]))
print('W pixel number:', PS_20201115w_fl.shape[0]-len(NoData_W[0]))

In [None]:
# load calibrated PS images
folder = 'folder_name'

# Every .tif in the folder is flattened, stripped of its NaN pixels and
# published as a notebook-level variable named after the file (extension
# removed); a later cell discovers these variables through their name suffix.
# - sorted(): os.listdir returns entries in arbitrary order, so sorting keeps
#   the load order (and the downstream row order) reproducible.
# - globals(): writing into the mapping returned by locals() is only reliable
#   at module level, where it is the globals mapping anyway; be explicit.
for images in sorted(os.listdir(folder)):
    if images.endswith('.tif'):
        stem = os.path.splitext(images)[0]
        a = imread(os.path.join(folder, images))
        a_fl = a.reshape(-1, *a.shape[2:])
        a_fl = a_fl[~np.isnan(a_fl)]  # keep just the non-NaN values
        globals()[stem] = a_fl
        print(stem, a.shape, a_fl.shape, a_fl.dtype)

In [None]:
# load terrain layers (elevation, apparent electrical conductivity, and slope)
DEM_P = imread('file_name')
DEM_W = imread('file_name')
EC_P = imread('file_name')
EC_W = imread('file_name')
slope_P = imread('file_name')
slope_W = imread('file_name')

# merge the first two axes of every layer into a single pixel axis
DEM_P_fl = DEM_P.reshape(-1, *DEM_P.shape[2:])
DEM_W_fl = DEM_W.reshape(-1, *DEM_W.shape[2:])
EC_P_fl = EC_P.reshape(-1, *EC_P.shape[2:])
EC_W_fl = EC_W.reshape(-1, *EC_W.shape[2:])
slope_P_fl = slope_P.reshape(-1, *slope_P.shape[2:])
slope_W_fl = slope_W.reshape(-1, *slope_W.shape[2:])

# Flag the NoData pixels of each vineyard with NaN. One vectorised index
# assignment per layer replaces the former Python loop over every index.
for layer in (DEM_P_fl, EC_P_fl, slope_P_fl):
    layer[boundary_P] = np.nan
for layer in (DEM_W_fl, EC_W_fl, slope_W_fl):
    layer[boundary_W] = np.nan

# Keep only the non-NaN pixels.
# NOTE(review): this also removes NaNs that were already present in a terrain
# layer, which would make it shorter than the PS arrays - confirm the layers
# contain no NaN inside the field footprint.
DEM_P_fl = DEM_P_fl[~np.isnan(DEM_P_fl)]
DEM_W_fl = DEM_W_fl[~np.isnan(DEM_W_fl)]
EC_P_fl = EC_P_fl[~np.isnan(EC_P_fl)]
EC_W_fl = EC_W_fl[~np.isnan(EC_W_fl)]
slope_P_fl = slope_P_fl[~np.isnan(slope_P_fl)]
slope_W_fl = slope_W_fl[~np.isnan(slope_W_fl)]

for label, layer in (('DEM_P_fl', DEM_P_fl), ('DEM_W_fl', DEM_W_fl),
                     ('EC_P_fl', EC_P_fl), ('EC_W_fl', EC_W_fl),
                     ('slope_P_fl', slope_P_fl), ('slope_W_fl', slope_W_fl)):
    print(f'{label} shape:', layer.shape, 'dtype:', layer.dtype)

In [None]:
# load vegetation index (VI) layers
TCARI_2021w = imread('file_name')
ExG_2021w = imread('file_name')
NDVI_2021w = imread('file_name')
TCARI_2021p = imread('file_name')
ExG_2021p = imread('file_name')
NDVI_2021p = imread('file_name')
# Pad the W images with one extra column of zeros. A scalar fill value is
# broadcast along the column, so the image height no longer has to be
# hard-coded.
# NOTE(review): index -1 inserts the new column *before* the last column, not
# after it - confirm this is the intended alignment with the other W layers.
TCARI_2021w = np.insert(TCARI_2021w, -1, 0, axis=1)
ExG_2021w = np.insert(ExG_2021w, -1, 0, axis=1)
NDVI_2021w = np.insert(NDVI_2021w, -1, 0, axis=1)

# merge the first two axes of every layer into a single pixel axis
TCARI_2021w_fl = TCARI_2021w.reshape(-1, *TCARI_2021w.shape[2:])
ExG_2021w_fl = ExG_2021w.reshape(-1, *ExG_2021w.shape[2:])
NDVI_2021w_fl = NDVI_2021w.reshape(-1, *NDVI_2021w.shape[2:])
TCARI_2021p_fl = TCARI_2021p.reshape(-1, *TCARI_2021p.shape[2:])
ExG_2021p_fl = ExG_2021p.reshape(-1, *ExG_2021p.shape[2:])
NDVI_2021p_fl = NDVI_2021p.reshape(-1, *NDVI_2021p.shape[2:])

# flag NoData pixels with NaN (vectorised instead of one Python step per index)
for layer in (TCARI_2021p_fl, ExG_2021p_fl, NDVI_2021p_fl):
    layer[boundary_P] = np.nan
for layer in (TCARI_2021w_fl, ExG_2021w_fl, NDVI_2021w_fl):
    layer[boundary_W] = np.nan

# drop the flagged pixels ...
TCARI_2021w_fl = TCARI_2021w_fl[~np.isnan(TCARI_2021w_fl)]
ExG_2021w_fl = ExG_2021w_fl[~np.isnan(ExG_2021w_fl)]
NDVI_2021w_fl = NDVI_2021w_fl[~np.isnan(NDVI_2021w_fl)]
TCARI_2021p_fl = TCARI_2021p_fl[~np.isnan(TCARI_2021p_fl)]
ExG_2021p_fl = ExG_2021p_fl[~np.isnan(ExG_2021p_fl)]
NDVI_2021p_fl = NDVI_2021p_fl[~np.isnan(NDVI_2021p_fl)]
# ... and, in separate *_nan copies, replace values equal to -10 by NaN
# (presumably the VI NoData marker - confirm) so those rows can be dropped later
TCARI_2021w_fl_nan = np.where(TCARI_2021w_fl == -10, np.nan, TCARI_2021w_fl)
ExG_2021w_fl_nan = np.where(ExG_2021w_fl == -10, np.nan, ExG_2021w_fl)
NDVI_2021w_fl_nan = np.where(NDVI_2021w_fl == -10, np.nan, NDVI_2021w_fl)
TCARI_2021p_fl_nan = np.where(TCARI_2021p_fl == -10, np.nan, TCARI_2021p_fl)
ExG_2021p_fl_nan = np.where(ExG_2021p_fl == -10, np.nan, ExG_2021p_fl)
NDVI_2021p_fl_nan = np.where(NDVI_2021p_fl == -10, np.nan, NDVI_2021p_fl)

for label, layer in (('TCARI_2021w_fl', TCARI_2021w_fl), ('ExG_2021w_fl', ExG_2021w_fl),
                     ('NDVI_2021w_fl', NDVI_2021w_fl), ('TCARI_2021p_fl', TCARI_2021p_fl),
                     ('ExG_2021p_fl', ExG_2021p_fl), ('NDVI_2021p_fl', NDVI_2021p_fl)):
    print(f'{label} shape:', layer.shape, 'dtype:', layer.dtype)

In [None]:
# concatenate all layers and add columns of key and location
# Snapshot of all notebook-level variable names; the PS arrays are recognised
# purely by their '_p' / '_w' name suffix.
# NOTE(review): any other variable ending in '_p' / '_w' (e.g. the ref_*fl_p /
# ref_*fl_w arrays created further down) is picked up as well if this cell is
# re-run later in the session - run it on a fresh kernel.
keylist = list(globals().keys())
layer_columns = ['key', 'GWS', 'elevation', 'EC', 'slope','VI']
variable_list_P = []
variable_list_W = []
VI_P_fl = NDVI_2021p_fl_nan # need change!
VI_W_fl = NDVI_2021w_fl_nan # need change!
frames_P = []
frames_W = []
for variables in keylist:
    if variables.endswith('_p'):
        static_layers = (DEM_P_fl, EC_P_fl, slope_P_fl, VI_P_fl)
        frames, names = frames_P, variable_list_P
    elif variables.endswith('_w'):
        static_layers = (DEM_W_fl, EC_W_fl, slope_W_fl, VI_W_fl)
        frames, names = frames_W, variable_list_W
    else:
        continue

    gws = globals()[variables]
    # use date_vineyard as key for later joining between dataframes; the key
    # column is sized from the image itself rather than a hard-coded pixel count
    key1 = np.full(len(gws), variables)
    # column-stack key, GWS and the static layers (mixed with the string key,
    # everything becomes text here and is converted back to float below)
    variables1 = np.concatenate([key1[..., np.newaxis], gws[..., np.newaxis],
                                 *(layer[..., np.newaxis] for layer in static_layers)], axis=1)
    df = pd.DataFrame(variables1, columns=layer_columns)
    df = df.astype({'GWS':float,'elevation':float,'EC':float,'slope':float,'VI':float})
    frames.append(df.dropna(axis=0, how='any'))
    names.append(variables)

# DataFrame.append was removed in pandas 2.0: collect the per-image frames and
# concatenate once (also avoids re-copying the growing frame on every image).
df_P_2021 = pd.concat(frames_P) if frames_P else pd.DataFrame(columns=layer_columns)
df_W_2021 = pd.concat(frames_W) if frames_W else pd.DataFrame(columns=layer_columns)

print('p image number:', len(variable_list_P))
print('w image number:', len(variable_list_W))
print('p image list:', variable_list_P)
print('w image list:', variable_list_W)
print('input_P shape', df_P_2021.shape)
print('input_W shape', df_W_2021.shape)

In [None]:
# Per-acquisition predictors (day of the year, irrigation_fertigation,
# pluck_trim, and weather), one spreadsheet per vineyard.
input_2021_P = pd.read_excel('file_name')
input_2021_W = pd.read_excel('file_name')
# Attach them to every pixel row through the shared 'key' column.
df_join_P_2021, df_join_W_2021 = (
    pixels.join(extras.set_index('key'), on='key')
    for pixels, extras in ((df_P_2021, input_2021_P), (df_W_2021, input_2021_W))
)
df_all_2021 = pd.concat([df_join_P_2021, df_join_W_2021])
# Move DOY to column position 2, then keep columns 1..127, which drops the
# leading 'key' column.
col_DOY_2021 = df_all_2021.pop('DOY')
df_all_2021.insert(2, 'DOY', col_DOY_2021)
df_all_2021 = df_all_2021.iloc[:, 1:128]

In [None]:
# load reference images derived from calibrated UAV images in the second growing season
ref_20211123w = imread('file_name')
ref_20211123p = imread('file_name')
ref_20211129w = imread('file_name')
ref_20211129p = imread('file_name')
ref_20211209w = imread('file_name')
ref_20211209p = imread('file_name')
ref_20220111p = imread('file_name')
ref_20220121w = imread('file_name')
ref_20220121p = imread('file_name')
# Pad three images with a leading column (axis=1) or row (axis=0) of zeros.
# A scalar fill value is broadcast along the inserted line, so the image
# height / width no longer has to be hard-coded.
ref_20211129w = np.insert(ref_20211129w, 0, 0, axis=1)
ref_20211123p = np.insert(ref_20211123p, 0, 0, axis=0)
ref_20211129p = np.insert(ref_20211129p, 0, 0, axis=0)
for label, image in (('20211123w', ref_20211123w), ('20211123p', ref_20211123p),
                     ('20211129w', ref_20211129w), ('20211129p', ref_20211129p),
                     ('20211209w', ref_20211209w), ('20211209p', ref_20211209p),
                     ('20220111p', ref_20220111p),
                     ('20220121w', ref_20220121w), ('20220121p', ref_20220121p)):
    print(f'{label} shape:', image.shape, 'dtype:', image.dtype)

In [None]:
# Flatten every reference image: the first two axes are merged into a single
# pixel axis, any further axes are kept as they are.
(ref_20211123fl_w, ref_20211123fl_p,
 ref_20211129fl_w, ref_20211129fl_p,
 ref_20211209fl_w, ref_20211209fl_p,
 ref_20220111fl_p,
 ref_20220121fl_w, ref_20220121fl_p) = (
    image.reshape((-1,) + image.shape[2:])
    for image in (ref_20211123w, ref_20211123p,
                  ref_20211129w, ref_20211129p,
                  ref_20211209w, ref_20211209p,
                  ref_20220111p,
                  ref_20220121w, ref_20220121p)
)
for label, flat in (('20211123fl_w', ref_20211123fl_w), ('20211123fl_p', ref_20211123fl_p),
                    ('20211129fl_w', ref_20211129fl_w), ('20211129fl_p', ref_20211129fl_p),
                    ('20211209fl_w', ref_20211209fl_w), ('20211209fl_p', ref_20211209fl_p),
                    ('20220111fl_p', ref_20220111fl_p),
                    ('20220121fl_w', ref_20220121fl_w), ('20220121fl_p', ref_20220121fl_p)):
    print(f'{label} shape:', flat.shape, 'dtype:', flat.dtype)

In [None]:
# crop the data
refs_P = (ref_20211123fl_p, ref_20211129fl_p, ref_20211209fl_p,
          ref_20220111fl_p, ref_20220121fl_p)
refs_W = (ref_20211123fl_w, ref_20211129fl_w, ref_20211209fl_w,
          ref_20220121fl_w)

# Flag the NoData pixels of each vineyard with NaN. One vectorised index
# assignment per image replaces the former Python loop over every index.
for layer in refs_P:
    layer[boundary_P] = np.nan
for layer in refs_W:
    layer[boundary_W] = np.nan

# keep only the pixels that are not NaN
(ref_20211123fl_p, ref_20211129fl_p, ref_20211209fl_p,
 ref_20220111fl_p, ref_20220121fl_p) = (layer[~np.isnan(layer)] for layer in refs_P)
(ref_20211123fl_w, ref_20211129fl_w, ref_20211209fl_w,
 ref_20220121fl_w) = (layer[~np.isnan(layer)] for layer in refs_W)

# *_nan copies: values equal to 0 become NaN so those rows can be dropped once
# the layers are assembled into DataFrames
(ref_20211123fl_p_nan, ref_20211129fl_p_nan, ref_20211209fl_p_nan,
 ref_20220111fl_p_nan, ref_20220121fl_p_nan) = (
    np.where(layer == 0, np.nan, layer)
    for layer in (ref_20211123fl_p, ref_20211129fl_p, ref_20211209fl_p,
                  ref_20220111fl_p, ref_20220121fl_p)
)
(ref_20211123fl_w_nan, ref_20211129fl_w_nan, ref_20211209fl_w_nan,
 ref_20220121fl_w_nan) = (
    np.where(layer == 0, np.nan, layer)
    for layer in (ref_20211123fl_w, ref_20211129fl_w, ref_20211209fl_w,
                  ref_20220121fl_w)
)

for label, layer in (('20211123w', ref_20211123fl_w), ('20211123p', ref_20211123fl_p),
                     ('20211129w', ref_20211129fl_w), ('20211129p', ref_20211129fl_p),
                     ('20211209w', ref_20211209fl_w), ('20211209p', ref_20211209fl_p),
                     ('20220111p', ref_20220111fl_p),
                     ('20220121w', ref_20220121fl_w), ('20220121p', ref_20220121fl_p)):
    print(f'{label}_cut shape:', layer.shape)

In [None]:
# load VI layers
TCARI_2022w = imread('file_name')
ExG_2022w = imread('file_name')
NDVI_2022w = imread('file_name')
TCARI_2022p = imread('file_name')
ExG_2022p = imread('file_name')
NDVI_2022p = imread('file_name')
# Pad the P images with one leading row of zeros. A scalar fill value is
# broadcast along the row, so the image width no longer has to be hard-coded.
TCARI_2022p = np.insert(TCARI_2022p, 0, 0, axis=0)
ExG_2022p = np.insert(ExG_2022p, 0, 0, axis=0)
NDVI_2022p = np.insert(NDVI_2022p, 0, 0, axis=0)

# merge the first two axes of every layer into a single pixel axis
TCARI_2022w_fl = TCARI_2022w.reshape(-1, *TCARI_2022w.shape[2:])
ExG_2022w_fl = ExG_2022w.reshape(-1, *ExG_2022w.shape[2:])
NDVI_2022w_fl = NDVI_2022w.reshape(-1, *NDVI_2022w.shape[2:])
TCARI_2022p_fl = TCARI_2022p.reshape(-1, *TCARI_2022p.shape[2:])
ExG_2022p_fl = ExG_2022p.reshape(-1, *ExG_2022p.shape[2:])
NDVI_2022p_fl = NDVI_2022p.reshape(-1, *NDVI_2022p.shape[2:])

# flag NoData pixels with NaN (vectorised instead of one Python step per index)
for layer in (TCARI_2022p_fl, ExG_2022p_fl, NDVI_2022p_fl):
    layer[boundary_P] = np.nan
for layer in (TCARI_2022w_fl, ExG_2022w_fl, NDVI_2022w_fl):
    layer[boundary_W] = np.nan

# drop the flagged pixels ...
TCARI_2022w_fl = TCARI_2022w_fl[~np.isnan(TCARI_2022w_fl)]
ExG_2022w_fl = ExG_2022w_fl[~np.isnan(ExG_2022w_fl)]
NDVI_2022w_fl = NDVI_2022w_fl[~np.isnan(NDVI_2022w_fl)]
TCARI_2022p_fl = TCARI_2022p_fl[~np.isnan(TCARI_2022p_fl)]
ExG_2022p_fl = ExG_2022p_fl[~np.isnan(ExG_2022p_fl)]
NDVI_2022p_fl = NDVI_2022p_fl[~np.isnan(NDVI_2022p_fl)]
# ... and, in separate *_nan copies, replace values equal to -10 by NaN
# (presumably the VI NoData marker - confirm). Note the padded row above is
# filled with 0, not -10, so it is not converted here.
TCARI_2022w_fl_nan = np.where(TCARI_2022w_fl == -10, np.nan, TCARI_2022w_fl)
ExG_2022w_fl_nan = np.where(ExG_2022w_fl == -10, np.nan, ExG_2022w_fl)
NDVI_2022w_fl_nan = np.where(NDVI_2022w_fl == -10, np.nan, NDVI_2022w_fl)
TCARI_2022p_fl_nan = np.where(TCARI_2022p_fl == -10, np.nan, TCARI_2022p_fl)
ExG_2022p_fl_nan = np.where(ExG_2022p_fl == -10, np.nan, ExG_2022p_fl)
NDVI_2022p_fl_nan = np.where(NDVI_2022p_fl == -10, np.nan, NDVI_2022p_fl)

for label, layer in (('TCARI_2022w_fl', TCARI_2022w_fl), ('ExG_2022w_fl', ExG_2022w_fl),
                     ('NDVI_2022w_fl', NDVI_2022w_fl), ('TCARI_2022p_fl', TCARI_2022p_fl),
                     ('ExG_2022p_fl', ExG_2022p_fl), ('NDVI_2022p_fl', NDVI_2022p_fl)):
    print(f'{label} shape:', layer.shape, 'dtype:', layer.dtype)

In [None]:
# concatenate all layers
_LAYER_COLUMNS = ['key','GWS','elevation','EC','slope','location','VI']
# every column except the string 'key' is converted back to float
_NUMERIC_COLUMNS = {column: float for column in _LAYER_COLUMNS[1:]}


def _layer_table(key, gws, dem, ec, slope, location, vi):
    """Assemble the per-pixel table of one reference image.

    Parameters
    ----------
    key : str
        date_vineyard label repeated on every row (used later as join key).
    gws, dem, ec, slope, location, vi : array-like of equal length
        Per-pixel target, terrain layers, pixel id and vegetation index.

    Returns
    -------
    tuple
        (key column, column-stacked array, DataFrame). The stacked array mixes
        the string key with the numeric layers, so it holds text; the
        DataFrame has the numeric columns converted back to float.
    """
    key_column = np.full(len(gws), key)
    stacked = np.concatenate(
        [column[..., np.newaxis] for column in (key_column, gws, dem, ec, slope, location, vi)],
        axis=1)
    frame = pd.DataFrame(stacked, columns=_LAYER_COLUMNS).astype(_NUMERIC_COLUMNS)
    return key_column, stacked, frame


VI_P_fl = ExG_2022p_fl_nan # need change!
VI_W_fl = ExG_2022w_fl_nan # need change!
# Pixel counts come from the cropped terrain layers instead of being
# hard-coded; W pixel ids continue right after the P pixel ids.
pixel_count_P = len(DEM_P_fl)
pixel_count_W = len(DEM_W_fl)
location_P = np.arange(pixel_count_P)
location_W = np.arange(pixel_count_P, pixel_count_P + pixel_count_W)
layers_P = (DEM_P_fl, EC_P_fl, slope_P_fl, location_P, VI_P_fl)
layers_W = (DEM_W_fl, EC_W_fl, slope_W_fl, location_W, VI_W_fl)

key_20211123w, key_20211123w_cc, df_20211123w = _layer_table('20211123w', ref_20211123fl_w_nan, *layers_W)
key_20211123p, key_20211123p_cc, df_20211123p = _layer_table('20211123p', ref_20211123fl_p_nan, *layers_P)
key_20211129w, key_20211129w_cc, df_20211129w = _layer_table('20211129w', ref_20211129fl_w_nan, *layers_W)
key_20211129p, key_20211129p_cc, df_20211129p = _layer_table('20211129p', ref_20211129fl_p_nan, *layers_P)
key_20211209w, key_20211209w_cc, df_20211209w = _layer_table('20211209w', ref_20211209fl_w_nan, *layers_W)
key_20211209p, key_20211209p_cc, df_20211209p = _layer_table('20211209p', ref_20211209fl_p_nan, *layers_P)
key_20220111p, key_20220111p_cc, df_20220111p = _layer_table('20220111p', ref_20220111fl_p_nan, *layers_P)
key_20220121w, key_20220121w_cc, df_20220121w = _layer_table('20220121w', ref_20220121fl_w_nan, *layers_W)
key_20220121p, key_20220121p_cc, df_20220121p = _layer_table('20220121p', ref_20220121fl_p_nan, *layers_P)

In [None]:
# Remove every row that has a missing value in any column, one frame per
# reference image.
(df_20211123w_drop, df_20211123p_drop,
 df_20211129w_drop, df_20211129p_drop,
 df_20211209w_drop, df_20211209p_drop,
 df_20220111p_drop,
 df_20220121w_drop, df_20220121p_drop) = (
    frame.dropna(axis=0, how='any')
    for frame in (df_20211123w, df_20211123p,
                  df_20211129w, df_20211129p,
                  df_20211209w, df_20211209p,
                  df_20220111p,
                  df_20220121w, df_20220121p)
)
for label, kept in (('df_20211123w_drop', df_20211123w_drop), ('df_20211123p_drop', df_20211123p_drop),
                    ('df_20211129w_drop', df_20211129w_drop), ('df_20211129p_drop', df_20211129p_drop),
                    ('df_20211209w_drop', df_20211209w_drop), ('df_20211209p_drop', df_20211209p_drop),
                    ('df_20220111p_drop', df_20220111p_drop),
                    ('df_20220121w_drop', df_20220121w_drop), ('df_20220121p_drop', df_20220121p_drop)):
    print(f'{label} shape:', kept.shape)

In [None]:
# Stack the cleaned per-date tables of each vineyard.
df_P_2022 = pd.concat([df_20211123p_drop, df_20211129p_drop, df_20211209p_drop,
                       df_20220111p_drop, df_20220121p_drop])
df_W_2022 = pd.concat([df_20211123w_drop, df_20211129w_drop, df_20211209w_drop,
                       df_20220121w_drop])
# Per-acquisition predictors (day of the year, irrigation_fertigation,
# pluck_trim, weather), one spreadsheet per vineyard.
input_2022_P = pd.read_excel('file_name')
input_2022_W = pd.read_excel('file_name')
# Attach them to every pixel row through the shared 'key' column.
df_join_P_2022, df_join_W_2022 = (
    pixels.join(extras.set_index('key'), on='key')
    for pixels, extras in ((df_P_2022, input_2022_P), (df_W_2022, input_2022_W))
)
df_all_2022 = pd.concat([df_join_P_2022, df_join_W_2022])
# Move DOY to column position 2, then keep columns 1..127, which drops the
# leading 'key' column.
col_DOY_2022 = df_all_2022.pop('DOY')
df_all_2022.insert(2, 'DOY', col_DOY_2022)
df_all_2022 = df_all_2022.iloc[:, 1:128]

In [None]:
# choose optimal random state number
# For each candidate seed, split 70/30 and record mean and std of the target
# in both partitions, so a seed with well-matched train/test distributions can
# be picked from df_test_list.
# Features, target and the stratification column do not depend on the seed,
# so they are extracted once instead of on every iteration.
X_2021, y_2021 = df_all_2021.iloc[:,1:123].values, df_all_2021.iloc[:,0].values
# NOTE(review): this stratifies on column index 2, whereas the final
# train/test split cell stratifies on column index 1 - confirm which column
# is intended.
strata_2021 = df_all_2021.iloc[:,2]
test_list = []
for i in range(0, 45):
    X_train_2021, X_test_2021, y_train_2021, y_test_2021 = train_test_split(
        X_2021, y_2021, test_size=0.3, random_state=i, stratify=strata_2021)
    output = [i,np.mean(y_train_2021),np.mean(y_test_2021),np.std(y_train_2021),np.std(y_test_2021)]
    test_list.append(output)
df_test_list = pd.DataFrame(test_list, columns =['random state','train_mean','test_mean','train_std','test_std'])

In [None]:
# Reload both seasons from CSV.
df_all_2021 = pd.read_csv('file_name')
df_all_2022 = pd.read_csv('file_name')

# Train/test split of the first season: column 0 is the target, columns
# 1..124 are the predictors.
# [:,1:125] for 2 climate, [:,np.r_[1:65, 95:245]] for 5 climate/3 terrain
y_2021 = df_all_2021.iloc[:, 0].values
X_2021 = df_all_2021.iloc[:, 1:125].values
split_2021 = train_test_split(
    X_2021, y_2021,
    test_size=0.3,
    random_state=26,  # rf is 26, mlp is 9
    stratify=df_all_2021.iloc[:, 1],
)
X_train_2021, X_test_2021, y_train_2021, y_test_2021 = split_2021

# Standardization: the scaler is fitted on the training part only and then
# applied unchanged to the test part and to the second season.
scaler_st = StandardScaler().fit(X_train_2021)
X_train_2021_st = scaler_st.transform(X_train_2021)
X_test_2021_st = scaler_st.transform(X_test_2021)

# External validation on the second season.
y_2022 = df_all_2022.iloc[:, 0].values
X_2022 = df_all_2022.iloc[:, 1:125].values
X_2022_st = scaler_st.transform(X_2022)

In [None]:
# Re-extract predictors (here: every column after the first) and target, then
# report whether any array contains NaN or infinite values.
y_2021 = df_all_2021.iloc[:, 0].values
X_2021 = df_all_2021.iloc[:, 1:].values
for problem, detect in (('NaN', np.isnan), ('infinity', np.isinf)):
    for label, values in (('X_2021', X_2021),
                          ('X_train_2021_st', X_train_2021_st),
                          ('X_test_2021_st', X_test_2021_st),
                          ('y_2021', y_2021)):
        print(f'any {problem} in {label}?', np.any(detect(values)))

In [None]:
# Re-extract predictors (here: every column after the first) and target of the
# second season, then report whether any array contains NaN or infinite values.
y_2022 = df_all_2022.iloc[:, 0].values
X_2022 = df_all_2022.iloc[:, 1:].values
for problem, detect in (('NaN', np.isnan), ('infinity', np.isinf)):
    for label, values in (('X_2022', X_2022),
                          ('X_2022_st', X_2022_st),
                          ('y_2022', y_2022)):
        print(f'any {problem} in {label}?', np.any(detect(values)))

In [None]:
# Random forest regression
# Grid search (10-fold CV, scored by R^2) over tree depth and the number of
# features tried per split; the refitted best model is then scored on the
# training set, the 2021 hold-out set and the 2022 external set.
rf = RandomForestRegressor(n_estimators=500, random_state=0, n_jobs=-1)
# None = consider all features, which is what 'auto' meant for regressors;
# the 'auto' spelling itself is no longer accepted by recent scikit-learn.
rf_para = {'max_depth':[5,7,9,11,13,15,17,19], 'max_features':[None, 'sqrt', 'log2']}
rf_gs = GridSearchCV(rf, rf_para, cv = 10, scoring='r2', n_jobs=-1)
rf_gs.fit(X_train_2021_st, y_train_2021)
rf_train_r2 = rf_gs.score(X_train_2021_st, y_train_2021)
rf_test_r2_2021 = rf_gs.score(X_test_2021_st, y_test_2021)
# RMSE as sqrt(MSE): the squared=False shortcut of mean_squared_error has been
# removed from recent scikit-learn, while this form works on every version.
rf_test_rmse_2021 = np.sqrt(mean_squared_error(y_test_2021,
                                               rf_gs.predict(X_test_2021_st)))
rf_test_r2_2022 = rf_gs.score(X_2022_st, y_2022)
rf_test_rmse_2022 = np.sqrt(mean_squared_error(y_2022,
                                               rf_gs.predict(X_2022_st)))
print(rf_train_r2, rf_test_r2_2021, rf_test_rmse_2021, rf_test_r2_2022,
      rf_test_rmse_2022)

In [None]:
# Multi-layer Perceptron
# Grid search (10-fold CV, scored by R^2) over network shape, activation,
# solver, learning-rate schedule and L2 penalty; the refitted best model is
# then scored on the training set, the 2021 hold-out set and the 2022
# external set.
mlp = MLPRegressor(random_state=0, max_iter=8000)
n = 243
# Layer widths obtained by repeated integer halving: n, n//2, (n//2)//2, ...
layer_widths = [n]
while len(layer_widths) < 7:
    layer_widths.append(layer_widths[-1] // 2)
# Candidate architectures: the first 1, 2, ..., 7 widths.
hidden_layer_options = [tuple(layer_widths[:depth]) for depth in range(1, 8)]
# NOTE(review): the original grid's 4-layer candidate repeats the third width
# instead of halving again; kept exactly as it was so the search space is
# unchanged - confirm whether (n, n//2, n//4, n//8) was intended.
hidden_layer_options[3] = tuple(layer_widths[:3]) + (layer_widths[2],)
mlp_para = {'hidden_layer_sizes': hidden_layer_options,
            'activation': ['tanh', 'relu'], 'solver': ['sgd', 'adam'],
            'learning_rate': ['constant','adaptive'],
            'alpha': [0.0001,0.001,0.01]}
mlp_gs = GridSearchCV(mlp, mlp_para, cv = 10, scoring='r2', n_jobs=-1)
mlp_gs.fit(X_train_2021_st, y_train_2021)
mlp_train_r2 = mlp_gs.score(X_train_2021_st, y_train_2021)
mlp_test_r2_2021 = mlp_gs.score(X_test_2021_st, y_test_2021)
# RMSE as sqrt(MSE): the squared=False shortcut of mean_squared_error has been
# removed from recent scikit-learn, while this form works on every version.
mlp_test_rmse_2021 = np.sqrt(mean_squared_error(y_test_2021,
                                                mlp_gs.predict(X_test_2021_st)))
mlp_test_r2_2022 = mlp_gs.score(X_2022_st, y_2022)
mlp_test_rmse_2022 = np.sqrt(mean_squared_error(y_2022,
                                                mlp_gs.predict(X_2022_st)))
print(mlp_train_r2, mlp_test_r2_2021, mlp_test_rmse_2021, mlp_test_r2_2022,
     mlp_test_rmse_2022)

In [None]:
# Support vector regression
# Grid search (10-fold CV, scored by R^2) over kernel, C, gamma and epsilon;
# the refitted best model is then scored on the training set, the 2021
# hold-out set and the 2022 external set.
svr = SVR()
svr_para = {'kernel':['linear','poly','rbf'], 'C':[0.01,0.1,1,10,100],
                        'gamma':['scale','auto'], 'epsilon':[0.1,0.5,0.9]}
svr_gs = GridSearchCV(svr, svr_para, cv = 10, scoring='r2', n_jobs=-1)
svr_gs.fit(X_train_2021_st, y_train_2021)
svr_train_r2 = svr_gs.score(X_train_2021_st, y_train_2021)
svr_test_r2_2021 = svr_gs.score(X_test_2021_st, y_test_2021)
# RMSE as sqrt(MSE): the squared=False shortcut of mean_squared_error has been
# removed from recent scikit-learn, while this form works on every version.
svr_test_rmse_2021 = np.sqrt(mean_squared_error(y_test_2021,
                                                svr_gs.predict(X_test_2021_st)))
svr_test_r2_2022 = svr_gs.score(X_2022_st, y_2022)
svr_test_rmse_2022 = np.sqrt(mean_squared_error(y_2022,
                                                svr_gs.predict(X_2022_st)))
print(svr_train_r2, svr_test_r2_2021, svr_test_rmse_2021, svr_test_r2_2022,
      svr_test_rmse_2022)