### Start

In [1]:
import numpy as np
from osgeo import gdal
from osgeo import osr
import time
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner
from tensorflow.keras import backend as K

# Restrict TensorFlow to the first GPU it reports and enable memory growth on it.
# The configuration is guarded so the notebook still runs on a machine where
# TensorFlow reports no GPU; indexing physical_devices[0] unconditionally
# raises IndexError in that case.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_visible_devices(devices=physical_devices[0], device_type='GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
else:
    print('No GPU detected by TensorFlow; running on CPU.')

In [3]:
# Use the channels-last tensor layout (the channel axis is the final axis).
keras.backend.set_image_data_format('channels_last')

In [4]:
import os
# Expose only CUDA device 0 to this process.
# NOTE(review): this is set after TensorFlow has already listed and configured
# the GPUs in an earlier cell, so it presumably has no effect on this session;
# confirm, and consider moving it above the TensorFlow import.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [5]:
# Custom Keras metric: coefficient of determination (R^2).
def R2(y_true, y_pred):
    """Return the coefficient of determination of ``y_pred`` against ``y_true``.

    The score is ``1 - SS_res / SS_tot`` computed over every element of the
    tensors passed in.

    Args:
        y_true: Tensor of observed target values.
        y_pred: Tensor of predicted values, broadcast-compatible with ``y_true``.

    Returns:
        Scalar tensor holding the R^2 score.
    """
    # Total sum of squares around the mean of the observations.
    ss_total = K.sum(K.square(y_true - K.mean(y_true)))
    # Residual sum of squares between predictions and observations.
    ss_residual = K.sum(K.square(y_pred - y_true))
    # K.epsilon() keeps the ratio finite when every target is identical
    # (ss_total == 0); without it the metric becomes inf/NaN.
    return 1 - ss_residual / (ss_total + K.epsilon())

## Helper functions

In [6]:
def Array2Image(data,lats,lons):
  """Arrange per-pixel values on a regular 2-D grid defined by their coordinates.

  Rows follow the unique latitudes in descending order (largest latitude in
  row 0) and columns follow the unique longitudes in ascending order. Grid
  cells that receive no value are NaN.

  Args:
    data: Sequence of N values, or an array holding exactly one value per
      pixel such as shape (N, 1).
    lats: Latitude of each pixel; the first N entries are paired with data.
    lons: Longitude of each pixel; the first N entries are paired with data.

  Returns:
    float32 array of shape (number of unique lats, number of unique lons).

  Raises:
    ValueError: If data does not hold exactly one value per pixel.
  """
  # np.unique also returns, for every coordinate, its position in the sorted
  # unique array; this replaces a separate np.where search per pixel.
  uniqueLats, lat_index = np.unique(np.asarray(lats).ravel(), return_inverse=True)
  uniqueLons, lon_index = np.unique(np.asarray(lons).ravel(), return_inverse=True)

  # get number of columns and rows from coordinates
  ncols = len(uniqueLons)
  nrows = len(uniqueLats)

  # create an array with the dimensions of the image; cells without data stay NaN
  arr = np.full([nrows, ncols], np.nan, dtype=np.float32)

  # fill the array with values; the row index is flipped so the largest
  # latitude ends up in the first row
  n = len(data)
  values = np.asarray(data, dtype=np.float32).reshape(n)
  arr[nrows - 1 - lat_index[:n], lon_index[:n]] = values
  return arr

In [7]:
def ReadData(df:pd.DataFrame):
    """Read band 1 of every raster listed in ``df`` into a table of pixel values.

    Args:
        df: DataFrame whose cells are raster file paths that GDAL can open,
            with one column per variable.

    Returns:
        DataFrame with the same column names as ``df``; each column holds the
        flattened pixel values of the corresponding raster as float64.

    Raises:
        RuntimeError: If GDAL cannot open one of the paths.
    """
    allData = []
    for j in range(df.shape[1]):
        column_arrays = []
        for i in range(df.shape[0]):
            raster_path = df.iloc[i,j]
            dataset = gdal.Open(raster_path)
            # gdal.Open returns None instead of raising unless GDAL exceptions
            # are enabled; fail here with the offending path rather than with
            # an AttributeError on the next line.
            if dataset is None:
                raise RuntimeError('GDAL could not open raster: %s' % raster_path)
            pixels = dataset.GetRasterBand(1).ReadAsArray()
            column_arrays.append(np.reshape(pixels,(-1)))
            # Drop the reference so GDAL can release the file.
            dataset = None
        allData.append(column_arrays)
    # Shape is (n_columns, n_rows, n_pixels); squeeze() removes the row axis
    # when df has a single row.
    # NOTE(review): with more than one row the array stays 3-D and the
    # DataFrame construction below would fail; confirm callers pass one row.
    allData = np.array(allData,dtype = np.float64).squeeze()
    return pd.DataFrame(allData.T,columns=df.columns.to_list())

In [8]:
# Reference raster whose geotransform and projection are copied onto every output file.
templete  = gdal.Open('F:/SGYL/SM_Downscaling2/Lon.tif')
def convert2Tif(data,outputPath):
    """Write a 2-D array to a single-band Int16 GeoTIFF georeferenced like ``templete``.

    Args:
        data: 2-D array (rows, columns) written to band 1.
        outputPath: Destination path of the GeoTIFF.

    Raises:
        RuntimeError: If the GTiff driver cannot create ``outputPath``.
    """
    driver = gdal.GetDriverByName('GTiff')
    tif = driver.Create(outputPath,data.shape[1],data.shape[0],1,gdal.GDT_Int16)
    # Create returns None on failure (for example a missing output folder)
    # unless GDAL exceptions are enabled; report the path instead of failing
    # with an AttributeError below.
    if tif is None:
        raise RuntimeError('GDAL could not create output raster: %s' % outputPath)
    tif.SetGeoTransform(templete.GetGeoTransform())
    tif.SetProjection(templete.GetProjection())
    # NOTE(review): the band is Int16 and no NoData value is set, so NaN pixels
    # in `data` cannot be represented as such; confirm how missing pixels
    # should be encoded.
    tif.GetRasterBand(1).WriteArray(data)
    tif.FlushCache()
    # Dropping the last reference closes the dataset.
    del tif


## Preprocessing

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
# Derive the NoData sentinel from a sample raster by taking the minimum pixel
# value of its first band.
# NOTE(review): this assumes the sample raster contains at least one NoData
# pixel and that NoData is smaller than every valid value; confirm, or compare
# with the NoData value declared on the band.
noData_i = gdal.Open('F:/SGYL/SM_results_data/DATA/data_2020/LST_Diff/LST_Diff_2020_1.tif')
noData_band = noData_i.GetRasterBand(1)
noData_arr = noData_band.ReadAsArray()
noData = np.min(noData_arr)
print(noData)

-3.4e+38


## Predict

In [11]:
# Daily prediction for every year: load that year's model, refit the feature
# scaler on that year's training table, read the predictor rasters of each
# day, impute missing pixels, predict in batches and write one GeoTIFF per day.
# Predictor columns, in order (DOY is appended per day):
#['NDVI', 'EVI', 'LST', 'LST_Diff', 'Pre', 'SWCI', 'VSDI', 'SIWSI', 'ET', 'TWI', 'dem', 'aspect', 'slope', 'clay', 'sand', 'silt', 'Lon', 'Lat']
error_index_year = []   # (year, last day index started or None) for years whose setup failed
error_index_day = []    # (year, day index) for days whose prediction failed
batch_size = 4096
for year in range(2003,2006):
    # Remember how far the year got so the outer handler never touches an
    # undefined or stale loop variable.
    last_day_index = None
    try:
        print(year,'************************************************')
        print('load model')
        model_path = os.path.join('D:/SGYL/SM_results_data/check_points/Full_ResNet/','Full_ResNet_'+str(year)+'_best.hdf5')
        print(model_path)
        model = keras.models.load_model(model_path,custom_objects={'R2':R2})

        print('load training data')
        # The training table is only needed to refit the scaler and to obtain
        # the feature column names and order.
        data_train = pd.read_csv(os.path.join('D:/SGYL/SM_Downscaling_data/Train/split/','train_data_'+str(year)+'.csv'))
        X_train = data_train.drop(['SM'],axis = 1)
        X_train_columns = X_train.columns.to_list()
        print(X_train.columns.to_list())
        standarder = StandardScaler()
        X_train = standarder.fit_transform(X_train)

        print('prepare file path')
        feature_order = ['NDVI','EVI','LST','LST_Diff','Pre','SWCI','VSDI','SIWSI','ET']
        static_feature = ['TWI','dem','aspect','slope','clay','sand','silt','Lon','Lat']

        root = 'F:/SGYL/SM_results_data/DATA/data_'+str(year)+'/'

        # Gregorian leap-year rule instead of a hard-coded list of years.
        is_leap_year = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
        days_in_year = 366 if is_leap_year else 365

        predictor_path = dict()
        for feature_name in feature_order:
            path = os.path.join(root,feature_name)
            if feature_name == 'ET':
                # os.listdir gives no ordering guarantee, so sort explicitly.
                # NOTE(review): plain string order matches day order only if
                # the day number in the ET file names is zero-padded; confirm.
                files = sorted(os.path.join(path+'/',name) for name in os.listdir(path+'/'))
            else:
                files = [os.path.join(path+'/',feature_name+'_'+str(year)+'_'+str(day)+'.tif') for day in range(1,days_in_year+1)]
            predictor_path[feature_name] = files

        path_length = len(predictor_path['LST'])

        # Static rasters are the same file repeated for every day.
        for var in static_feature:
            path_i = os.path.join('F:/SGYL/SM_results_data/Static/',var+'.tif')
            predictor_path[var] = [path_i] * path_length

        predictor_path = pd.DataFrame(predictor_path)
        predictor_path.columns = feature_order+static_feature

        # Create the output folder up front so writing does not fail for every day.
        save_dir = 'F:/SGYL/SM_results_data/results/'+str(year)+'/'+'Full_ResNet'+'/'
        os.makedirs(save_dir,exist_ok=True)

        print('start predict','************************************************')

        for i in range(predictor_path.shape[0]):
            last_day_index = i
            try:
                data_i = ReadData(predictor_path.iloc[i:(i+1),:])
                # Keep the coordinates before the table is turned into an array.
                lon_lat = data_i[['Lon','Lat']]
                data_i['DOY'] = int(i+1)
                data_i.columns = X_train_columns
                data_i = data_i.to_numpy()
                data_i[data_i == noData] = np.nan
                # Replace missing values with the median of their column.
                for col in range(data_i.shape[1]):
                    column = data_i[:,col]  # view: assignment writes into data_i
                    column[np.isnan(column)] = np.nanmedian(column)
                data_i = pd.DataFrame(data_i,columns=X_train_columns)
                data_i = standarder.transform(data_i)
                # Add a trailing length-1 axis (XGBoost does not need this reshape).
                data_i = data_i.reshape(data_i.shape[0],data_i.shape[1],1)
                # Stepping by batch_size never yields an empty trailing batch,
                # unlike shape[0]//batch_size+1 chunks when the pixel count is
                # an exact multiple of batch_size.
                data_ii = [data_i[start:start+batch_size,:,:] for start in range(0,data_i.shape[0],batch_size)]
                data_pred = np.concatenate([model(batch,training = False).numpy() for batch in data_ii])
                predicted_arr = Array2Image(data_pred,lon_lat['Lat'],lon_lat['Lon'])
                save_path = save_dir+'ResNet_'+str(year)+'_'+str(i+1)+'.tif'
                # Scale by 10000 before writing to the Int16 raster.
                predicted_arr = predicted_arr * 10000
                convert2Tif(predicted_arr,save_path)
                print(i,': save the %d tif successfully for the year %d '%(i+1,year))
            except Exception as e:
                # i+1 is the day of the year. The format string needs two
                # placeholders; with one, the handler itself raised TypeError.
                print('failed at year %d index %d'%(year,i+1))
                print(repr(e))
                error_index_day.append((year,i))
    except Exception as e:
        # Catch Exception rather than everything so Ctrl-C still stops the run.
        print('failed at year %d'%year)
        print(repr(e))
        error_index_year.append((year,last_day_index))
    

2003 ************************************************
load model
D:/SGYL/SM_results_data/check_points/Full_ResNet/Full_ResNet_2003_best.hdf5
load training data
['NDVI', 'EVI', 'LST', 'LST_Diff', 'Pre', 'SWCI', 'VSDI', 'SIWSI', 'ET', 'TWI', 'DEM', 'Aspect', 'Slope', 'Clay', 'Sand', 'Silt', 'Lon', 'Lat', 'DOY']
prepare file path
start predict ************************************************
0 : save the 1 tif successfully for the year 2003 
1 : save the 2 tif successfully for the year 2003 
2 : save the 3 tif successfully for the year 2003 
3 : save the 4 tif successfully for the year 2003 
4 : save the 5 tif successfully for the year 2003 
5 : save the 6 tif successfully for the year 2003 
6 : save the 7 tif successfully for the year 2003 
7 : save the 8 tif successfully for the year 2003 
8 : save the 9 tif successfully for the year 2003 
9 : save the 10 tif successfully for the year 2003 
10 : save the 11 tif successfully for the year 2003 
11 : save the 12 tif successfully for the ye

In [None]:
# Show the (year, day index) pairs collected for days whose prediction failed.
print(error_index_day)

[]


## Evaluate

### Evaluate the saved model of one year on its test split

In [11]:
from sklearn.preprocessing import StandardScaler
# Load the train / validate / test tables of one year, separate the 'SM'
# target from the features, standardise the features with statistics fitted
# on the training split only, and add a trailing length-1 axis to each
# feature matrix.
year = 2017
data_train, data_validate, data_test = (
    pd.read_csv(os.path.join('D:/SGYL/SM_results_data/Train/split/',stem+str(year)+'.csv'))
    for stem in ('train_data_','validate_data_','test_data_')
)

X_train, y_train = data_train.drop(['SM'],axis = 1), data_train['SM'].copy()
X_validate, y_validate = data_validate.drop(['SM'],axis = 1), data_validate['SM'].copy()
X_test, y_test = data_test.drop(['SM'],axis = 1), data_test['SM'].copy()

# Fit on the training features, then apply the same scaling to the other splits.
standarder = StandardScaler()
X_train = standarder.fit_transform(X_train)
X_validate, X_test = (standarder.transform(features) for features in (X_validate, X_test))

# (samples, features) -> (samples, features, 1)
X_train, X_validate, X_test = (
    scaled.reshape(scaled.shape[0],scaled.shape[1],1)
    for scaled in (X_train, X_validate, X_test)
)

print('X_train shape:',X_train.shape)
print('X_validate shape:',X_validate.shape)

X_train shape: (637490, 19, 1)
X_validate shape: (191247, 19, 1)


In [12]:
# Build the path of the '_best' checkpoint for the selected year and show it.
checkpoint_dir = 'D:/SGYL/SM_results_data/check_points/Full_ResNet/'
checkpoint_name = 'Full_ResNet_'+str(year)+'_best.hdf5'
model_path = os.path.join(checkpoint_dir,checkpoint_name)
#model_path = os.path.join('D:/SGYL/SM_results_data/model/Full_ResNet/','Full_ResNet_'+str(year)+'.hdf5')
print(model_path)

D:/SGYL/SM_results_data/check_points/Full_ResNet/Full_ResNet_2017_best.hdf5


In [13]:
# Load the saved model; the custom R2 metric is supplied through custom_objects
# under the name 'R2'.
model = keras.models.load_model(model_path,custom_objects={'R2':R2})



In [14]:
# Evaluate the loaded model on the test split.
# NOTE(review): the returned list is presumably the loss followed by the
# metrics the model was compiled with (R2 among them); confirm the order.
model.evaluate(X_test,y_test)



[0.0008008122094906867, 0.900081217288971, 0.020775100216269493]