## 1. Load the libraries for calculation (Gymnosperm Wood density)

In [None]:
# Import the modules of interest
import pandas as pd
import geopandas as gpd
import ee
from sklearn.metrics import r2_score
from termcolor import colored # this is allocate colour and fonts type for the print title and text
from IPython.display import display, HTML
import numpy as np

In [2]:
# Initialize the Earth Engine API connection; every ee.* call in the cells below
# relies on this having been run first in the current kernel
ee.Initialize()

In [3]:
# earthengine rm -r users/leonidmoore/WoodDensityProject/BufferBasedSubsampling/

## 2. Extract the covariates to Google drive

### 2.1 Load the data points

In [4]:
# Asset holding the full set of gymnosperm wood-density points
fullDataPointsAssetId = 'users/leonidmoore/WoodDensityProject/Full_Shapefiles/WoodDensity_Diversity_Pixel_Gymno'
fullDataPoints = ee.FeatureCollection(fullDataPointsAssetId)
# Fetch the number of features from the server and show it as a quick sanity check
fullDataPointsCount = fullDataPoints.size().getInfo()
print(fullDataPointsCount)

407137


### 2.2 Load the covariates composite

In [5]:
# Load the multi-band predictor composite image that the covariates are sampled from
compositeRaw = ee.Image("users/leonidmoore/ForestBiomass/20200915_Forest_Biomass_Predictors_Image")
# Keep the composite's projection; the extra layers added later are reprojected to it
stdProj = compositeRaw.projection()

In [6]:
# Load the forest-age image and list its bands so the selection below can be checked
forestAgeData = ee.Image("projects/crowtherlab/johan/Besnard_ForestAge")
forestAgeBandList = forestAgeData.bandNames().getInfo()
print(colored('Band in Forest age data:\n', 'blue', attrs=['bold']),forestAgeBandList)
# Select one band, align it with the composite projection and rename it 'ForestAge'.
# NOTE(review): the previous comment said band "forest_age_TC030" is used for modeling,
# but the code selects 'forest_age_TC000' -- confirm which band is intended.
forestAgeSelected = forestAgeData.select(['forest_age_TC000'])
forestAge = forestAgeSelected.reproject(crs=stdProj).rename('ForestAge')

[1m[34mBand in Forest age data:
[0m ['forest_age_TC000', 'forest_age_TC010', 'forest_age_TC020', 'forest_age_TC030', 'forest_age_TCloss_intensity', 'forest_age_LastTimeTCloss_std']


In [7]:
# Soil-moisture layer, reprojected to the composite projection and renamed 'SoilMoisture'
soilmoistureRaw = ee.Image('users/haozhima95/wld_soil_moisture')
soilmoisture = soilmoistureRaw.reproject(crs=stdProj).rename('SoilMoisture')
# Append the two extra layers to the raw composite (soil moisture first, then forest age)
compositeWithSoilMoisture = compositeRaw.addBands(soilmoisture)
compositeImg = compositeWithSoilMoisture.addBands(forestAge)
# List the band names of the final composite as a check
compositeBandNames = compositeImg.bandNames()
compositeBandTitle = colored('Band in composite:\n', 'blue', attrs=['bold'])
print(compositeBandTitle,compositeBandNames.getInfo())

[1m[34mBand in composite:
[0m ['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month', 'CHELSA_Mean_Diurnal_Range', 'CHELSA_Mean_Temperature_of_Coldest_Quarter', 'CHELSA_Mean_Temperature_of_Driest_Quarter', 'CHELSA_Mean_Temperature_of_Warmest_Quarter', 'CHELSA_Mean_Temperature_of_Wettest_Quarter', 'CHELSA_Min_Temperature_of_Coldest_Month', 'CHELSA_Precipitation_Seasonality', 'CHELSA_Precipitation_of_Coldest_Quarter', 'CHELSA_Precipitation_of_Driest_Month', 'CHELSA_Precipitation_of_Driest_Quarter', 'CHELSA_Precipitation_of_Warmest_Quarter', 'CHELSA_Precipitation_of_Wettest_Month', 'CHELSA_Precipitation_of_Wettest_Quarter', 'CHELSA_Temperature_Annual_Range', 'CHELSA_Temperature_Seasonality', 'Depth_to_Water_Table', 'EarthEnvCloudCover_MODCF_interannualSD', 'EarthEnvCloudCover_MODCF_intraannualSD', 'EarthEnvCloudCover_MODCF_meanannual', 'EarthEnvTopoMed_AspectCosine', 'EarthEnvTopoMed_AspectSin

### 2.3 Define the list of predictors

In [8]:
# Names of the composite bands used as predictors (order is significant and preserved).
# The entries are grouped below by name prefix purely for readability.
predictorsSelected = [
    # aridity and CHELSA_* bands
    'Aridity_Index',
    'CHELSA_Annual_Mean_Temperature',
    'CHELSA_Annual_Precipitation',
    'CHELSA_Isothermality',
    'CHELSA_Max_Temperature_of_Warmest_Month',
    'CHELSA_Mean_Diurnal_Range',
    'CHELSA_Mean_Temperature_of_Coldest_Quarter',
    'CHELSA_Mean_Temperature_of_Driest_Quarter',
    'CHELSA_Mean_Temperature_of_Warmest_Quarter',
    'CHELSA_Mean_Temperature_of_Wettest_Quarter',
    'CHELSA_Min_Temperature_of_Coldest_Month',
    'CHELSA_Precipitation_Seasonality',
    'CHELSA_Precipitation_of_Coldest_Quarter',
    'CHELSA_Precipitation_of_Driest_Month',
    'CHELSA_Precipitation_of_Driest_Quarter',
    'CHELSA_Precipitation_of_Warmest_Quarter',
    'CHELSA_Precipitation_of_Wettest_Month',
    'CHELSA_Precipitation_of_Wettest_Quarter',
    'CHELSA_Temperature_Annual_Range',
    'CHELSA_Temperature_Seasonality',
    # water table, EarthEnv cloud cover and EarthEnv topography bands
    'Depth_to_Water_Table',
    'EarthEnvCloudCover_MODCF_interannualSD',
    'EarthEnvCloudCover_MODCF_intraannualSD',
    'EarthEnvCloudCover_MODCF_meanannual',
    'EarthEnvTopoMed_Eastness',
    'EarthEnvTopoMed_Elevation',
    'EarthEnvTopoMed_Northness',
    'EarthEnvTopoMed_ProfileCurvature',
    'EarthEnvTopoMed_Roughness',
    'EarthEnvTopoMed_Slope',
    'EarthEnvTopoMed_TopoPositionIndex',
    # bedrock depth and WorldClim2_* bands
    'SG_Absolute_depth_to_bedrock',
    'WorldClim2_SolarRadiation_AnnualMean',
    'WorldClim2_WindSpeed_AnnualMean',
    'WorldClim2_H2OVaporPressure_AnnualMean',
    # vegetation-related bands
    'NDVI',
    'EVI',
    'Lai',
    'Fpar',
    'Npp',
    'Tree_Density',
    'PET',
    # SG_* soil bands (0-100 cm)
    'SG_Clay_Content_0_100cm',
    'SG_Coarse_fragments_0_100cm',
    'SG_Sand_Content_0_100cm',
    'SG_Silt_Content_0_100cm',
    'SG_Soil_pH_H2O_0_100cm',
    # land cover, land use and disturbance bands
    'LandCoverClass_Cultivated_and_Managed_Vegetation',
    'LandCoverClass_Urban_Builtup',
    'Human_Disturbance',
    'PresentTreeCover',
    'Nitrogen',
    'CanopyHeight',
    'cropland',
    'grazing',
    'pasture',
    'rangeland',
    'Fire_Frequency',
    'cnRatio',
    'Cation',
    # the two layers appended to the raw composite in this notebook
    'SoilMoisture',
    'ForestAge',
]
# Print the predictor list under a bold blue title
predictorsTitle = colored('The predictors are:', 'blue', attrs=['bold'])
print(predictorsTitle,predictorsSelected)

[1m[34mThe predictors are:[0m ['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month', 'CHELSA_Mean_Diurnal_Range', 'CHELSA_Mean_Temperature_of_Coldest_Quarter', 'CHELSA_Mean_Temperature_of_Driest_Quarter', 'CHELSA_Mean_Temperature_of_Warmest_Quarter', 'CHELSA_Mean_Temperature_of_Wettest_Quarter', 'CHELSA_Min_Temperature_of_Coldest_Month', 'CHELSA_Precipitation_Seasonality', 'CHELSA_Precipitation_of_Coldest_Quarter', 'CHELSA_Precipitation_of_Driest_Month', 'CHELSA_Precipitation_of_Driest_Quarter', 'CHELSA_Precipitation_of_Warmest_Quarter', 'CHELSA_Precipitation_of_Wettest_Month', 'CHELSA_Precipitation_of_Wettest_Quarter', 'CHELSA_Temperature_Annual_Range', 'CHELSA_Temperature_Seasonality', 'Depth_to_Water_Table', 'EarthEnvCloudCover_MODCF_interannualSD', 'EarthEnvCloudCover_MODCF_intraannualSD', 'EarthEnvCloudCover_MODCF_meanannual', 'EarthEnvTopoMed_Eastness', 'EarthEnvTopoMed_Elevation', '

### 2.4 Extract the covariates for grid search of hyperparameters in GEE

#### 2.4.1 Buffer zone based subsampling

In [9]:
# Buffer radius passed to geometry().buffer() when thinning the points
bufferSize = 50000 # 50km

def interateFunc(el,ini):
    """Accumulator step for ``FeatureCollection.iterate``.

    Appends ``el`` to the running list only when no feature already in that
    list intersects the ``bufferSize`` buffer drawn around ``el``.

    Args:
        el: the current element of the iteration; wrapped as an ``ee.Feature``.
        ini: the running accumulator; wrapped as an ``ee.List`` of features.

    Returns:
        The result of ``ee.Algorithms.If``: the list with ``el`` appended when
        the buffer is empty of kept features, otherwise the list unchanged.
    """
    keptList = ee.List(ini)
    # zone around the candidate point that must not contain any kept feature
    bufferZone = ee.Feature(el).geometry().buffer(bufferSize)
    # number of already-kept features that fall inside that zone
    nearbyCount = ee.FeatureCollection(keptList).filterBounds(bufferZone).size()
    return ee.Algorithms.If(nearbyCount.lte(0), keptList.add(el), keptList)
def filterDistanceFunc(points):
    """Thin a feature collection with the buffer rule in ``interateFunc``.

    Iterates over ``points`` in collection order, starting from an empty
    ``ee.List``, so earlier features take precedence over later ones.

    Args:
        points: collection to thin; must provide ``iterate`` (an
            ``ee.FeatureCollection`` in this notebook).

    Returns:
        ``ee.FeatureCollection`` built from the features that were kept.
    """
    keptFeatures = points.iterate(interateFunc, ee.List([]))
    return ee.FeatureCollection(ee.List(keptFeatures))

In [10]:
# Seeds 0..99: each seed produces one buffer-based subsample and one export task
seedList = np.arange(0, 100, 1).tolist()
print(colored('The seeds are:', 'blue', attrs=['bold']),seedList)
print(colored('Model is running!', 'blue', attrs=['bold']))

# Build and submit one table-to-asset export per seed
for seed in seedList:
    # Order the points by a seeded random column, thin them with the buffer filter,
    # then overwrite 'rep' with the seed so each row records the seed it came from
    shuffledPoints = fullDataPoints.randomColumn(columnName='rep', seed=seed).sort('rep')
    filterSubSamplePoints = filterDistanceFunc(shuffledPoints).map(lambda f: f.set('rep', seed))
    # Sample every band of the composite at each retained point
    filteredPointsWithCovariatesRaw = compositeImg.reduceRegions(collection=filterSubSamplePoints,
                                                                 reducer=ee.Reducer.first())
    # Remove the observations with NA.
    # The property names of the first feature alone are not a reliable checklist:
    # reduceRegions leaves a band's property off a feature whose pixel is masked, so a
    # first feature with a missing band would cause that band to never be checked.
    # The composite band names are therefore added to the list explicitly.
    requiredProperties = (filteredPointsWithCovariatesRaw.first().propertyNames()
                          .cat(compositeImg.bandNames())
                          .distinct())
    filteredPointsWithCovariatesNA = filteredPointsWithCovariatesRaw.filter(ee.Filter.notNull(requiredProperties))
    # Add the cross-validation fold: random value * 10 truncated to an integer
    subampleWithCovariates = filteredPointsWithCovariatesNA.randomColumn('CV_Fold', seed).map(
        lambda f: f.set('CV_Fold', ee.Number(f.get('CV_Fold')).multiply(10).toInt()))
    # Export the filtered table to an Earth Engine asset named after the seed
    subsamplingExport = ee.batch.Export.table.toAsset(
        collection=subampleWithCovariates,
        description='Gymnosperm_WD_Buffer_Zone_subsample_to_Asset_with_Seed_'+str(seed),
        assetId='users/leonidmoore/WoodDensityProject/TrainTables/Wood_Density_BufferZone_Subsampled_Train_table_for_Gymno_Seed_'+str(seed))
    # Submit the task; it runs on the server
    subsamplingExport.start()

[1m[34mThe seeds are:[0m [8]
[1m[34mModel is running![0m
