# HM2 Biomass potential grid search with natural observations

In [1]:
# import the libraries
import ee
import pandas as pd
import os
import numpy as np
import random
from random import sample
import itertools 
import geopandas as gpd
from sklearn.metrics import r2_score
from termcolor import colored # this is allocate colour and fonts type for the print title and text
from IPython.display import display, HTML

In [2]:
#check the working directory of local drive for Grid search result table loading
# os.getcwd()


In [3]:
# initialize the earth engine API
ee.Initialize()

## STEP 1 Data preperation and objects definition

In [4]:
# load the biomass map 
# transfer biomass to carbon stock with the factor 0.5
biomassDensityMapRaw = ee.Image("users/leonidmoore/ForestBiomass/SpawnMap/Spawn_Harmonized_AGB_density_Map_1km").select('agb')
# filter out the points with 0 carbon density in Spawn's carbon stock density map
biomassDensityMap = biomassDensityMapRaw.mask(biomassDensityMapRaw.gt(0)).rename('SpawnDensity')
print(biomassDensityMap.bandNames().getInfo())

['SpawnDensity']


In [5]:
# do random subsampling
exampleSamplePoints = ee.FeatureCollection("users/leonidmoore/ForestBiomass/SpawnMap/GridSampleShapefiles/HM2_Gridsubsampled_Natural_Seed_0")
print(exampleSamplePoints.size().getInfo())

4124


In [6]:
# define the boundary geography reference
unboundedGeo = ee.Geometry.Polygon([-180, 88, 0, 88, 180, 88, 180, -88, 0, -88, -180, -88], None, False)

In [7]:
# define the list of predictors
propertyOfInterest = ['Aridity_Index',
                      'CHELSA_Annual_Mean_Temperature',
                      'CHELSA_Annual_Precipitation',
                      'CHELSA_Isothermality',
                      'CHELSA_Max_Temperature_of_Warmest_Month',
                      'CHELSA_Mean_Diurnal_Range',
                      'CHELSA_Mean_Temperature_of_Coldest_Quarter',
                      'CHELSA_Mean_Temperature_of_Driest_Quarter',
                      'CHELSA_Mean_Temperature_of_Warmest_Quarter',
                      'CHELSA_Mean_Temperature_of_Wettest_Quarter',
                      'CHELSA_Min_Temperature_of_Coldest_Month',
                      'CHELSA_Precipitation_Seasonality',
                      'CHELSA_Precipitation_of_Coldest_Quarter',
                      'CHELSA_Precipitation_of_Driest_Month',
                      'CHELSA_Precipitation_of_Driest_Quarter',
                      'CHELSA_Precipitation_of_Warmest_Quarter',
                      'CHELSA_Precipitation_of_Wettest_Month',
                      'CHELSA_Precipitation_of_Wettest_Quarter',
                      'CHELSA_Temperature_Annual_Range',
                      'CHELSA_Temperature_Seasonality',
                      'Depth_to_Water_Table',
                      'EarthEnvTopoMed_Eastness',
                      'EarthEnvTopoMed_Elevation',
                      'EarthEnvTopoMed_Northness',
                      'EarthEnvTopoMed_ProfileCurvature',
                      'EarthEnvTopoMed_Roughness',
                      'EarthEnvTopoMed_Slope',
                      'SG_Absolute_depth_to_bedrock',
                      'WorldClim2_SolarRadiation_AnnualMean',
                      'WorldClim2_WindSpeed_AnnualMean',
                      'EarthEnvCloudCover_MODCF_interannualSD',
                      'EarthEnvCloudCover_MODCF_intraannualSD',
                      'EarthEnvCloudCover_MODCF_meanannual',
                      'EarthEnvTopoMed_AspectCosine',
                      'EarthEnvTopoMed_AspectSine',
                      'SG_Clay_Content_0_100cm',
                      'SG_Coarse_fragments_0_100cm',
                      'SG_Sand_Content_0_100cm',
                      'SG_Silt_Content_0_100cm',
                      'SG_Soil_pH_H2O_0_100cm',
                      'PresentTreeCover'] #
print(propertyOfInterest)

['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month', 'CHELSA_Mean_Diurnal_Range', 'CHELSA_Mean_Temperature_of_Coldest_Quarter', 'CHELSA_Mean_Temperature_of_Driest_Quarter', 'CHELSA_Mean_Temperature_of_Warmest_Quarter', 'CHELSA_Mean_Temperature_of_Wettest_Quarter', 'CHELSA_Min_Temperature_of_Coldest_Month', 'CHELSA_Precipitation_Seasonality', 'CHELSA_Precipitation_of_Coldest_Quarter', 'CHELSA_Precipitation_of_Driest_Month', 'CHELSA_Precipitation_of_Driest_Quarter', 'CHELSA_Precipitation_of_Warmest_Quarter', 'CHELSA_Precipitation_of_Wettest_Month', 'CHELSA_Precipitation_of_Wettest_Quarter', 'CHELSA_Temperature_Annual_Range', 'CHELSA_Temperature_Seasonality', 'Depth_to_Water_Table', 'EarthEnvTopoMed_Eastness', 'EarthEnvTopoMed_Elevation', 'EarthEnvTopoMed_Northness', 'EarthEnvTopoMed_ProfileCurvature', 'EarthEnvTopoMed_Roughness', 'EarthEnvTopoMed_Slope', 'SG_Absolute_depth_to_bedrock', 'World

In [8]:
# read the composite
compositeImage = ee.Image("users/leonidmoore/ForestBiomass/20200915_Forest_Biomass_Predictors_Image").select(propertyOfInterest).addBands(biomassDensityMap)
# show the band names of the composite image 
print('Composite Band Names:',compositeImage.bandNames().getInfo())

Composite Band Names: ['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month', 'CHELSA_Mean_Diurnal_Range', 'CHELSA_Mean_Temperature_of_Coldest_Quarter', 'CHELSA_Mean_Temperature_of_Driest_Quarter', 'CHELSA_Mean_Temperature_of_Warmest_Quarter', 'CHELSA_Mean_Temperature_of_Wettest_Quarter', 'CHELSA_Min_Temperature_of_Coldest_Month', 'CHELSA_Precipitation_Seasonality', 'CHELSA_Precipitation_of_Coldest_Quarter', 'CHELSA_Precipitation_of_Driest_Month', 'CHELSA_Precipitation_of_Driest_Quarter', 'CHELSA_Precipitation_of_Warmest_Quarter', 'CHELSA_Precipitation_of_Wettest_Month', 'CHELSA_Precipitation_of_Wettest_Quarter', 'CHELSA_Temperature_Annual_Range', 'CHELSA_Temperature_Seasonality', 'Depth_to_Water_Table', 'EarthEnvTopoMed_Eastness', 'EarthEnvTopoMed_Elevation', 'EarthEnvTopoMed_Northness', 'EarthEnvTopoMed_ProfileCurvature', 'EarthEnvTopoMed_Roughness', 'EarthEnvTopoMed_Slope', 'SG_Absolute_dep

## STEP 2 Subsampling and Covariates extraction

### 2.1 Export to Google earth engine

In [9]:
# define a seed list
seedList = np.arange(0, 100, 1).tolist()
print(colored('The seeds are:', 'blue', attrs=['bold']),seedList)
print(colored('Model is running!', 'blue', attrs=['bold']))
for seed in seedList:
    # add a random column into the feature collection
    # fullRandomPointsWithRandomCol = fullRandomPoints.randomColumn(columnName ='rd', seed=seed)
    # filterSubSamplePoints = fullRandomPointsWithRandomCol.filterMetadata(name='rd', operator='less_than', value=0.2)
    # print(filterSubSamplePoints.size().getInfo())
    # extract covariates
    filterSubSamplePoints = ee.FeatureCollection("users/leonidmoore/ForestBiomass/SpawnMap/GridSampleShapefiles/HM2_Gridsubsampled_Natural_Seed_"+str(seed))
    randomSubampleWithCovariatesRaw = compositeImage.reduceRegions(collection=filterSubSamplePoints,reducer = ee.Reducer.first())
    # remove the observations with NA
    subampleWithCovariates = randomSubampleWithCovariatesRaw.filter(ee.Filter.notNull(compositeImage.bandNames()))
    # add the random column with the name 'CV_fold'
    subampleWithCovariatesAndFold = subampleWithCovariates.randomColumn('CV_Fold',seed).map(lambda f: f.set('CV_Fold',ee.Number(f.get('CV_Fold')).multiply(10).toInt()))
    print(subampleWithCovariatesAndFold.size().getInfo())
    trainTableWithCovarites_Export = ee.batch.Export.table.toAsset(
        collection = subampleWithCovariatesAndFold,
        description = 'Train_Table_seed_'+str(seed)+'_Exportation',
        assetId = 'users/leonidmoore/ForestBiomass/SpawnMap/TrainTables/HM2_Grid_subsampled_Natural_Train_Table_seed_'+str(seed))
    
    # start the exportation
    trainTableWithCovarites_Export.start()

[1m[34mThe seeds are:[0m [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
[1m[34mModel is running![0m
3980
3989
3999
3988
3981
3982
3992
3989
3980
3973
3990
3994
3990
3995
3980
3994
3983
3989
3998
3993
3987
3995
3988
3994
3979
3992
3975
3984
4005
4006
3995
3995
3997
3994
3987
3992
3998
3998
3993
3984
3986
3988
4003
3995
3978
3982
3994
3989
3992
3988
3978
3993
3984
3987
3976
3989
3992
3990
4004
3998
3984
3989
3980
3994
3991
3986
3996
3978
3991
3984
4001
3986
3988
3994
4000
3992
3997
3986
3990
3984
3975
3999
3981
3993
3967
3983
3987
3980
4000
3990
3988
4002
3994
3995
3988
3990
3987
4001
3990
3987


## STEP 3 Grid search

### function defining

In [9]:
# generate the classifier list based on fullParameterSpace
def classifierListsGenerator (paramterSets, randomDiscrete = True, randomNumber = 12,nTrees = 20,modelType = 'REGRESSION',bagFraction=0.632,Seed=0):
    # define an empty list to load the defined models for grid search
    classifierList = []
    if randomDiscrete:
        # check the randomNumber
        if randomNumber is None:
            print('Warning! an integer number needs to be allocated to <randomNumber>!')
        else:
            print('A randomDiscrete approach has been applied to do grid search the paramter space! \n  The random model number is: '+str(randomNumber)+' !')
            # subset the fullParameterSpace randomly with the randomNumber
            random.seed(Seed)
            randomParameterApplied = random.sample(paramterSets,randomNumber)
            # print(randomSubsetParameter)
            
    else:
        print('The full space of the parameter sets is being running for grid search')
        random.seed(Seed)
        randomParameterApplied = sample(paramterSets,randomNumber)
    
    print(Seed)
    print('function use 20 as the default nTrees, \n You can define you own nTree value in the function argument settings!')
    # loop through the randomParameterApplied
    for ParaSet in randomParameterApplied:
        model_name = 'GridSeach_Model_'+str(ParaSet[0])+'_'+str(ParaSet[1])+'_'+str(ParaSet[2])
        # define the paramter setting of each model in the grid seach and allocate those parameters into the feature
        perRF = ee.Feature(ee.Geometry.Point([0,0])).set('ModelName',model_name,'PerClassifier',ee.Classifier.smileRandomForest(
            # the default ntrees we use 100
            numberOfTrees=nTrees,
            variablesPerSplit = ParaSet[0],
            minLeafPopulation = ParaSet[1],
            maxNodes = ParaSet[2],
            bagFraction=bagFraction).setOutputMode(modelType))
        classifierList.append(perRF)
    return(classifierList)

In [10]:
# Define the R^2 function for use with continuous valued models (i.e., regression based models)
def coefficientOfDetermination(anyVariableTable,propertyOfInterest,propertyOfInterest_Predicted):
    # Compute the mean of the property of interest
    propertyOfInterestMean = ee.Number(ee.Dictionary(ee.FeatureCollection(anyVariableTable).select([propertyOfInterest]).reduceColumns(ee.Reducer.mean(),[propertyOfInterest])).get('mean'));
    # Compute the total sum of squares
    def totalSoSFunction(f):
        return f.set('Difference_Squared',ee.Number(ee.Feature(f).get(propertyOfInterest)).subtract(propertyOfInterestMean).pow(ee.Number(2)))
    totalSumOfSquares = ee.Number(ee.Dictionary(ee.FeatureCollection(anyVariableTable).map(totalSoSFunction).select(['Difference_Squared']).reduceColumns(ee.Reducer.sum(),['Difference_Squared'])).get('sum'))
    # Compute the residual sum of squares
    def residualSoSFunction(f):
        return f.set('Residual_Squared',ee.Number(ee.Feature(f).get(propertyOfInterest)).subtract(ee.Number(ee.Feature(f).get(propertyOfInterest_Predicted))).pow(ee.Number(2)))
    residualSumOfSquares = ee.Number(ee.Dictionary(ee.FeatureCollection(anyVariableTable).map(residualSoSFunction).select(['Residual_Squared']).reduceColumns(ee.Reducer.sum(),['Residual_Squared'])).get('sum'))
    # Finalize the calculation
    r2 = ee.Number(1).subtract(residualSumOfSquares.divide(totalSumOfSquares))
    # print('I am running as well!')

    return ee.Number(r2)

In [11]:
# Define a function to take a feature with a classifier of interest
def computeCVAccuracy(featureWithClassifier,
                      propertyOfInterest,
                      modelType,
                      kFoldAssignmentFC,
                      cvFoldString,
                      classProperty,
                      accuracyMetricString,
                      extractedVariableTable):
    # Pull the classifier from the feature
    cOI = ee.Classifier(featureWithClassifier.get('PerClassifier'))
    # Create a function to map through the fold assignments and compute the overall accuracy
    # for all validation folds
    def computeAccuracyForFold(foldFeature):
        # Organize the training and validation data
        foldNumber = ee.Number(ee.Feature(foldFeature).get('Fold'))
        # print(foldNumber.getInfo())
        trainingData = extractedVariableTable.filterMetadata(cvFoldString,'not_equals',foldNumber)
        # print(trainingData.first().getInfo())
        validationData = extractedVariableTable.filterMetadata(cvFoldString,'equals',foldNumber)
        # Train the classifier and classify the validation dataset
        trainedClassifier = cOI.train(trainingData,classProperty,propertyOfInterest)
        outputtedPropName = classProperty+'_Predicted'
        classifiedValidationData = validationData.classify(trainedClassifier,outputtedPropName)
        # Create a central if/then statement that determines the type of accuracy values that are returned
        if modelType == 'CLASSIFICATION':
            # Compute the overall accuracy of the classification
            errorMatrix = classifiedValidationData.errorMatrix(classProperty,outputtedPropName,categoricalLevels)
            overallAccuracy = ee.Number(errorMatrix.accuracy())
            return foldFeature.set(accuracyMetricString,overallAccuracy)
        else:
            # Compute the R^2 of the regression
            r2ToSet = coefficientOfDetermination(classifiedValidationData,classProperty,outputtedPropName)
            return foldFeature.set(accuracyMetricString,r2ToSet)

    # Compute the accuracy values of the classifier across all folds
    accuracyFC = kFoldAssignmentFC.map(computeAccuracyForFold)
    meanAccuracy = accuracyFC.aggregate_mean(accuracyMetricString)
    tsdAccuracy = accuracyFC.aggregate_total_sd(accuracyMetricString)
    # print('I am running!')
    # Compute the feature to return
    featureToReturn = featureWithClassifier.select(['ModelName']).set('Mean_'+accuracyMetricString,meanAccuracy,'StDev_'+accuracyMetricString,tsdAccuracy)
    return featureToReturn

In [12]:
def gridSearchEarthEngine(inputTrainTable,# train data table in ee.FeatureCollection format
                          propertyOfInterest = propertyOfInterest, # list of predictors
                          classProperty = 'lgBD', # response varibale name in Google earth engine
                          nTrees = 20, # number of trees, default is 100
                          variablesPerSplitList = np.arange(3, 24, 3).tolist(), # list
                          minLeafPopulationList = np.arange(2, 22, 2).tolist(), # list
                          maxNodesList = np.arange(10, 110, 10).tolist(),# list
                          bagFraction = 0.632,
                          randomDiscrete = True, #boolean
                          randomNumber = 1, # if random discrete is True, you must set this value
                          foldsValue = 10,
                          modelType = 'REGRESSION',
                          cvFoldString = 'CV_Fold',
                          pyramidingPolicy = 'mean',
                          accuracyMetricString = 'R2',
                          Seeds = 0):
    
    parameterLists = [variablesPerSplitList,minLeafPopulationList,maxNodesList]
    # generate the list of all the possible paramter set combinations
    fullParamterSpace = list(itertools.product(*parameterLists))
    # generate the classifer in featureColletion format
    classifierList = classifierListsGenerator(paramterSets = fullParamterSpace,
                                              randomNumber = randomNumber,
                                              nTrees = nTrees,
                                              bagFraction = 0.632,
                                              Seed=Seeds)
    
    kList = list(range(0,foldsValue))
    kFoldAssignmentFC = ee.FeatureCollection(ee.List(kList).map(lambda n: ee.Feature(ee.Geometry.Point([0,0])).set('Fold',n)))
    # print(kFoldAssignmentFC.getInfo())
    classDf = pd.DataFrame(columns = ['Mean_R2','StDev_R2','ModelName','numberOfTrees','variablesPerSplit','minLeafPopulation','bagFraction','maxNodes'])

    for rf in classifierList:
        # print(rf.getInfo())
        accuracy_feature = ee.Feature(computeCVAccuracy(rf,propertyOfInterest,modelType='REGRESSION',kFoldAssignmentFC= kFoldAssignmentFC,cvFoldString = cvFoldString,classProperty=classProperty,accuracyMetricString =accuracyMetricString,extractedVariableTable = inputTrainTable))
        # extract the parameter information
        parameterDict = rf.getInfo().get('properties',{}).get('PerClassifier').get('classifier',{})
        parameterDF = pd.DataFrame(parameterDict,index = [0])
        # extract the metrics information
        metricDict = accuracy_feature.getInfo().get('properties')
        metricDF = pd.DataFrame(metricDict,index = [0])

        # print(metricDF)
        # print(parameterDF)
        resultDF = pd.concat([metricDF, parameterDF], axis=1, sort=False)
        # print(resultDF)
        classDf = pd.concat([classDf,resultDF],sort=False)# classDf.append(resultDF, sort=False)#
    # sort the grid search result by descending of Mean_R2
    classDfSorted = classDf.sort_values(['Mean_R2'], ascending = False)

    # print('Top 5 grid search results:\n', classDfSorted.head(5))
    return(classDfSorted.head(1)) 

In [13]:
# generate a ee.List to save the seeds
seedList = np.arange(9, 100, 1).tolist()
print(colored('The seeds are:', 'blue', attrs=['bold']),seedList)
print(colored('Model is running!', 'blue', attrs=['bold']))
for seed in seedList:
    # load the traindata table for each subsample
    inputVariableTable = ee.FeatureCollection('users/leonidmoore/ForestBiomass/SpawnMap/TrainTables/HM2_Grid_subsampled_Natural_Train_Table_seed_'+str(seed))
    # check the information of the FeatureCollection with predictors and covariates
    # print(nullExcludedTable.first().getInfo())
    # print(inputVariableTable.limit(1).getInfo())
    topModelParameter = gridSearchEarthEngine(inputTrainTable = inputVariableTable,
                                              propertyOfInterest = propertyOfInterest,
                                              classProperty = 'SpawnDensity',
                                              randomNumber = 48,
                                              nTrees = 200,
                                              Seeds=seed)
    # write the top parameters table to local folder
    # topModelParameter.to_csv('RemoteSensingModel/GridSearchResult/SD2_Potential_Biomass_Modeling_Grid_Search_Seed_'+str(seed)+'.csv',header=True,mode='w+')
    topModelParameter.to_csv('Data/SatelliteDerivedModel/GridSearchResult/HM2_Grid_subsampled_Natural_Potential_Biomass_Modeling_Grid_Search_Seed_'+str(seed)+'.csv',header=True,mode='w+')
    # show the progress for the grid seach by the seed number
    print(colored('Grid search for seed:'+str(seed)+' is done!', 'blue', attrs=['bold']))


[1m[34mThe seeds are:[0m [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
[1m[34mModel is running![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
9
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:9 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
10
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:10 is done![0m
A randomDiscrete approach

[1m[34mGrid search for seed:37 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
38
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:38 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
39
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:39 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
40
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:40 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
41
fun

[1m[34mGrid search for seed:67 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
68
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:68 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
69
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:69 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
70
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:70 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
71
fun

[1m[34mGrid search for seed:97 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
98
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:98 is done![0m
A randomDiscrete approach has been applied to do grid search the paramter space! 
  The random model number is: 48 !
99
function use 20 as the default nTrees, 
 You can define you own nTree value in the function argument settings!
[1m[34mGrid search for seed:99 is done![0m


## STEP 4 Potential biomass mapping

### 4.1 Prepare the toggled composite

In [14]:
# load the potential tree cover and rename it to 'PresentTreeCover'
potentialTreeCover = ee.Image('users/leonidmoore/ForestBiomass/Bastin_et_al_2019_Potential_Forest_Cover_Adjusted').rename("PresentTreeCover")

In [15]:
# define the list of retained predictors
retainedPropeties = ['Aridity_Index',
                      'CHELSA_Annual_Mean_Temperature',
                      'CHELSA_Annual_Precipitation',
                      'CHELSA_Isothermality',
                      'CHELSA_Max_Temperature_of_Warmest_Month',
                      'CHELSA_Mean_Diurnal_Range',
                      'CHELSA_Mean_Temperature_of_Coldest_Quarter',
                      'CHELSA_Mean_Temperature_of_Driest_Quarter',
                      'CHELSA_Mean_Temperature_of_Warmest_Quarter',
                      'CHELSA_Mean_Temperature_of_Wettest_Quarter',
                      'CHELSA_Min_Temperature_of_Coldest_Month',
                      'CHELSA_Precipitation_Seasonality',
                      'CHELSA_Precipitation_of_Coldest_Quarter',
                      'CHELSA_Precipitation_of_Driest_Month',
                      'CHELSA_Precipitation_of_Driest_Quarter',
                      'CHELSA_Precipitation_of_Warmest_Quarter',
                      'CHELSA_Precipitation_of_Wettest_Month',
                      'CHELSA_Precipitation_of_Wettest_Quarter',
                      'CHELSA_Temperature_Annual_Range',
                      'CHELSA_Temperature_Seasonality',
                      'Depth_to_Water_Table',
                      'EarthEnvTopoMed_Eastness',
                      'EarthEnvTopoMed_Elevation',
                      'EarthEnvTopoMed_Northness',
                      'EarthEnvTopoMed_ProfileCurvature',
                      'EarthEnvTopoMed_Roughness',
                      'EarthEnvTopoMed_Slope',
                      'SG_Absolute_depth_to_bedrock',
                      'WorldClim2_SolarRadiation_AnnualMean',
                      'WorldClim2_WindSpeed_AnnualMean',
                      'EarthEnvCloudCover_MODCF_interannualSD',
                      'EarthEnvCloudCover_MODCF_intraannualSD',
                      'EarthEnvCloudCover_MODCF_meanannual',
                      'EarthEnvTopoMed_AspectCosine',
                      'EarthEnvTopoMed_AspectSine',
                      'SG_Clay_Content_0_100cm',
                      'SG_Coarse_fragments_0_100cm',
                      'SG_Sand_Content_0_100cm',
                      'SG_Silt_Content_0_100cm',
                      'SG_Soil_pH_H2O_0_100cm']
print(retainedPropeties[0:5])

['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month']


In [16]:
# replace the human activity layers in the compositeImageRaw
compositeImageUpdated = compositeImage.select(retainedPropeties).addBands(potentialTreeCover)
# present the composite band names
print(colored('The band names are:', 'blue', attrs=['bold']),compositeImageUpdated.bandNames().getInfo())

[1m[34mThe band names are:[0m ['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month', 'CHELSA_Mean_Diurnal_Range', 'CHELSA_Mean_Temperature_of_Coldest_Quarter', 'CHELSA_Mean_Temperature_of_Driest_Quarter', 'CHELSA_Mean_Temperature_of_Warmest_Quarter', 'CHELSA_Mean_Temperature_of_Wettest_Quarter', 'CHELSA_Min_Temperature_of_Coldest_Month', 'CHELSA_Precipitation_Seasonality', 'CHELSA_Precipitation_of_Coldest_Quarter', 'CHELSA_Precipitation_of_Driest_Month', 'CHELSA_Precipitation_of_Driest_Quarter', 'CHELSA_Precipitation_of_Warmest_Quarter', 'CHELSA_Precipitation_of_Wettest_Month', 'CHELSA_Precipitation_of_Wettest_Quarter', 'CHELSA_Temperature_Annual_Range', 'CHELSA_Temperature_Seasonality', 'Depth_to_Water_Table', 'EarthEnvTopoMed_Eastness', 'EarthEnvTopoMed_Elevation', 'EarthEnvTopoMed_Northness', 'EarthEnvTopoMed_ProfileCurvature', 'EarthEnvTopoMed_Roughness', 'EarthEnvTopoMed_Slope', 'SG_A

### 4.2 Machine learning mapping for all scalers (SD2)

In [17]:
# define a loop through the seed list
seedList = np.arange(0, 100, 1).tolist()
# define the dependent variables list
print(colored('The models are:', 'blue', attrs=['bold']),seedList)
print(colored('Model is running:\nWith paramter sets:', 'blue', attrs=['bold']))
# for seed in seedList: range(0,len(seedList))
for seed in seedList:
    # load the points data with the covariates
    trainTable = ee.FeatureCollection('users/leonidmoore/ForestBiomass/SpawnMap/TrainTables/HM2_Grid_subsampled_Natural_Train_Table_seed_'+str(seed))
    # print(trainTable.size().getInfo())
    parameterTable = pd.read_csv('Data/SatelliteDerivedModel/GridSearchResult/HM2_Grid_subsampled_Natural_Potential_Biomass_Modeling_Grid_Search_Seed_'+str(seed)+'.csv', float_precision='round_trip')
    # not recomend to run the code below
    # print(parameterTable.head())
    # extract the paramters
    variablesPerSplitVal = int(parameterTable['variablesPerSplit'].iat[0]) # mtry
    minLeafPopulationVal = int(parameterTable['minLeafPopulation'].iat[0]) # minrow
    maxNodesVal = int(parameterTable['maxNodes'].iat[0]) # mac depth
    print('seed',seed,variablesPerSplitVal,minLeafPopulationVal,maxNodesVal)
    # define the random forest classifier
    rfClassifier = ee.Classifier.smileRandomForest(numberOfTrees = 200,
                                                   variablesPerSplit = variablesPerSplitVal, # mtry
                                                   minLeafPopulation = minLeafPopulationVal, # minrow
                                                   maxNodes = maxNodesVal, # max depth
                                                   bagFraction = 0.632,
                                                   seed = seed).setOutputMode('REGRESSION')
    trainedClassifier = rfClassifier.train(features = trainTable,
                                           classProperty = 'SpawnDensity',
                                           inputProperties = propertyOfInterest)
    # execute the prediction to generate the map
    existingCarbonDensityMap = compositeImageUpdated.classify(trainedClassifier)
    # print(predictedWoodDensityMap.getInfo())
    predictionExport = ee.batch.Export.image.toAsset(image = existingCarbonDensityMap,
                                                     description = '20221108_HM2_Potential_Biomass_Density_Map_To_Asset_'+str(seed),
                                                     assetId = 'users/leonidmoore/ForestBiomass/SpawnMap/PredictedMaps/Predicted_HM2_Potential_Biomass_Map_with_Seed_'+str(seed),
                                                     region = unboundedGeo,
                                                     crs = 'EPSG:4326',
                                                     crsTransform = [0.008333333333333333,0,-180,0,-0.008333333333333333,90],
                                                     maxPixels = 1e13)

    # print(predictionExportAsset)
    # start the export task
    predictionExport.start()
    # show the task status
    predictionExport.status()

[1m[34mThe models are:[0m [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
[1m[34mModel is running:
With paramter sets:[0m
seed 0 15 18 90
seed 1 18 14 100
seed 2 18 12 80
seed 3 21 18 80
seed 4 18 10 100
seed 5 21 14 80
seed 6 21 16 90
seed 7 18 16 100
seed 8 21 16 90
seed 9 18 14 80
seed 10 21 14 100
seed 11 21 14 100
seed 12 21 12 90
seed 13 15 12 100
seed 14 21 16 100
seed 15 18 16 90
seed 16 21 12 90
seed 17 21 16 80
seed 18 15 12 100
seed 19 18 20 90
seed 20 21 20 80
seed 21 18 20 100
seed 22 18 16 100
seed 23 18 16 100
seed 24 21 20 90
seed 25 21 8 100
seed 26 21 12 100
seed 27 21 14 100
seed 28 21 16 90
seed 29 21 14 80
seed 30 21 14 10

### 4.3 Stack all potential maps into an Image and export the mean etc. (HM2)

In [18]:
# load the images predicted by the ensemble models
# define an empty image
firstImage = ee.Image('users/leonidmoore/ForestBiomass/SpawnMap/PredictedMaps/Predicted_HM2_Potential_Biomass_Map_with_Seed_0').rename('Model_0').toFloat()
# load the other images and add thme as bands to the first image above
modelList = np.arange(1, 100, 1).tolist()
for ml in modelList:
    perModelImage = ee.Image('users/leonidmoore/ForestBiomass/SpawnMap/PredictedMaps/Predicted_HM2_Potential_Biomass_Map_with_Seed_'+str(ml)).rename('Model_'+str(ml)).toFloat()
    firstImage = firstImage.addBands(perModelImage)

print(colored('The band names are:', 'blue', attrs=['bold']),firstImage.bandNames().getInfo())

# calculate the mean and variation images
meanImage = firstImage.reduce(ee.Reducer.mean())
variImage = firstImage.reduce(ee.Reducer.stdDev()).divide(meanImage)
# get the 95% quantile
percentileImage = firstImage.reduce(ee.Reducer.percentile([2.5,97.5],['lower','upper']))
# add those two images into the GEE assets
meanExport = ee.batch.Export.image.toAsset(image = meanImage.toFloat(),
                                           description = '20221107_HM2_Potential_Density_Ensemble_Mean_Map_To_Asset',
                                           assetId = 'users/leonidmoore/ForestBiomass/GroundSourcedModel/EnsambledMaps/Predicted_HM2_Potential_density_Ensambled_Mean',
                                           region = unboundedGeo,
                                           crs = 'EPSG:4326',
                                           crsTransform = [0.008333333333333333,0,-180,0,-0.008333333333333333,90],
                                           maxPixels = 1e13)


# start the export task
meanExport.start()
# show the task status
meanExport.status()

variExport = ee.batch.Export.image.toAsset(image = variImage.toFloat(),
                                           description = '20221107_HM2_Potential_Density_Variation_Coef_Map_To_Asset',
                                           assetId = 'users/leonidmoore/ForestBiomass/GroundSourcedModel/EnsambledMaps/Predicted_HM2_Potential_density_Ensambled_Variation_Coefficient',
                                           region = unboundedGeo,
                                           crs = 'EPSG:4326',
                                           crsTransform = [0.008333333333333333,0,-180,0,-0.008333333333333333,90],
                                           maxPixels = 1e13)

# start the export task
variExport.start()
# show the task status
variExport.status()

percentileExport = ee.batch.Export.image.toAsset(image = percentileImage.toFloat(),
                                                 description = '20221107_HM2_Potential_Density_Percentile_Map_To_Asset',
                                                 assetId = 'users/leonidmoore/ForestBiomass/GroundSourcedModel/EnsambledMaps/Predicted_HM2_Potential_density_Ensambled_Percentile',
                                                 region = unboundedGeo,
                                                 crs = 'EPSG:4326',
                                                 crsTransform = [0.008333333333333333,0,-180,0,-0.008333333333333333,90],
                                                 maxPixels = 1e13)

# start the export task
percentileExport.start()
# show the task status
percentileExport.status()
# PRINT THE INFORMATION THAT THE EXPORT IS RUNNING ON GOOGLE EARTH ENGINE 
print(colored('Export is running on Google Earth Engine!\nPlease check it on the Google Earth Engine UI.', 'blue', attrs=['bold']))

[1m[34mThe band names are:[0m ['Model_0', 'Model_1', 'Model_2', 'Model_3', 'Model_4', 'Model_5', 'Model_6', 'Model_7', 'Model_8', 'Model_9', 'Model_10', 'Model_11', 'Model_12', 'Model_13', 'Model_14', 'Model_15', 'Model_16', 'Model_17', 'Model_18', 'Model_19', 'Model_20', 'Model_21', 'Model_22', 'Model_23', 'Model_24', 'Model_25', 'Model_26', 'Model_27', 'Model_28', 'Model_29', 'Model_30', 'Model_31', 'Model_32', 'Model_33', 'Model_34', 'Model_35', 'Model_36', 'Model_37', 'Model_38', 'Model_39', 'Model_40', 'Model_41', 'Model_42', 'Model_43', 'Model_44', 'Model_45', 'Model_46', 'Model_47', 'Model_48', 'Model_49', 'Model_50', 'Model_51', 'Model_52', 'Model_53', 'Model_54', 'Model_55', 'Model_56', 'Model_57', 'Model_58', 'Model_59', 'Model_60', 'Model_61', 'Model_62', 'Model_63', 'Model_64', 'Model_65', 'Model_66', 'Model_67', 'Model_68', 'Model_69', 'Model_70', 'Model_71', 'Model_72', 'Model_73', 'Model_74', 'Model_75', 'Model_76', 'Model_77', 'Model_78', 'Model_79', 'Model_80', 'Mod