# Interpolation/Extrapolation Python code 

### This notebook runs the interpolation/extrapolation calculation on Google Earth Engine through the Python API

### Import libraries we need for calculation

In [1]:
# import the libraries
import ee
import pandas as pd
import os
import numpy as np
import random
from random import sample
from scipy.spatial import ConvexHull
from sklearn.decomposition import PCA
from itertools import combinations
import itertools 
import geopandas as gpd
from sklearn.metrics import r2_score
from termcolor import colored # this is allocate colour and fonts type for the print title and text
from IPython.display import display, HTML

In [2]:
#set the working directory of local drive for Grid search result table loading
# os.getcwd()

In [3]:
# Initialize the Earth Engine (ee) API connection; every ee.* call below needs it
# NOTE(review): presumably relies on credentials stored by a prior authentication step — confirm
ee.Initialize()

### The interpolation and extrapolation analysis has two models: one for the present models and one for the potential models

In [4]:
# Names of the covariate columns/bands used in the analysis.
# The order is significant: the same list is used to subset both the sample
# table and the composite image, so do not reorder the entries.
selectedCols = [
    # aridity and CHELSA bioclimatic variables
    "Aridity_Index",
    "CHELSA_Annual_Mean_Temperature",
    "CHELSA_Annual_Precipitation",
    "CHELSA_Isothermality",
    "CHELSA_Max_Temperature_of_Warmest_Month",
    "CHELSA_Mean_Diurnal_Range",
    "CHELSA_Mean_Temperature_of_Coldest_Quarter",
    "CHELSA_Mean_Temperature_of_Driest_Quarter",
    "CHELSA_Mean_Temperature_of_Warmest_Quarter",
    "CHELSA_Mean_Temperature_of_Wettest_Quarter",
    "CHELSA_Min_Temperature_of_Coldest_Month",
    "CHELSA_Precipitation_Seasonality",
    "CHELSA_Precipitation_of_Coldest_Quarter",
    "CHELSA_Precipitation_of_Driest_Month",
    "CHELSA_Precipitation_of_Driest_Quarter",
    "CHELSA_Precipitation_of_Warmest_Quarter",
    "CHELSA_Precipitation_of_Wettest_Month",
    "CHELSA_Precipitation_of_Wettest_Quarter",
    "CHELSA_Temperature_Annual_Range",
    "CHELSA_Temperature_Seasonality",
    # water table, topography, bedrock depth
    "Depth_to_Water_Table",
    "EarthEnvTopoMed_Eastness",
    "EarthEnvTopoMed_Elevation",
    "EarthEnvTopoMed_Northness",
    "EarthEnvTopoMed_ProfileCurvature",
    "EarthEnvTopoMed_Roughness",
    "EarthEnvTopoMed_Slope",
    "SG_Absolute_depth_to_bedrock",
    # solar radiation, wind speed, cloud cover
    "WorldClim2_SolarRadiation_AnnualMean",
    "WorldClim2_WindSpeed_AnnualMean",
    "EarthEnvCloudCover_MODCF_interannualSD",
    "EarthEnvCloudCover_MODCF_intraannualSD",
    "EarthEnvCloudCover_MODCF_meanannual",
    # aspect
    "EarthEnvTopoMed_AspectCosine",
    "EarthEnvTopoMed_AspectSine",
    # land cover and human disturbance
    "LandCoverClass_Cultivated_and_Managed_Vegetation",
    "Human_Disturbance",
    "LandCoverClass_Urban_Builtup",
    # SG_* soil properties, 0-100 cm
    "SG_Clay_Content_0_100cm",
    "SG_Coarse_fragments_0_100cm",
    "SG_Sand_Content_0_100cm",
    "SG_Silt_Content_0_100cm",
    "SG_Soil_pH_H2O_0_100cm",
    # land use and tree cover
    "cropland",
    "grazing",
    "pasture",
    "rangeland",
    "PresentTreeCover",
]

In [5]:
# Import the sampled covariate table and view a summary of it.
# NOTE: dropna() runs on the full CSV *before* the column subset is taken, so a row
# with a missing value in ANY column of the file (not only the selected ones) is removed.
presentData = pd.read_csv('Data/vData/PCA_ConvexHull_IntExt/20230126_Merged_Covariates_sampled_dataset_outliers_cleaned_for_Figure.csv').dropna()
# Keep only the modelled covariates plus the WDPA column, in the order given by selectedCols
presentData = presentData[selectedCols+['WDPA']]

# Summaries: info() prints by itself; describe() and head() only render when they are the
# last expression of a cell, so they are passed to display() to actually show up here
presentData.info()
display(presentData.describe())
display(presentData.head(15))

# Instantiate the composite that was used to sample the points, cast to double,
# and select the same bands, in the same order, as the columns of presentData
fullCompositeImage = ee.Image("users/leonidmoore/ForestBiomass/20200915_Forest_Biomass_Predictors_Image").toDouble()
presentCompositeImage = fullCompositeImage.select(selectedCols+['WDPA'])
# getInfo() is a blocking round-trip to the Earth Engine server
print('Composite Bands',presentCompositeImage.bandNames().getInfo())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 514170 entries, 0 to 526095
Data columns (total 49 columns):
 #   Column                                            Non-Null Count   Dtype  
---  ------                                            --------------   -----  
 0   Aridity_Index                                     514170 non-null  float64
 1   CHELSA_Annual_Mean_Temperature                    514170 non-null  float64
 2   CHELSA_Annual_Precipitation                       514170 non-null  float64
 3   CHELSA_Isothermality                              514170 non-null  float64
 4   CHELSA_Max_Temperature_of_Warmest_Month           514170 non-null  float64
 5   CHELSA_Mean_Diurnal_Range                         514170 non-null  float64
 6   CHELSA_Mean_Temperature_of_Coldest_Quarter        514170 non-null  float64
 7   CHELSA_Mean_Temperature_of_Driest_Quarter         514170 non-null  float64
 8   CHELSA_Mean_Temperature_of_Warmest_Quarter        514170 non-null  float64
 9   CHEL

In [6]:
# Input the proportion of variance that you would like to cover when running the script.
# The value is a percentage (0-100): assessExtrapolation compares it against the
# cumulative explained-variance ratios multiplied by 100.
propOfVariance = 90

In [7]:
def assessExtrapolation(importedData, compositeImage, propOfVariance):
    """Build an Earth Engine image scoring how well each pixel is covered by the sampled data.

    The sample table is standardized and decomposed with a PCA. The leading
    principal components (enough to reach ``propOfVariance`` percent of the
    variance, never fewer than two) are combined into all possible pairs and a
    2-D convex hull of the samples is computed for every pair. The composite
    image is standardized with the same means / standard deviations, projected
    onto the same loadings, and every pair of PC bands is classified against
    its hull with ``ee.Classifier.spectralRegion``. The classified images are
    summed and divided by the number of pairs.

    Parameters
    ----------
    importedData : pandas.DataFrame
        Sampled covariate values; every column is used as a variable.
        NOTE(review): the image arithmetic appears to rely on the columns being in
        the same order as the bands of ``compositeImage`` — confirm at the call site.
    compositeImage : ee.Image
        Image the samples were drawn from, with one band per column of
        ``importedData``.
    propOfVariance : float
        Share of the total variance, in percent (0-100), that the retained
        principal components should cover.

    Returns
    -------
    ee.Image
        Sum of the per-pair classified images divided by the number of PC
        pairs (presumably the fraction of hulls containing the pixel — see the
        Earth Engine documentation of ``spectralRegion``).

    Raises
    ------
    ValueError
        If a column has zero standard deviation (it cannot be standardized),
        or if the PCA yields fewer than two components (no 2-D hull possible).
    """

    # Excise the columns of interest from the data frame
    variablesOfInterest = importedData #drop(['system:index', '.geo'], axis=1)

    # Compute the mean and standard deviation of each band, then standardize the point data
    meanVector = variablesOfInterest.mean()
    stdVector = variablesOfInterest.std()

    # A constant column would turn into NaN/inf below and make the PCA fail with an
    # opaque message, so report the offending columns up front
    constantColumns = list(stdVector.index[stdVector == 0])
    if constantColumns:
        raise ValueError('Cannot standardize columns with zero standard deviation: ' + str(constantColumns))

    standardizedData = (variablesOfInterest-meanVector)/stdVector

    # Then standardize the composite from which the points were sampled
    meanList = meanVector.tolist()
    stdList = stdVector.tolist()
    bandNames = list(meanVector.index)
    meanImage = ee.Image(meanList).rename(bandNames)
    stdImage = ee.Image(stdList).rename(bandNames)
    standardizedImage = compositeImage.subtract(meanImage).divide(stdImage)

    # Run a PCA on the point samples
    pcaOutput = PCA()
    pcaOutput.fit(standardizedData)

    # Save the cumulative variance (in percent) represented by each PC
    cumulativeVariance = np.cumsum(np.round(pcaOutput.explained_variance_ratio_, decimals=4)*100)

    # Make a list of PC names for future organizational purposes; size it from the fitted
    # model, which has fewer components than columns when there are fewer samples than columns
    pcNames = ['PC'+str(x) for x in range(1,pcaOutput.n_components_+1)]
    if len(pcNames) < 2:
        raise ValueError('At least two principal components are needed to build 2-D convex hulls')

    # Get the PC loadings as a data frame
    loadingsDF = pd.DataFrame(pcaOutput.components_,columns=[str(x)+'_Loads' for x in bandNames],index=pcNames)

    # Get the original data transformed into PC space, reusing the fit above so the
    # scores come from exactly the loadings that are sent to Earth Engine
    transformedData = pd.DataFrame(pcaOutput.transform(standardizedData),columns=pcNames)

    # Make principal components images, multiplying the standardized image by each of the eigenvectors
    # Collect each one of the images in a single image collection;

    # First step: make an image collection wherein each image is a PC loadings image
    listOfLoadings = ee.List(loadingsDF.values.tolist())
    eePCNames = ee.List(pcNames)
    zippedList = eePCNames.zip(listOfLoadings)
    def makeLoadingsImage(zippedValue):
        # zippedValue is a [PC name, loadings list] pair
        return ee.Image.constant(ee.List(zippedValue).get(1)).rename(bandNames).set('PC',ee.List(zippedValue).get(0))
    loadingsImageCollection = ee.ImageCollection(zippedList.map(makeLoadingsImage))

    # Second step: multiply each of the loadings image by the standardized image and reduce it using a "sum"
    # to finalize the matrix multiplication
    def finalizePCImages(loadingsImage):
        return ee.Image(loadingsImage).multiply(standardizedImage).reduce('sum').rename([ee.String(ee.Image(loadingsImage).get('PC'))]).set('PC',ee.String(ee.Image(loadingsImage).get('PC')))
    principalComponentsImages = loadingsImageCollection.map(finalizePCImages)

    # Choose how many principal components are of interest in this analysis based on amount of
    # variance explained. Clamp the count: it cannot exceed the number of fitted components
    # (the rounded cumulative sum may never reach propOfVariance) and must be at least 2,
    # otherwise there are no PC pairs and the final division would be by zero
    numberOfComponents = sum(i < propOfVariance for i in cumulativeVariance)+1
    numberOfComponents = max(2, min(numberOfComponents, len(pcNames)))
    print('Number of Principal Components being used:',numberOfComponents)

    # Compute the combinations of the principal components being used to compute the 2-D convex hulls
    tupleCombinations = list(combinations(list(pcNames[0:numberOfComponents]),2))
    print('Number of Combinations being used:',len(tupleCombinations))

    # Generate a convex hull for every pair of principal components of interest
    cHullCoordsList = list()
    for firstPC, secondPC in tupleCombinations:
        pairPoints = transformedData[[firstPC,secondPC]].values
        outputCHull = ConvexHull(pairPoints)
        # hull.vertices holds positional row indices, so index the array positionally;
        # ravel() flattens to [x0, y0, x1, y1, ...]
        cHullCoordsList.append(pairPoints[outputCHull.vertices].ravel().tolist())

    # Reformat the image collection to an image with band names that can be selected programmatically
    pcImage = principalComponentsImages.toBands().rename(pcNames)

    # Generate an image collection with each PC pair selected together with its matching hull
    listOfPCs = ee.List(tupleCombinations)
    listOfCHullCoords = ee.List(cHullCoordsList)
    zippedListPCsAndCHulls = listOfPCs.zip(listOfCHullCoords)

    def makeToClassifyImages(pcPairAndCoords):
        # pcPairAndCoords is a [[PC name, PC name], flattened hull coordinates] pair
        imageToClassify = pcImage.select(ee.List(pcPairAndCoords).get(0)).set('CHullCoords',ee.List(pcPairAndCoords).get(1))
        classifiedImage = imageToClassify.rename('u','v').classify(ee.Classifier.spectralRegion([imageToClassify.get('CHullCoords')]))
        return classifiedImage
    classifedImages = ee.ImageCollection(zippedListPCsAndCHulls.map(makeToClassifyImages))
    finalImageToExport = classifedImages.sum().divide(ee.Image.constant(len(tupleCombinations)))

    return finalImageToExport

In [8]:
# Apply the function to the present-day sample table and its matching composite image.
# The call prints the number of principal components and PC pairs being used and
# returns the image that is exported below.
finalImageToExportPresent = assessExtrapolation(presentData, presentCompositeImage, propOfVariance)

Number of Principal Components being used: 19
Number of Combinations being used: 171


In [9]:
# Define the boundary (region) of the export: a polygon whose corners run from
# -180 to 180 and from -88 to 88 (presumably longitude/latitude pairs in a flat list).
# NOTE(review): the trailing `None, False` are presumably the `proj` and `geodesic`
# arguments, i.e. a planar (non-geodesic) polygon — confirm against the Earth Engine API.
unboundedGeo = ee.Geometry.Polygon([-180, 88, 0, 88, 180, 88, 180, -88, 0, -88, -180, -88], None, False)

### Export the calculation results to the Google Earth Engine Asset folder

In [10]:
# Export the present IntExt calculation result to an Earth Engine asset
# (toAsset writes to the assetId below, not to a local folder)
taskPresent = ee.batch.Export.image.toAsset(
    image = finalImageToExportPresent,
    description = 'GS_IntExt_Image',
    assetId = 'users/leonidmoore/ForestBiomass/IntExt/IntExt_of_GS_Models',
    region = unboundedGeo,
    crs = 'EPSG:4326',
    # NOTE(review): presumably a 1/120-degree (30 arc-second) grid anchored at (-180, 90) — confirm
    crsTransform = [0.008333333333333333,0,-180,0,-0.008333333333333333,90],
    maxPixels = 1e13)
# Submit the task; its progress can then be followed in the Google Earth Engine web interface
taskPresent.start()
# Show the task status (last expression of the cell, so the status dict is displayed)
taskPresent.status()

{'state': 'READY',
 'description': 'GS_IntExt_Image',
 'creation_timestamp_ms': 1675775214892,
 'update_timestamp_ms': 1675775214892,
 'start_timestamp_ms': 0,
 'task_type': 'EXPORT_IMAGE',
 'id': '7UCXEM2GGIHZNOIB6CB6FISK',
 'name': 'projects/earthengine-legacy/operations/7UCXEM2GGIHZNOIB6CB6FISK'}